From e60416d08019d6a78fa49544eb9ac1830bdc6d4c Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig <alyssa@rosenzweig.io> Date: Mon, 29 Apr 2024 21:40:04 -0400 Subject: [PATCH] InstCountCI: Update Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io> --- unittests/InstructionCountCI/AFP/H0F3A.json | 80 +- .../AFP/SVE256/Secondary.json | 19 +- .../AFP/SVE256/Secondary_REP.json | 202 +- .../AFP/SVE256/Secondary_REPNE.json | 192 +- .../InstructionCountCI/AFP/Secondary.json | 19 +- .../InstructionCountCI/AFP/Secondary_REP.json | 202 +- .../AFP/Secondary_REPNE.json | 192 +- .../InstructionCountCI/AFP/VEX_map1.json | 410 +- .../InstructionCountCI/AFP/VEX_map3.json | 80 +- unittests/InstructionCountCI/Atomics.json | 1926 ++-- .../InstructionCountCI/Crypto/H0F38.json | 105 +- .../InstructionCountCI/Crypto/H0F3A.json | 58 +- unittests/InstructionCountCI/DDD.json | 117 +- .../FEXOpt/AddressingLimitations.json | 1078 ++- .../FEXOpt/AddressingLimitations_32Bit.json | 324 +- .../InstructionCountCI/FEXOpt/MultiInst.json | 1033 ++- .../FEXOpt/MultiInst_AFP.json | 13 +- .../InstructionCountCI/FEXOpt/libnss.json | 4900 +++++----- .../InstructionCountCI/FlagM/Atomics.json | 1678 ++-- .../InstructionCountCI/FlagM/FlagOpts.json | 370 +- unittests/InstructionCountCI/FlagM/H0F38.json | 126 +- .../InstructionCountCI/FlagM/HotBlocks.json | 311 +- .../FlagM/HotBlocks_32Bit.json | 1588 ++-- .../FlagM/HotBlocks_AFP.json | 193 +- .../InstructionCountCI/FlagM/Primary.json | 3612 +++++--- .../FlagM/PrimaryGroup.json | 3146 ++++--- .../FlagM/Primary_32Bit.json | 569 +- .../InstructionCountCI/FlagM/Secondary.json | 2162 +++-- .../FlagM/SecondaryGroup.json | 1627 ++-- .../FlagM/SecondaryModRM.json | 26 +- .../FlagM/Secondary_OpSize.json | 60 +- .../FlagM/Secondary_REP.json | 130 +- .../InstructionCountCI/FlagM/VEX_map1.json | 114 +- .../InstructionCountCI/FlagM/VEX_map2.json | 524 +- .../FlagM/VEX_map_group.json | 84 +- unittests/InstructionCountCI/FlagM/x87.json | 7884 ++++++++-------- .../InstructionCountCI/FlagM/x87_f64.json | 5880 ++++++------ unittests/InstructionCountCI/H0F38.json | 920 +- unittests/InstructionCountCI/H0F3A.json | 1622 ++-- .../InstructionCountCI/H0F3A_SVE128.json | 236 +- unittests/InstructionCountCI/Primary.json | 5606 +++++++----- .../InstructionCountCI/PrimaryGroup.json | 4289 +++++---- .../InstructionCountCI/Primary_32Bit.json | 599 +- unittests/InstructionCountCI/RPRES/DDD.json | 16 +- .../InstructionCountCI/RPRES/Secondary.json | 12 +- .../RPRES/Secondary_REP_AFP.json | 16 +- .../RPRES/VEX_map1_AFP.json | 42 +- unittests/InstructionCountCI/Secondary.json | 3871 ++++---- .../InstructionCountCI/SecondaryGroup.json | 1889 ++-- .../InstructionCountCI/SecondaryModRM.json | 31 +- .../InstructionCountCI/Secondary_32Bit.json | 50 +- .../InstructionCountCI/Secondary_OpSize.json | 1317 ++- .../Secondary_OpSize_FCMA.json | 9 +- .../Secondary_OpSize_SVE128.json | 94 +- .../Secondary_OpSize_SVE256.json | 16 +- .../InstructionCountCI/Secondary_REP.json | 617 +- .../InstructionCountCI/Secondary_REPNE.json | 415 +- .../Secondary_REPNE_FCMA.json | 9 +- .../InstructionCountCI/Secondary_SVE128.json | 84 +- unittests/InstructionCountCI/VEX_map1.json | 4960 ++++++---- .../InstructionCountCI/VEX_map1_FCMA.json | 88 +- unittests/InstructionCountCI/VEX_map2.json | 2193 +++-- unittests/InstructionCountCI/VEX_map3.json | 4464 +++++---- .../InstructionCountCI/VEX_map_group.json | 517 +- unittests/InstructionCountCI/x87.json | 7990 +++++++++-------- unittests/InstructionCountCI/x87_f64.json | 6480 ++++++------- 66 files changed, 51525 insertions(+), 37961 deletions(-) diff --git a/unittests/InstructionCountCI/AFP/H0F3A.json b/unittests/InstructionCountCI/AFP/H0F3A.json index 7ee3d95588..154a982d70 100644 --- a/unittests/InstructionCountCI/AFP/H0F3A.json +++ b/unittests/InstructionCountCI/AFP/H0F3A.json @@ -11,103 +11,143 @@ }, "Instructions": { "roundss xmm0, xmm1, 00000000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "Nearest rounding", "0x66 0x0f 0x3a 0x0a" ], "ExpectedArm64ASM": [ - "frintn s16, s17" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "frintn s4, s3", + "mov v16.16b, v4.16b" ] }, "roundss xmm0, xmm1, 00000001b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "-inf rounding", "0x66 0x0f 0x3a 0x0a" ], "ExpectedArm64ASM": [ - "frintm s16, s17" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "frintm s4, s3", + "mov v16.16b, v4.16b" ] }, "roundss xmm0, xmm1, 00000010b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "+inf rounding", "0x66 0x0f 0x3a 0x0a" ], "ExpectedArm64ASM": [ - "frintp s16, s17" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "frintp s4, s3", + "mov v16.16b, v4.16b" ] }, "roundss xmm0, xmm1, 00000011b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "truncate rounding", "0x66 0x0f 0x3a 0x0a" ], "ExpectedArm64ASM": [ - "frintz s16, s17" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "frintz s4, s3", + "mov v16.16b, v4.16b" ] }, "roundss xmm0, xmm1, 00000100b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "host rounding mode rounding", "0x66 0x0f 0x3a 0x0a" ], "ExpectedArm64ASM": [ - "frinti s16, s17" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "frinti s4, s3", + "mov v16.16b, v4.16b" ] }, "roundsd xmm0, xmm1, 00000000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "Nearest rounding", "0x66 0x0f 0x3a 0x0b" ], "ExpectedArm64ASM": [ - "frintn d16, d17" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "frintn d4, d3", + "mov v16.16b, v4.16b" ] }, "roundsd xmm0, xmm1, 00000001b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "-inf rounding", "0x66 0x0f 0x3a 0x0b" ], "ExpectedArm64ASM": [ - "frintm d16, d17" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "frintm d4, d3", + "mov v16.16b, v4.16b" ] }, "roundsd xmm0, xmm1, 00000010b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "+inf rounding", "0x66 0x0f 0x3a 0x0b" ], "ExpectedArm64ASM": [ - "frintp d16, d17" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "frintp d4, d3", + "mov v16.16b, v4.16b" ] }, "roundsd xmm0, xmm1, 00000011b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "truncate rounding", "0x66 0x0f 0x3a 0x0b" ], "ExpectedArm64ASM": [ - "frintz d16, d17" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "frintz d4, d3", + "mov v16.16b, v4.16b" ] }, "roundsd xmm0, xmm1, 00000100b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "host rounding mode rounding", "0x66 0x0f 0x3a 0x0b" ], "ExpectedArm64ASM": [ - "frinti d16, d17" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "frinti d4, d3", + "mov v16.16b, v4.16b" ] } } diff --git a/unittests/InstructionCountCI/AFP/SVE256/Secondary.json b/unittests/InstructionCountCI/AFP/SVE256/Secondary.json index 43e9b71437..ee7cfb66a3 100644 --- a/unittests/InstructionCountCI/AFP/SVE256/Secondary.json +++ b/unittests/InstructionCountCI/AFP/SVE256/Secondary.json @@ -10,23 +10,30 @@ }, "Instructions": { "cvtpi2ps xmm0, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0x0f 0x2a" ], "ExpectedArm64ASM": [ - "ldr d2, [x4]", - "scvtf v16.2s, v2.2s" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "ldr d3, [x20]", + "mov z4.d, z2.d", + "scvtf v4.2s, v3.2s", + "mov z16.d, p7/m, z4.d" ] }, "cvtpi2ps xmm0, mm0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "0x0f 0x2a" ], "ExpectedArm64ASM": [ - "ldr d2, [x28, #768]", - "scvtf v16.2s, v2.2s" + "mov z2.d, p7/m, z16.d", + "ldr d3, [x28, #768]", + "mov z4.d, z2.d", + "scvtf v4.2s, v3.2s", + "mov z16.d, p7/m, z4.d" ] } } diff --git a/unittests/InstructionCountCI/AFP/SVE256/Secondary_REP.json b/unittests/InstructionCountCI/AFP/SVE256/Secondary_REP.json index 3130ee5890..7230fb7ea4 100644 --- a/unittests/InstructionCountCI/AFP/SVE256/Secondary_REP.json +++ b/unittests/InstructionCountCI/AFP/SVE256/Secondary_REP.json @@ -10,219 +10,307 @@ }, "Instructions": { "cvtsi2ss xmm0, eax": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf3 0x0f 0x2a" ], "ExpectedArm64ASM": [ - "scvtf s16, w4" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "mov z3.d, z2.d", + "scvtf s3, w20", + "mov z16.d, p7/m, z3.d" ] }, "cvtsi2ss xmm0, dword [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0xf3 0x0f 0x2a" ], "ExpectedArm64ASM": [ - "ldr s2, [x4]", - "scvtf s16, s2" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "ldr s3, [x20]", + "mov z4.d, z2.d", + "scvtf s4, s3", + "mov z16.d, p7/m, z4.d" ] }, "cvtsi2ss xmm0, qword [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0xf3 0x0f 0x2a" ], "ExpectedArm64ASM": [ - "ldr x20, [x4]", - "scvtf s16, x20" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "ldr x21, [x20]", + "mov z3.d, z2.d", + "scvtf s3, x21", + "mov z16.d, p7/m, z3.d" ] }, "sqrtss xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0xf3 0x0f 0x51", "ExpectedArm64ASM": [ - "fsqrt s16, s17" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov z4.d, z2.d", + "fsqrt s4, s3", + "mov z16.d, p7/m, z4.d" ] }, "rsqrtss xmm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "FEAT_FPRES could make this more optimal", "0xf3 0x0f 0x52" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov z4.d, z2.d", "fmov s0, #0x70 (1.0000)", - "fsqrt s1, s17", - "fdiv s16, s0, s1" + "fsqrt s1, s3", + "fdiv s4, s0, s1", + "mov z16.d, p7/m, z4.d" ] }, "rcpss xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "FEAT_FPRES could make this more optimal", "0xf3 0x0f 0x53" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov z4.d, z2.d", "fmov s0, #0x70 (1.0000)", - "fdiv s16, s0, s17" + "fdiv s4, s0, s3", + "mov z16.d, p7/m, z4.d" ] }, "addss xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf3 0x0f 0x58" ], "ExpectedArm64ASM": [ - "fadd s16, s16, s17" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov z4.d, z2.d", + "fadd s4, s2, s3", + "mov z16.d, p7/m, z4.d" ] }, "mulss xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf3 0x0f 0x59" ], "ExpectedArm64ASM": [ - "fmul s16, s16, s17" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov z4.d, z2.d", + "fmul s4, s2, s3", + "mov z16.d, p7/m, z4.d" ] }, "cvtss2sd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0xf3 0x0f 0x5a", "ExpectedArm64ASM": [ - "fcvt d16, s17" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov z4.d, z2.d", + "fcvt d4, s3", + "mov z16.d, p7/m, z4.d" ] }, "cvtss2sd xmm0, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0xf3 0x0f 0x5a", "ExpectedArm64ASM": [ - "ldr d2, [x4]", - "fcvt d16, s2" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "ldr d3, [x20]", + "mov z4.d, z2.d", + "fcvt d4, s3", + "mov z16.d, p7/m, z4.d" ] }, "subss xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf3 0x0f 0x5c" ], "ExpectedArm64ASM": [ - "fsub s16, s16, s17" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov z4.d, z2.d", + "fsub s4, s2, s3", + "mov z16.d, p7/m, z4.d" ] }, "minss xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf3 0x0f 0x5d" ], "ExpectedArm64ASM": [ - "fmin s16, s16, s17" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov z4.d, z2.d", + "fmin s4, s2, s3", + "mov z16.d, p7/m, z4.d" ] }, "divss xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf3 0x0f 0x5e" ], "ExpectedArm64ASM": [ - "fdiv s16, s16, s17" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov z4.d, z2.d", + "fdiv s4, s2, s3", + "mov z16.d, p7/m, z4.d" ] }, "maxss xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf3 0x0f 0x5f" ], "ExpectedArm64ASM": [ - "fmax s16, s16, s17" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov z4.d, z2.d", + "fmax s4, s2, s3", + "mov z16.d, p7/m, z4.d" ] }, "cmpss xmm0, xmm1, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf3 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmeq s16, s16, s17" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov z4.d, z2.d", + "fcmeq s4, s2, s3", + "mov z16.d, p7/m, z4.d" ] }, "cmpss xmm0, xmm1, 1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf3 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmgt s16, s17, s16" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov z4.d, z2.d", + "fcmgt s4, s3, s2", + "mov z16.d, p7/m, z4.d" ] }, "cmpss xmm0, xmm1, 2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf3 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmge s16, s17, s16" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov z4.d, z2.d", + "fcmge s4, s3, s2", + "mov z16.d, p7/m, z4.d" ] }, "cmpss xmm0, xmm1, 3": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": [ "0xf3 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmge s0, s16, s17", - "fcmgt s1, s17, s16", + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov z4.d, z2.d", + "fcmge s0, s2, s3", + "fcmgt s1, s3, s2", "orr v0.8b, v0.8b, v1.8b", "mvn v0.8b, v0.8b", "ptrue p0.s, vl1", - "mov z16.s, p0/m, z0.s" + "mov z4.s, p0/m, z0.s", + "mov z16.d, p7/m, z4.d" ] }, "cmpss xmm0, xmm1, 4": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": [ "0xf3 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmeq s0, s16, s17", + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov z4.d, z2.d", + "fcmeq s0, s2, s3", "mvn v0.8b, v0.8b", "ptrue p0.s, vl1", - "mov z16.s, p0/m, z0.s" + "mov z4.s, p0/m, z0.s", + "mov z16.d, p7/m, z4.d" ] }, "cmpss xmm0, xmm1, 5": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0xf3 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmgt s2, s17, s16", - "mvn v2.16b, v2.16b", - "mov v16.s[0], v2.s[0]" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "fcmgt s4, s3, s2", + "mvn v3.16b, v4.16b", + "mov v4.16b, v2.16b", + "mov v4.s[0], v3.s[0]", + "mov z16.d, p7/m, z4.d" ] }, "cmpss xmm0, xmm1, 6": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0xf3 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmge s2, s17, s16", - "mvn v2.16b, v2.16b", - "mov v16.s[0], v2.s[0]" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "fcmge s4, s3, s2", + "mvn v3.16b, v4.16b", + "mov v4.16b, v2.16b", + "mov v4.s[0], v3.s[0]", + "mov z16.d, p7/m, z4.d" ] }, "cmpss xmm0, xmm1, 7": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": [ "0xf3 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmge s0, s16, s17", - "fcmgt s1, s17, s16", + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov z4.d, z2.d", + "fcmge s0, s2, s3", + "fcmgt s1, s3, s2", "orr v0.8b, v0.8b, v1.8b", "ptrue p0.s, vl1", - "mov z16.s, p0/m, z0.s" + "mov z4.s, p0/m, z0.s", + "mov z16.d, p7/m, z4.d" ] } } diff --git a/unittests/InstructionCountCI/AFP/SVE256/Secondary_REPNE.json b/unittests/InstructionCountCI/AFP/SVE256/Secondary_REPNE.json index b95a4d724b..4453a9005b 100644 --- a/unittests/InstructionCountCI/AFP/SVE256/Secondary_REPNE.json +++ b/unittests/InstructionCountCI/AFP/SVE256/Secondary_REPNE.json @@ -10,211 +10,295 @@ }, "Instructions": { "cvtsi2sd xmm0, eax": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf2 0x0f 0x2a" ], "ExpectedArm64ASM": [ - "scvtf d16, w4" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "mov z3.d, z2.d", + "scvtf d3, w20", + "mov z16.d, p7/m, z3.d" ] }, "cvtsi2sd xmm0, dword [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0xf2 0x0f 0x2a" ], "ExpectedArm64ASM": [ - "ldr w20, [x4]", - "scvtf d16, w20" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "ldr w21, [x20]", + "mov z3.d, z2.d", + "scvtf d3, w21", + "mov z16.d, p7/m, z3.d" ] }, "cvtsi2sd xmm0, rax": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf2 0x0f 0x2a" ], "ExpectedArm64ASM": [ - "scvtf d16, x4" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "mov z3.d, z2.d", + "scvtf d3, x20", + "mov z16.d, p7/m, z3.d" ] }, "cvtsi2sd xmm0, qword [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0xf2 0x0f 0x2a" ], "ExpectedArm64ASM": [ - "ldr d2, [x4]", - "scvtf d16, d2" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "ldr d3, [x20]", + "mov z4.d, z2.d", + "scvtf d4, d3", + "mov z16.d, p7/m, z4.d" ] }, "sqrtsd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf2 0x0f 0x51" ], "ExpectedArm64ASM": [ - "fsqrt d16, d17" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov z4.d, z2.d", + "fsqrt d4, d3", + "mov z16.d, p7/m, z4.d" ] }, "addsd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf2 0x0f 0x58" ], "ExpectedArm64ASM": [ - "fadd d16, d16, d17" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov z4.d, z2.d", + "fadd d4, d2, d3", + "mov z16.d, p7/m, z4.d" ] }, "mulsd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf2 0x0f 0x59" ], "ExpectedArm64ASM": [ - "fmul d16, d16, d17" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov z4.d, z2.d", + "fmul d4, d2, d3", + "mov z16.d, p7/m, z4.d" ] }, "cvtsd2ss xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf2 0x0f 0x5a" ], "ExpectedArm64ASM": [ - "fcvt s16, d17" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov z4.d, z2.d", + "fcvt s4, d3", + "mov z16.d, p7/m, z4.d" ] }, "cvtsd2ss xmm0, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0xf2 0x0f 0x5a" ], "ExpectedArm64ASM": [ - "ldr q2, [x4]", - "fcvt s16, d2" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "ldr q3, [x20]", + "mov z4.d, z2.d", + "fcvt s4, d3", + "mov z16.d, p7/m, z4.d" ] }, "subsd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf2 0x0f 0x5c" ], "ExpectedArm64ASM": [ - "fsub d16, d16, d17" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov z4.d, z2.d", + "fsub d4, d2, d3", + "mov z16.d, p7/m, z4.d" ] }, "minsd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf2 0x0f 0x5d" ], "ExpectedArm64ASM": [ - "fmin d16, d16, d17" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov z4.d, z2.d", + "fmin d4, d2, d3", + "mov z16.d, p7/m, z4.d" ] }, "divsd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf2 0x0f 0x5e" ], "ExpectedArm64ASM": [ - "fdiv d16, d16, d17" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov z4.d, z2.d", + "fdiv d4, d2, d3", + "mov z16.d, p7/m, z4.d" ] }, "maxsd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf2 0x0f 0x5f" ], "ExpectedArm64ASM": [ - "fmax d16, d16, d17" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov z4.d, z2.d", + "fmax d4, d2, d3", + "mov z16.d, p7/m, z4.d" ] }, "cmpsd xmm0, xmm1, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf2 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmeq d16, d16, d17" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov z4.d, z2.d", + "fcmeq d4, d2, d3", + "mov z16.d, p7/m, z4.d" ] }, "cmpsd xmm0, xmm1, 1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf2 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmgt d16, d17, d16" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov z4.d, z2.d", + "fcmgt d4, d3, d2", + "mov z16.d, p7/m, z4.d" ] }, "cmpsd xmm0, xmm1, 2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf2 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmge d16, d17, d16" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov z4.d, z2.d", + "fcmge d4, d3, d2", + "mov z16.d, p7/m, z4.d" ] }, "cmpsd xmm0, xmm1, 3": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": [ "0xf2 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmge d0, d16, d17", - "fcmgt d1, d17, d16", + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov z4.d, z2.d", + "fcmge d0, d2, d3", + "fcmgt d1, d3, d2", "orr v0.8b, v0.8b, v1.8b", "mvn v0.8b, v0.8b", "ptrue p0.d, vl1", - "mov z16.d, p0/m, z0.d" + "mov z4.d, p0/m, z0.d", + "mov z16.d, p7/m, z4.d" ] }, "cmpsd xmm0, xmm1, 4": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": [ "0xf2 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmeq d0, d16, d17", + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov z4.d, z2.d", + "fcmeq d0, d2, d3", "mvn v0.8b, v0.8b", "ptrue p0.d, vl1", - "mov z16.d, p0/m, z0.d" + "mov z4.d, p0/m, z0.d", + "mov z16.d, p7/m, z4.d" ] }, "cmpsd xmm0, xmm1, 5": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0xf2 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmgt d2, d17, d16", - "mvn v2.16b, v2.16b", - "mov v16.d[0], v2.d[0]" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "fcmgt d4, d3, d2", + "mvn v3.16b, v4.16b", + "mov v4.16b, v2.16b", + "mov v4.d[0], v3.d[0]", + "mov z16.d, p7/m, z4.d" ] }, "cmpsd xmm0, xmm1, 6": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0xf2 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmge d2, d17, d16", - "mvn v2.16b, v2.16b", - "mov v16.d[0], v2.d[0]" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "fcmge d4, d3, d2", + "mvn v3.16b, v4.16b", + "mov v4.16b, v2.16b", + "mov v4.d[0], v3.d[0]", + "mov z16.d, p7/m, z4.d" ] }, "cmpsd xmm0, xmm1, 7": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": [ "0xf2 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmge d0, d16, d17", - "fcmgt d1, d17, d16", + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov z4.d, z2.d", + "fcmge d0, d2, d3", + "fcmgt d1, d3, d2", "orr v0.8b, v0.8b, v1.8b", "ptrue p0.d, vl1", - "mov z16.d, p0/m, z0.d" + "mov z4.d, p0/m, z0.d", + "mov z16.d, p7/m, z4.d" ] } } diff --git a/unittests/InstructionCountCI/AFP/Secondary.json b/unittests/InstructionCountCI/AFP/Secondary.json index b0194320b4..c38c35bd77 100644 --- a/unittests/InstructionCountCI/AFP/Secondary.json +++ b/unittests/InstructionCountCI/AFP/Secondary.json @@ -11,23 +11,30 @@ }, "Instructions": { "cvtpi2ps xmm0, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0x0f 0x2a" ], "ExpectedArm64ASM": [ - "ldr d2, [x4]", - "scvtf v16.2s, v2.2s" + "mov v2.16b, v16.16b", + "mov x20, x4", + "ldr d3, [x20]", + "mov v4.16b, v2.16b", + "scvtf v4.2s, v3.2s", + "mov v16.16b, v4.16b" ] }, "cvtpi2ps xmm0, mm0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "0x0f 0x2a" ], "ExpectedArm64ASM": [ - "ldr d2, [x28, #768]", - "scvtf v16.2s, v2.2s" + "mov v2.16b, v16.16b", + "ldr d3, [x28, #768]", + "mov v4.16b, v2.16b", + "scvtf v4.2s, v3.2s", + "mov v16.16b, v4.16b" ] } } diff --git a/unittests/InstructionCountCI/AFP/Secondary_REP.json b/unittests/InstructionCountCI/AFP/Secondary_REP.json index 055c99cd2b..ba735a0e96 100644 --- a/unittests/InstructionCountCI/AFP/Secondary_REP.json +++ b/unittests/InstructionCountCI/AFP/Secondary_REP.json @@ -11,216 +11,304 @@ }, "Instructions": { "cvtsi2ss xmm0, eax": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf3 0x0f 0x2a" ], "ExpectedArm64ASM": [ - "scvtf s16, w4" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "scvtf s3, w20", + "mov v16.16b, v3.16b" ] }, "cvtsi2ss xmm0, dword [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0xf3 0x0f 0x2a" ], "ExpectedArm64ASM": [ - "ldr s2, [x4]", - "scvtf s16, s2" + "mov v2.16b, v16.16b", + "mov x20, x4", + "ldr s3, [x20]", + "mov v4.16b, v2.16b", + "scvtf s4, s3", + "mov v16.16b, v4.16b" ] }, "cvtsi2ss xmm0, qword [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0xf3 0x0f 0x2a" ], "ExpectedArm64ASM": [ - "ldr x20, [x4]", - "scvtf s16, x20" + "mov v2.16b, v16.16b", + "mov x20, x4", + "ldr x21, [x20]", + "mov v3.16b, v2.16b", + "scvtf s3, x21", + "mov v16.16b, v3.16b" ] }, "sqrtss xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0xf3 0x0f 0x51", "ExpectedArm64ASM": [ - "fsqrt s16, s17" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fsqrt s4, s3", + "mov v16.16b, v4.16b" ] }, "rsqrtss xmm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "FEAT_FPRES could make this more optimal", "0xf3 0x0f 0x52" ], "ExpectedArm64ASM": [ + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", "fmov s0, #0x70 (1.0000)", - "fsqrt s1, s17", - "fdiv s16, s0, s1" + "fsqrt s1, s3", + "fdiv s4, s0, s1", + "mov v16.16b, v4.16b" ] }, "rcpss xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "FEAT_FPRES could make this more optimal", "0xf3 0x0f 0x53" ], "ExpectedArm64ASM": [ + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", "fmov s0, #0x70 (1.0000)", - "fdiv s16, s0, s17" + "fdiv s4, s0, s3", + "mov v16.16b, v4.16b" ] }, "addss xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf3 0x0f 0x58" ], "ExpectedArm64ASM": [ - "fadd s16, s16, s17" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fadd s4, s2, s3", + "mov v16.16b, v4.16b" ] }, "mulss xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf3 0x0f 0x59" ], "ExpectedArm64ASM": [ - "fmul s16, s16, s17" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fmul s4, s2, s3", + "mov v16.16b, v4.16b" ] }, "cvtss2sd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0xf3 0x0f 0x5a", "ExpectedArm64ASM": [ - "fcvt d16, s17" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fcvt d4, s3", + "mov v16.16b, v4.16b" ] }, "cvtss2sd xmm0, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0xf3 0x0f 0x5a", "ExpectedArm64ASM": [ - "ldr d2, [x4]", - "fcvt d16, s2" + "mov v2.16b, v16.16b", + "mov x20, x4", + "ldr d3, [x20]", + "mov v4.16b, v2.16b", + "fcvt d4, s3", + "mov v16.16b, v4.16b" ] }, "subss xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf3 0x0f 0x5c" ], "ExpectedArm64ASM": [ - "fsub s16, s16, s17" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fsub s4, s2, s3", + "mov v16.16b, v4.16b" ] }, "minss xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf3 0x0f 0x5d" ], "ExpectedArm64ASM": [ - "fmin s16, s16, s17" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fmin s4, s2, s3", + "mov v16.16b, v4.16b" ] }, "divss xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf3 0x0f 0x5e" ], "ExpectedArm64ASM": [ - "fdiv s16, s16, s17" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fdiv s4, s2, s3", + "mov v16.16b, v4.16b" ] }, "maxss xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf3 0x0f 0x5f" ], "ExpectedArm64ASM": [ - "fmax s16, s16, s17" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fmax s4, s2, s3", + "mov v16.16b, v4.16b" ] }, "cmpss xmm0, xmm1, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf3 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmeq s16, s16, s17" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fcmeq s4, s2, s3", + "mov v16.16b, v4.16b" ] }, "cmpss xmm0, xmm1, 1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf3 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmgt s16, s17, s16" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fcmgt s4, s3, s2", + "mov v16.16b, v4.16b" ] }, "cmpss xmm0, xmm1, 2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf3 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmge s16, s17, s16" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fcmge s4, s3, s2", + "mov v16.16b, v4.16b" ] }, "cmpss xmm0, xmm1, 3": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": [ "0xf3 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmge s0, s16, s17", - "fcmgt s1, s17, s16", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fcmge s0, s2, s3", + "fcmgt s1, s3, s2", "orr v0.8b, v0.8b, v1.8b", "mvn v0.8b, v0.8b", - "mov v16.s[0], v0.s[0]" + "mov v4.s[0], v0.s[0]", + "mov v16.16b, v4.16b" ] }, "cmpss xmm0, xmm1, 4": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0xf3 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmeq s0, s16, s17", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fcmeq s0, s2, s3", "mvn v0.8b, v0.8b", - "mov v16.s[0], v0.s[0]" + "mov v4.s[0], v0.s[0]", + "mov v16.16b, v4.16b" ] }, "cmpss xmm0, xmm1, 5": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0xf3 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmgt s2, s17, s16", - "mvn v2.16b, v2.16b", - "mov v16.s[0], v2.s[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "fcmgt s4, s3, s2", + "mvn v3.16b, v4.16b", + "mov v4.16b, v2.16b", + "mov v4.s[0], v3.s[0]", + "mov v16.16b, v4.16b" ] }, "cmpss xmm0, xmm1, 6": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0xf3 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmge s2, s17, s16", - "mvn v2.16b, v2.16b", - "mov v16.s[0], v2.s[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "fcmge s4, s3, s2", + "mvn v3.16b, v4.16b", + "mov v4.16b, v2.16b", + "mov v4.s[0], v3.s[0]", + "mov v16.16b, v4.16b" ] }, "cmpss xmm0, xmm1, 7": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": [ "0xf3 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmge s0, s16, s17", - "fcmgt s1, s17, s16", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fcmge s0, s2, s3", + "fcmgt s1, s3, s2", "orr v0.8b, v0.8b, v1.8b", - "mov v16.s[0], v0.s[0]" + "mov v4.s[0], v0.s[0]", + "mov v16.16b, v4.16b" ] } } diff --git a/unittests/InstructionCountCI/AFP/Secondary_REPNE.json b/unittests/InstructionCountCI/AFP/Secondary_REPNE.json index e37a74d6f6..a2407c9e0f 100644 --- a/unittests/InstructionCountCI/AFP/Secondary_REPNE.json +++ b/unittests/InstructionCountCI/AFP/Secondary_REPNE.json @@ -11,208 +11,292 @@ }, "Instructions": { "cvtsi2sd xmm0, eax": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf2 0x0f 0x2a" ], "ExpectedArm64ASM": [ - "scvtf d16, w4" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "scvtf d3, w20", + "mov v16.16b, v3.16b" ] }, "cvtsi2sd xmm0, dword [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0xf2 0x0f 0x2a" ], "ExpectedArm64ASM": [ - "ldr w20, [x4]", - "scvtf d16, w20" + "mov v2.16b, v16.16b", + "mov x20, x4", + "ldr w21, [x20]", + "mov v3.16b, v2.16b", + "scvtf d3, w21", + "mov v16.16b, v3.16b" ] }, "cvtsi2sd xmm0, rax": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf2 0x0f 0x2a" ], "ExpectedArm64ASM": [ - "scvtf d16, x4" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "scvtf d3, x20", + "mov v16.16b, v3.16b" ] }, "cvtsi2sd xmm0, qword [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0xf2 0x0f 0x2a" ], "ExpectedArm64ASM": [ - "ldr d2, [x4]", - "scvtf d16, d2" + "mov v2.16b, v16.16b", + "mov x20, x4", + "ldr d3, [x20]", + "mov v4.16b, v2.16b", + "scvtf d4, d3", + "mov v16.16b, v4.16b" ] }, "sqrtsd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf2 0x0f 0x51" ], "ExpectedArm64ASM": [ - "fsqrt d16, d17" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fsqrt d4, d3", + "mov v16.16b, v4.16b" ] }, "addsd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf2 0x0f 0x58" ], "ExpectedArm64ASM": [ - "fadd d16, d16, d17" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fadd d4, d2, d3", + "mov v16.16b, v4.16b" ] }, "mulsd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf2 0x0f 0x59" ], "ExpectedArm64ASM": [ - "fmul d16, d16, d17" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fmul d4, d2, d3", + "mov v16.16b, v4.16b" ] }, "cvtsd2ss xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf2 0x0f 0x5a" ], "ExpectedArm64ASM": [ - "fcvt s16, d17" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fcvt s4, d3", + "mov v16.16b, v4.16b" ] }, "cvtsd2ss xmm0, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0xf2 0x0f 0x5a" ], "ExpectedArm64ASM": [ - "ldr q2, [x4]", - "fcvt s16, d2" + "mov v2.16b, v16.16b", + "mov x20, x4", + "ldr q3, [x20]", + "mov v4.16b, v2.16b", + "fcvt s4, d3", + "mov v16.16b, v4.16b" ] }, "subsd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf2 0x0f 0x5c" ], "ExpectedArm64ASM": [ - "fsub d16, d16, d17" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fsub d4, d2, d3", + "mov v16.16b, v4.16b" ] }, "minsd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf2 0x0f 0x5d" ], "ExpectedArm64ASM": [ - "fmin d16, d16, d17" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fmin d4, d2, d3", + "mov v16.16b, v4.16b" ] }, "divsd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf2 0x0f 0x5e" ], "ExpectedArm64ASM": [ - "fdiv d16, d16, d17" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fdiv d4, d2, d3", + "mov v16.16b, v4.16b" ] }, "maxsd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf2 0x0f 0x5f" ], "ExpectedArm64ASM": [ - "fmax d16, d16, d17" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fmax d4, d2, d3", + "mov v16.16b, v4.16b" ] }, "cmpsd xmm0, xmm1, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf2 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmeq d16, d16, d17" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fcmeq d4, d2, d3", + "mov v16.16b, v4.16b" ] }, "cmpsd xmm0, xmm1, 1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf2 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmgt d16, d17, d16" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fcmgt d4, d3, d2", + "mov v16.16b, v4.16b" ] }, "cmpsd xmm0, xmm1, 2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf2 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmge d16, d17, d16" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fcmge d4, d3, d2", + "mov v16.16b, v4.16b" ] }, "cmpsd xmm0, xmm1, 3": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": [ "0xf2 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmge d0, d16, d17", - "fcmgt d1, d17, d16", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fcmge d0, d2, d3", + "fcmgt d1, d3, d2", "orr v0.8b, v0.8b, v1.8b", "mvn v0.8b, v0.8b", - "mov v16.d[0], v0.d[0]" + "mov v4.d[0], v0.d[0]", + "mov v16.16b, v4.16b" ] }, "cmpsd xmm0, xmm1, 4": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0xf2 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmeq d0, d16, d17", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fcmeq d0, d2, d3", "mvn v0.8b, v0.8b", - "mov v16.d[0], v0.d[0]" + "mov v4.d[0], v0.d[0]", + "mov v16.16b, v4.16b" ] }, "cmpsd xmm0, xmm1, 5": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0xf2 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmgt d2, d17, d16", - "mvn v2.16b, v2.16b", - "mov v16.d[0], v2.d[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "fcmgt d4, d3, d2", + "mvn v3.16b, v4.16b", + "mov v4.16b, v2.16b", + "mov v4.d[0], v3.d[0]", + "mov v16.16b, v4.16b" ] }, "cmpsd xmm0, xmm1, 6": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0xf2 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmge d2, d17, d16", - "mvn v2.16b, v2.16b", - "mov v16.d[0], v2.d[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "fcmge d4, d3, d2", + "mvn v3.16b, v4.16b", + "mov v4.16b, v2.16b", + "mov v4.d[0], v3.d[0]", + "mov v16.16b, v4.16b" ] }, "cmpsd xmm0, xmm1, 7": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": [ "0xf2 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmge d0, d16, d17", - "fcmgt d1, d17, d16", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fcmge d0, d2, d3", + "fcmgt d1, d3, d2", "orr v0.8b, v0.8b, v1.8b", - "mov v16.d[0], v0.d[0]" + "mov v4.d[0], v0.d[0]", + "mov v16.16b, v4.16b" ] } } diff --git a/unittests/InstructionCountCI/AFP/VEX_map1.json b/unittests/InstructionCountCI/AFP/VEX_map1.json index 3ba72ceb85..ac96da3810 100644 --- a/unittests/InstructionCountCI/AFP/VEX_map1.json +++ b/unittests/InstructionCountCI/AFP/VEX_map1.json @@ -10,442 +10,562 @@ }, "Instructions": { "vsqrtss xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b10 0x51 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fsqrt s16, s18" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fsqrt s4, s3", + "mov z16.d, p7/m, z4.d" ] }, "vsqrtsd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b11 0x51 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fsqrt d16, d18" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fsqrt d4, d3", + "mov z16.d, p7/m, z4.d" ] }, "vrsqrtss xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "FEAT_FPRES could make this more optimal", "Map 1 0b10 0x52 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", "fmov s0, #0x70 (1.0000)", - "fsqrt s1, s18", - "fdiv s16, s0, s1" + "fsqrt s1, s3", + "fdiv s4, s0, s1", + "mov z16.d, p7/m, z4.d" ] }, "vrcpss xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "FEAT_FPRES could make this more optimal", "Map 1 0b10 0x53 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", "fmov s0, #0x70 (1.0000)", - "fdiv s16, s0, s18" + "fdiv s4, s0, s3", + "mov z16.d, p7/m, z4.d" ] }, "vcmpss xmm0, xmm1, xmm2, 0x00": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b10 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fcmeq s16, s17, s18" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fcmeq s4, s2, s3", + "mov z16.d, p7/m, z4.d" ] }, "vcmpss xmm0, xmm1, xmm2, 0x01": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b10 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fcmgt s16, s18, s17" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fcmgt s4, s3, s2", + "mov z16.d, p7/m, z4.d" ] }, "vcmpss xmm0, xmm1, xmm2, 0x02": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b10 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fcmge s16, s18, s17" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fcmge s4, s3, s2", + "mov z16.d, p7/m, z4.d" ] }, "vcmpss xmm0, xmm1, xmm2, 0x03": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": [ "Map 1 0b10 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fcmge s0, s17, s18", - "fcmgt s1, s18, s17", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fcmge s0, s2, s3", + "fcmgt s1, s3, s2", "orr v0.8b, v0.8b, v1.8b", "mvn v0.8b, v0.8b", - "mov v16.s[0], v0.s[0]" + "mov v4.s[0], v0.s[0]", + "mov z16.d, p7/m, z4.d" ] }, "vcmpss xmm0, xmm1, xmm2, 0x04": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b10 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fcmeq s0, s17, s18", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fcmeq s0, s2, s3", "mvn v0.8b, v0.8b", - "mov v16.s[0], v0.s[0]" + "mov v4.s[0], v0.s[0]", + "mov z16.d, p7/m, z4.d" ] }, "vcmpss xmm0, xmm1, xmm2, 0x05": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b10 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "fcmgt s2, s18, s17", - "mvn v2.16b, v2.16b", - "mov v16.16b, v17.16b", - "mov v16.s[0], v2.s[0]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmgt s4, s3, s2", + "mvn v3.16b, v4.16b", + "mov v4.16b, v2.16b", + "mov v4.s[0], v3.s[0]", + "mov z16.d, p7/m, z4.d" ] }, "vcmpss xmm0, xmm1, xmm2, 0x06": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b10 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "fcmge s2, s18, s17", - "mvn v2.16b, v2.16b", - "mov v16.16b, v17.16b", - "mov v16.s[0], v2.s[0]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmge s4, s3, s2", + "mvn v3.16b, v4.16b", + "mov v4.16b, v2.16b", + "mov v4.s[0], v3.s[0]", + "mov z16.d, p7/m, z4.d" ] }, "vcmpss xmm0, xmm1, xmm2, 0x07": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "Map 1 0b10 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fcmge s0, s17, s18", - "fcmgt s1, s18, s17", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fcmge s0, s2, s3", + "fcmgt s1, s3, s2", "orr v0.8b, v0.8b, v1.8b", - "mov v16.s[0], v0.s[0]" + "mov v4.s[0], v0.s[0]", + "mov z16.d, p7/m, z4.d" ] }, "vcmpsd xmm0, xmm1, xmm2, 0x00": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b11 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fcmeq d16, d17, d18" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fcmeq d4, d2, d3", + "mov z16.d, p7/m, z4.d" ] }, "vcmpsd xmm0, xmm1, xmm2, 0x01": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b11 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fcmgt d16, d18, d17" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fcmgt d4, d3, d2", + "mov z16.d, p7/m, z4.d" ] }, "vcmpsd xmm0, xmm1, xmm2, 0x02": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b11 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fcmge d16, d18, d17" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fcmge d4, d3, d2", + "mov z16.d, p7/m, z4.d" ] }, "vcmpsd xmm0, xmm1, xmm2, 0x03": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": [ "Map 1 0b11 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fcmge d0, d17, d18", - "fcmgt d1, d18, d17", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fcmge d0, d2, d3", + "fcmgt d1, d3, d2", "orr v0.8b, v0.8b, v1.8b", "mvn v0.8b, v0.8b", - "mov v16.d[0], v0.d[0]" + "mov v4.d[0], v0.d[0]", + "mov z16.d, p7/m, z4.d" ] }, "vcmpsd xmm0, xmm1, xmm2, 0x04": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b11 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fcmeq d0, d17, d18", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fcmeq d0, d2, d3", "mvn v0.8b, v0.8b", - "mov v16.d[0], v0.d[0]" + "mov v4.d[0], v0.d[0]", + "mov z16.d, p7/m, z4.d" ] }, "vcmpsd xmm0, xmm1, xmm2, 0x05": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b11 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "fcmgt d2, d18, d17", - "mvn v2.16b, v2.16b", - "mov v16.16b, v17.16b", - "mov v16.d[0], v2.d[0]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmgt d4, d3, d2", + "mvn v3.16b, v4.16b", + "mov v4.16b, v2.16b", + "mov v4.d[0], v3.d[0]", + "mov z16.d, p7/m, z4.d" ] }, "vcmpsd xmm0, xmm1, xmm2, 0x06": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b11 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "fcmge d2, d18, d17", - "mvn v2.16b, v2.16b", - "mov v16.16b, v17.16b", - "mov v16.d[0], v2.d[0]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmge d4, d3, d2", + "mvn v3.16b, v4.16b", + "mov v4.16b, v2.16b", + "mov v4.d[0], v3.d[0]", + "mov z16.d, p7/m, z4.d" ] }, "vcmpsd xmm0, xmm1, xmm2, 0x07": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "Map 1 0b11 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fcmge d0, d17, d18", - "fcmgt d1, d18, d17", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fcmge d0, d2, d3", + "fcmgt d1, d3, d2", "orr v0.8b, v0.8b, v1.8b", - "mov v16.d[0], v0.d[0]" + "mov v4.d[0], v0.d[0]", + "mov z16.d, p7/m, z4.d" ] }, "vcvtsi2ss xmm0, xmm1, eax": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b10 0x2A 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "scvtf s16, w4" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "mov v3.16b, v2.16b", + "scvtf s3, w20", + "mov z16.d, p7/m, z3.d" ] }, "vcvtsi2ss xmm0, xmm1, rax": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b10 0x2A 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "scvtf s16, x4" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "mov v3.16b, v2.16b", + "scvtf s3, x20", + "mov z16.d, p7/m, z3.d" ] }, "vcvtsi2sd xmm0, xmm1, eax": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b11 0x2A 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "scvtf d16, w4" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "mov v3.16b, v2.16b", + "scvtf d3, w20", + "mov z16.d, p7/m, z3.d" ] }, "vcvtsi2sd xmm0, xmm1, rax": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b11 0x2A 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "scvtf d16, x4" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "mov v3.16b, v2.16b", + "scvtf d3, x20", + "mov z16.d, p7/m, z3.d" ] }, "vmulss xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b10 0x59 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fmul s16, s17, s18" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fmul s4, s2, s3", + "mov z16.d, p7/m, z4.d" ] }, "vmulsd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b11 0x59 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fmul d16, d17, d18" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fmul d4, d2, d3", + "mov z16.d, p7/m, z4.d" ] }, "vcvtss2sd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b10 0x5a 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fcvt d16, s18" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fcvt d4, s3", + "mov z16.d, p7/m, z4.d" ] }, "vcvtsd2ss xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b11 0x5a 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fcvt s16, d18" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fcvt s4, d3", + "mov z16.d, p7/m, z4.d" ] }, "vsubss xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b10 0x5c 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fsub s16, s17, s18" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fsub s4, s2, s3", + "mov z16.d, p7/m, z4.d" ] }, "vsubsd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b11 0x5c 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fsub d16, d17, d18" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fsub d4, d2, d3", + "mov z16.d, p7/m, z4.d" ] }, "vminss xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b10 0x5d 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fmin s16, s17, s18" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fmin s4, s2, s3", + "mov z16.d, p7/m, z4.d" ] }, "vminsd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b11 0x5d 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fmin d16, d17, d18" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fmin d4, d2, d3", + "mov z16.d, p7/m, z4.d" ] }, "vdivss xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b10 0x5e 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fdiv s16, s17, s18" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fdiv s4, s2, s3", + "mov z16.d, p7/m, z4.d" ] }, "vdivsd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b11 0x5e 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fdiv d16, d17, d18" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fdiv d4, d2, d3", + "mov z16.d, p7/m, z4.d" ] }, "vmaxss xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b10 0x5f 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fmax s16, s17, s18" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fmax s4, s2, s3", + "mov z16.d, p7/m, z4.d" ] }, "vmaxsd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b11 0x5f 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fmax d16, d17, d18" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fmax d4, d2, d3", + "mov z16.d, p7/m, z4.d" ] }, "vminps xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b00 0x5d 128-bit" ], "ExpectedArm64ASM": [ - "fcmgt v0.4s, v18.4s, v17.4s", - "mov v16.16b, v17.16b", - "bif v16.16b, v18.16b, v0.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmgt v0.4s, v3.4s, v2.4s", + "mov v4.16b, v2.16b", + "bif v4.16b, v3.16b, v0.16b", + "mov z16.d, p7/m, z4.d" ] }, "vminps ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "Map 1 0b00 0x5d 256-bit" ], "ExpectedArm64ASM": [ - "fcmgt p0.s, p7/z, z18.s, z17.s", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmgt p0.s, p7/z, z3.s, z2.s", "not p0.b, p7/z, p0.b", - "mov z0.d, z17.d", - "mov z0.s, p0/m, z18.s", - "mov z16.d, z0.d" + "mov z0.d, z2.d", + "mov z0.s, p0/m, z3.s", + "mov z4.d, z0.d", + "mov z16.d, p7/m, z4.d" ] }, "vminpd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b01 0x5d 128-bit" ], "ExpectedArm64ASM": [ - "fcmgt v0.2d, v18.2d, v17.2d", - "mov v16.16b, v17.16b", - "bif v16.16b, v18.16b, v0.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmgt v0.2d, v3.2d, v2.2d", + "mov v4.16b, v2.16b", + "bif v4.16b, v3.16b, v0.16b", + "mov z16.d, p7/m, z4.d" ] }, "vminpd ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "Map 1 0b01 0x5d 256-bit" ], "ExpectedArm64ASM": [ - "fcmgt p0.d, p7/z, z18.d, z17.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmgt p0.d, p7/z, z3.d, z2.d", "not p0.b, p7/z, p0.b", - "mov z0.d, z17.d", - "mov z0.d, p0/m, z18.d", - "mov z16.d, z0.d" + "mov z0.d, z2.d", + "mov z0.d, p0/m, z3.d", + "mov z4.d, z0.d", + "mov z16.d, p7/m, z4.d" ] } } diff --git a/unittests/InstructionCountCI/AFP/VEX_map3.json b/unittests/InstructionCountCI/AFP/VEX_map3.json index fa4f9c943d..7c59988a64 100644 --- a/unittests/InstructionCountCI/AFP/VEX_map3.json +++ b/unittests/InstructionCountCI/AFP/VEX_map3.json @@ -10,113 +10,133 @@ }, "Instructions": { "vroundss xmm0, xmm1, 00000000b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "nearest rounding", "Map 3 0b01 0x0a 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v16.16b", - "frintn s16, s16" + "mov z2.d, p7/m, z16.d", + "mov v3.16b, v2.16b", + "frintn s3, s2", + "mov z16.d, p7/m, z3.d" ] }, "vroundss xmm0, xmm1, 00000001b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "-inf rounding", "Map 3 0b01 0x0a 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v16.16b", - "frintm s16, s16" + "mov z2.d, p7/m, z16.d", + "mov v3.16b, v2.16b", + "frintm s3, s2", + "mov z16.d, p7/m, z3.d" ] }, "vroundss xmm0, xmm1, 00000010b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "+inf rounding", "Map 3 0b01 0x0a 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v16.16b", - "frintp s16, s16" + "mov z2.d, p7/m, z16.d", + "mov v3.16b, v2.16b", + "frintp s3, s2", + "mov z16.d, p7/m, z3.d" ] }, "vroundss xmm0, xmm1, 00000011b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "truncate rounding", "Map 3 0b01 0x0a 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v16.16b", - "frintz s16, s16" + "mov z2.d, p7/m, z16.d", + "mov v3.16b, v2.16b", + "frintz s3, s2", + "mov z16.d, p7/m, z3.d" ] }, "vroundss xmm0, xmm1, 00000100b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "host mode rounding", "Map 3 0b01 0x0a 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v16.16b", - "frinti s16, s16" + "mov z2.d, p7/m, z16.d", + "mov v3.16b, v2.16b", + "frinti s3, s2", + "mov z16.d, p7/m, z3.d" ] }, "vroundsd xmm0, xmm1, 00000000b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "nearest rounding", "Map 3 0b01 0x0b 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v16.16b", - "frintn d16, d16" + "mov z2.d, p7/m, z16.d", + "mov v3.16b, v2.16b", + "frintn d3, d2", + "mov z16.d, p7/m, z3.d" ] }, "vroundsd xmm0, xmm1, 00000001b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "-inf rounding", "Map 3 0b01 0x0b 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v16.16b", - "frintm d16, d16" + "mov z2.d, p7/m, z16.d", + "mov v3.16b, v2.16b", + "frintm d3, d2", + "mov z16.d, p7/m, z3.d" ] }, "vroundsd xmm0, xmm1, 00000010b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "+inf rounding", "Map 3 0b01 0x0b 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v16.16b", - "frintp d16, d16" + "mov z2.d, p7/m, z16.d", + "mov v3.16b, v2.16b", + "frintp d3, d2", + "mov z16.d, p7/m, z3.d" ] }, "vroundsd xmm0, xmm1, 00000011b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "truncate rounding", "Map 3 0b01 0x0b 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v16.16b", - "frintz d16, d16" + "mov z2.d, p7/m, z16.d", + "mov v3.16b, v2.16b", + "frintz d3, d2", + "mov z16.d, p7/m, z3.d" ] }, "vroundsd xmm0, xmm1, 00000100b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "host mode rounding", "Map 3 0b01 0x0b 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v16.16b", - "frinti d16, d16" + "mov z2.d, p7/m, z16.d", + "mov v3.16b, v2.16b", + "frinti d3, d2", + "mov z16.d, p7/m, z3.d" ] } } diff --git a/unittests/InstructionCountCI/Atomics.json b/unittests/InstructionCountCI/Atomics.json index 9936778c38..9f8e5e4763 100644 --- a/unittests/InstructionCountCI/Atomics.json +++ b/unittests/InstructionCountCI/Atomics.json @@ -11,1631 +11,1983 @@ }, "Instructions": { "lock add byte [rax], cl": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": "0x00", "ExpectedArm64ASM": [ - "ldaddalb w5, w20, [x4]", - "eor w27, w20, w5", - "lsl w0, w20, #24", - "cmn w0, w5, lsl #24", - "add w26, w20, w5" + "mov x20, x5", + "mov x21, x4", + "ldaddalb w20, w22, [x21]", + "eor w21, w22, w20", + "mov x27, x21", + "lsl w0, w22, #24", + "cmn w0, w20, lsl #24", + "add w21, w22, w20", + "mov x26, x21" ] }, "lock add word [rax], cx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": "0x01", "ExpectedArm64ASM": [ - "ldaddalh w5, w20, [x4]", - "eor w27, w20, w5", - "lsl w0, w20, #16", - "cmn w0, w5, lsl #16", - "add w26, w20, w5" + "mov x20, x5", + "mov x21, x4", + "ldaddalh w20, w22, [x21]", + "eor w21, w22, w20", + "mov x27, x21", + "lsl w0, w22, #16", + "cmn w0, w20, lsl #16", + "add w21, w22, w20", + "mov x26, x21" ] }, "lock add dword [rax], ecx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x01", "ExpectedArm64ASM": [ - "ldaddal w5, w20, [x4]", - "eor w27, w20, w5", - "adds w26, w20, w5" + "mov x20, x5", + "mov x21, x4", + "ldaddal w20, w22, [x21]", + "eor w21, w22, w20", + "mov x27, x21", + "adds w21, w22, w20", + "mov x26, x21" ] }, "lock or byte [rax], cl": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x08", "ExpectedArm64ASM": [ - "ldsetalb w5, w20, [x4]", - "orr w26, w20, w5", - "cmn wzr, w26, lsl #24" + "mov x20, x5", + "mov x21, x4", + "ldsetalb w20, w22, [x21]", + "orr w21, w22, w20", + "mov x26, x21", + "cmn wzr, w21, lsl #24" ] }, "lock or word [rax], cx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x09", "ExpectedArm64ASM": [ - "ldsetalh w5, w20, [x4]", - "orr w26, w20, w5", - "cmn wzr, w26, lsl #16" + "mov x20, x5", + "mov x21, x4", + "ldsetalh w20, w22, [x21]", + "orr w21, w22, w20", + "mov x26, x21", + "cmn wzr, w21, lsl #16" ] }, "lock or dword [rax], ecx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x09", "ExpectedArm64ASM": [ - "ldsetal w5, w20, [x4]", - "orr w26, w20, w5", - "tst w26, w26" + "mov x20, x5", + "mov x21, x4", + "ldsetal w20, w22, [x21]", + "orr w21, w22, w20", + "mov x26, x21", + "tst w21, w21" ] }, "lock adc byte [rax], cl": { - "ExpectedInstructionCount": 21, + "ExpectedInstructionCount": 25, "Comment": "0x10", "ExpectedArm64ASM": [ - "adc w20, wzr, w5", - "ldaddalb w20, w20, [x4]", - "eor w27, w20, w5", + "mov x20, x5", + "adc w21, wzr, w20", + "mov x22, x4", + "ldaddalb w21, w23, [x22]", + "eor w21, w23, w20", + "mov x27, x21", "cset w21, hs", - "adc w22, w20, w5", - "uxtb w26, w22", - "cmp x26, x5", + "adc w22, w23, w20", + "uxtb w24, w22", + "cmp x24, x20", "cset x22, lo", - "cmp x26, x5", - "cset x23, ls", + "cmp x24, x20", + "cset x25, ls", "cmp x21, #0x1 (1)", - "csel x21, x23, x22, eq", - "cmn wzr, w26, lsl #24", - "mrs x22, nzcv", - "orr w21, w22, w21, lsl #29", - "eor w22, w20, w5", - "eor w20, w26, w20", - "bic w20, w20, w22", - "ubfx x20, x20, #7, #1", - "orr w20, w21, w20, lsl #28", - "msr nzcv, x20" + "csel x30, x25, x22, eq", + "cmn wzr, w24, lsl #24", + "mrs x21, nzcv", + "orr w22, w21, w30, lsl #29", + "eor w21, w23, w20", + "eor w20, w24, w23", + "bic w23, w20, w21", + "ubfx x20, x23, #7, #1", + "orr w21, w22, w20, lsl #28", + "mov x26, x24", + "msr nzcv, x21" ] }, "lock adc word [rax], cx": { - "ExpectedInstructionCount": 21, + "ExpectedInstructionCount": 25, "Comment": "0x11", "ExpectedArm64ASM": [ - "adc w20, wzr, w5", - "ldaddalh w20, w20, [x4]", - "eor w27, w20, w5", + "mov x20, x5", + "adc w21, wzr, w20", + "mov x22, x4", + "ldaddalh w21, w23, [x22]", + "eor w21, w23, w20", + "mov x27, x21", "cset w21, hs", - "adc w22, w20, w5", - "uxth w26, w22", - "cmp x26, x5", + "adc w22, w23, w20", + "uxth w24, w22", + "cmp x24, x20", "cset x22, lo", - "cmp x26, x5", - "cset x23, ls", + "cmp x24, x20", + "cset x25, ls", "cmp x21, #0x1 (1)", - "csel x21, x23, x22, eq", - "cmn wzr, w26, lsl #16", - "mrs x22, nzcv", - "orr w21, w22, w21, lsl #29", - "eor w22, w20, w5", - "eor w20, w26, w20", - "bic w20, w20, w22", - "ubfx x20, x20, #15, #1", - "orr w20, w21, w20, lsl #28", - "msr nzcv, x20" + "csel x30, x25, x22, eq", + "cmn wzr, w24, lsl #16", + "mrs x21, nzcv", + "orr w22, w21, w30, lsl #29", + "eor w21, w23, w20", + "eor w20, w24, w23", + "bic w23, w20, w21", + "ubfx x20, x23, #15, #1", + "orr w21, w22, w20, lsl #28", + "mov x26, x24", + "msr nzcv, x21" ] }, "lock adc dword [rax], ecx": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": "0x11", "ExpectedArm64ASM": [ - "adc w20, wzr, w5", - "ldaddal w20, w20, [x4]", - "eor w27, w20, w5", - "adcs w26, w20, w5" + "mov x20, x5", + "adc w21, wzr, w20", + "mov x22, x4", + "ldaddal w21, w23, [x22]", + "eor w21, w23, w20", + "mov x27, x21", + "adcs w21, w23, w20", + "mov x26, x21" ] }, "lock sbb byte [rax], cl": { - "ExpectedInstructionCount": 23, + "ExpectedInstructionCount": 27, "Comment": "0x18", "ExpectedArm64ASM": [ - "adc w20, wzr, w5", - "neg w1, w20", - "ldaddalb w1, w20, [x4]", - "eor w27, w20, w5", + "mov x20, x5", + "mov x21, x4", + "adc w22, wzr, w20", + "neg w1, w22", + "ldaddalb w1, w23, [x21]", + "eor w21, w23, w20", + "mov x27, x21", "cset w21, hs", - "add w22, w5, w21", - "sub w22, w20, w22", - "uxtb w26, w22", - "cmp w26, w20", - "cset x22, hi", - "cmp w26, w20", - "cset x23, hs", + "add w22, w20, w21", + "sub w24, w23, w22", + "uxtb w22, w24", + "cmp w22, w23", + "cset x24, hi", + "cmp w22, w23", + "cset x25, hs", "cmp x21, #0x1 (1)", - "csel x21, x23, x22, eq", - "cmn wzr, w26, lsl #24", - "mrs x22, nzcv", - "orr w21, w22, w21, lsl #29", - "eor w22, w20, w5", - "eor w20, w26, w20", - "and w20, w20, w22", - "ubfx x20, x20, #7, #1", - "orr w20, w21, w20, lsl #28", - "msr nzcv, x20" + "csel x30, x25, x24, eq", + "cmn wzr, w22, lsl #24", + "mrs x21, nzcv", + "orr w24, w21, w30, lsl #29", + "eor w21, w23, w20", + "eor w20, w22, w23", + "and w23, w20, w21", + "ubfx x20, x23, #7, #1", + "orr w21, w24, w20, lsl #28", + "mov x26, x22", + "msr nzcv, x21" ] }, "lock sbb word [rax], cx": { - "ExpectedInstructionCount": 23, + "ExpectedInstructionCount": 27, "Comment": "0x19", "ExpectedArm64ASM": [ - "adc w20, wzr, w5", - "neg w1, w20", - "ldaddalh w1, w20, [x4]", - "eor w27, w20, w5", + "mov x20, x5", + "mov x21, x4", + "adc w22, wzr, w20", + "neg w1, w22", + "ldaddalh w1, w23, [x21]", + "eor w21, w23, w20", + "mov x27, x21", "cset w21, hs", - "add w22, w5, w21", - "sub w22, w20, w22", - "uxth w26, w22", - "cmp w26, w20", - "cset x22, hi", - "cmp w26, w20", - "cset x23, hs", + "add w22, w20, w21", + "sub w24, w23, w22", + "uxth w22, w24", + "cmp w22, w23", + "cset x24, hi", + "cmp w22, w23", + "cset x25, hs", "cmp x21, #0x1 (1)", - "csel x21, x23, x22, eq", - "cmn wzr, w26, lsl #16", - "mrs x22, nzcv", - "orr w21, w22, w21, lsl #29", - "eor w22, w20, w5", - "eor w20, w26, w20", - "and w20, w20, w22", - "ubfx x20, x20, #15, #1", - "orr w20, w21, w20, lsl #28", - "msr nzcv, x20" + "csel x30, x25, x24, eq", + "cmn wzr, w22, lsl #16", + "mrs x21, nzcv", + "orr w24, w21, w30, lsl #29", + "eor w21, w23, w20", + "eor w20, w22, w23", + "and w23, w20, w21", + "ubfx x20, x23, #15, #1", + "orr w21, w24, w20, lsl #28", + "mov x26, x22", + "msr nzcv, x21" ] }, "lock sbb dword [rax], ecx": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 15, "Comment": "0x19", "ExpectedArm64ASM": [ - "adc w20, wzr, w5", - "neg w1, w20", - "ldaddal w1, w20, [x4]", - "eor w27, w20, w5", + "mov x20, x5", + "mov x21, x4", + "adc w22, wzr, w20", + "neg w1, w22", + "ldaddal w1, w23, [x21]", + "eor w21, w23, w20", + "mov x27, x21", "mrs x21, nzcv", - "eor w21, w21, #0x20000000", - "msr nzcv, x21", - "sbcs w26, w20, w5", + "eor w22, w21, #0x20000000", + "msr nzcv, x22", + "sbcs w21, w23, w20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w22, w20, #0x20000000", + "mov x26, x21", + "msr nzcv, x22" ] }, "lock and byte [rax], cl": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "0x20", "ExpectedArm64ASM": [ - "mvn w1, w5", - "ldclralb w1, w20, [x4]", - "and w26, w20, w5", - "cmn wzr, w26, lsl #24" + "mov x20, x5", + "mov x21, x4", + "mvn w1, w20", + "ldclralb w1, w22, [x21]", + "and w21, w22, w20", + "cmn wzr, w21, lsl #24", + "mov x26, x21" ] }, "lock and word [rax], cx": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "0x21", "ExpectedArm64ASM": [ - "mvn w1, w5", - "ldclralh w1, w20, [x4]", - "and w26, w20, w5", - "cmn wzr, w26, lsl #16" + "mov x20, x5", + "mov x21, x4", + "mvn w1, w20", + "ldclralh w1, w22, [x21]", + "and w21, w22, w20", + "cmn wzr, w21, lsl #16", + "mov x26, x21" ] }, "lock and dword [rax], ecx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x21", "ExpectedArm64ASM": [ - "mvn w1, w5", - "ldclral w1, w20, [x4]", - "ands w26, w20, w5" + "mov x20, x5", + "mov x21, x4", + "mvn w1, w20", + "ldclral w1, w22, [x21]", + "ands w21, w22, w20", + "mov x26, x21" ] }, "lock sub byte [rax], cl": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 13, "Comment": "0x28", "ExpectedArm64ASM": [ - "neg w1, w5", - "ldaddalb w1, w20, [x4]", - "eor w27, w20, w5", - "lsl w0, w20, #24", - "cmp w0, w5, lsl #24", - "sub w26, w20, w5", + "mov x20, x5", + "mov x21, x4", + "neg w1, w20", + "ldaddalb w1, w22, [x21]", + "eor w21, w22, w20", + "mov x27, x21", + "lsl w0, w22, #24", + "cmp w0, w20, lsl #24", + "sub w21, w22, w20", + "mov x26, x21", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "lock sub word [rax], cx": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 13, "Comment": "0x28", "ExpectedArm64ASM": [ - "neg w1, w5", - "ldaddalh w1, w20, [x4]", - "eor w27, w20, w5", - "lsl w0, w20, #16", - "cmp w0, w5, lsl #16", - "sub w26, w20, w5", + "mov x20, x5", + "mov x21, x4", + "neg w1, w20", + "ldaddalh w1, w22, [x21]", + "eor w21, w22, w20", + "mov x27, x21", + "lsl w0, w22, #16", + "cmp w0, w20, lsl #16", + "sub w21, w22, w20", + "mov x26, x21", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "lock sub dword [rax], ecx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "0x29", "ExpectedArm64ASM": [ - "neg w1, w5", - "ldaddal w1, w20, [x4]", - "eor w27, w20, w5", - "subs w26, w20, w5", + "mov x20, x5", + "mov x21, x4", + "neg w1, w20", + "ldaddal w1, w22, [x21]", + "eor w21, w22, w20", + "mov x27, x21", + "subs w21, w22, w20", + "mov x26, x21", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "lock xor byte [rax], cl": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x30", "ExpectedArm64ASM": [ - "ldeoralb w5, w20, [x4]", - "eor w26, w20, w5", - "cmn wzr, w26, lsl #24" + "mov x20, x5", + "mov x21, x4", + "ldeoralb w20, w22, [x21]", + "eor w21, w22, w20", + "mov x26, x21", + "cmn wzr, w21, lsl #24" ] }, "lock xor word [rax], cx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x31", "ExpectedArm64ASM": [ - "ldeoralh w5, w20, [x4]", - "eor w26, w20, w5", - "cmn wzr, w26, lsl #16" + "mov x20, x5", + "mov x21, x4", + "ldeoralh w20, w22, [x21]", + "eor w21, w22, w20", + "mov x26, x21", + "cmn wzr, w21, lsl #16" ] }, "lock xor dword [rax], ecx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x31", "ExpectedArm64ASM": [ - "ldeoral w5, w20, [x4]", - "eor w26, w20, w5", - "tst w26, w26" + "mov x20, x5", + "mov x21, x4", + "ldeoral w20, w22, [x21]", + "eor w21, w22, w20", + "mov x26, x21", + "tst w21, w21" ] }, "lock add qword [rax], rcx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x01", "ExpectedArm64ASM": [ - "ldaddal x5, x20, [x4]", - "eor w27, w20, w5", - "adds x26, x20, x5" + "mov x20, x5", + "mov x21, x4", + "ldaddal x20, x22, [x21]", + "eor w21, w22, w20", + "mov x27, x21", + "adds x21, x22, x20", + "mov x26, x21" ] }, "xchg byte [rax], cl": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x86", "ExpectedArm64ASM": [ - "swpalb w5, w20, [x4]", - "bfxil x5, x20, #0, #8" + "mov x20, x5", + "mov x21, x4", + "swpalb w20, w22, [x21]", + "mov x21, x20", + "bfxil x21, x22, #0, #8", + "mov x5, x21" ] }, "xchg word [rax], cx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x87", "ExpectedArm64ASM": [ - "swpalh w5, w20, [x4]", - "bfxil x5, x20, #0, #16" + "mov x20, x5", + "mov x21, x4", + "swpalh w20, w22, [x21]", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x5, x21" ] }, "xchg dword [rax], ecx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x87", "ExpectedArm64ASM": [ - "swpal w5, w5, [x4]" + "mov x20, x5", + "mov x21, x4", + "swpal w20, w22, [x21]", + "mov x5, x22" ] }, "xchg qword [rax], rcx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x87", "ExpectedArm64ASM": [ - "swpal x5, x5, [x4]" + "mov x20, x5", + "mov x21, x4", + "swpal x20, x22, [x21]", + "mov x5, x22" ] }, "xadd byte [rax], bl": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 13, "Comment": "0x0f 0xc0", "ExpectedArm64ASM": [ - "uxtb w20, w7", - "ldaddalb w20, w21, [x4]", - "bfxil x7, x21, #0, #8", - "eor w27, w21, w20", - "lsl w0, w21, #24", - "cmn w0, w20, lsl #24", - "add w26, w21, w20" + "mov x20, x4", + "mov x21, x7", + "uxtb w22, w21", + "ldaddalb w22, w23, [x20]", + "mov x20, x21", + "bfxil x20, x23, #0, #8", + "mov x7, x20", + "eor w20, w23, w22", + "mov x27, x20", + "lsl w0, w23, #24", + "cmn w0, w22, lsl #24", + "add w20, w23, w22", + "mov x26, x20" ] }, "xadd word [rax], bx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 13, "Comment": "0x0f 0xc1", "ExpectedArm64ASM": [ - "uxth w20, w7", - "ldaddalh w20, w21, [x4]", - "bfxil x7, x21, #0, #16", - "eor w27, w21, w20", - "lsl w0, w21, #16", - "cmn w0, w20, lsl #16", - "add w26, w21, w20" + "mov x20, x4", + "mov x21, x7", + "uxth w22, w21", + "ldaddalh w22, w23, [x20]", + "mov x20, x21", + "bfxil x20, x23, #0, #16", + "mov x7, x20", + "eor w20, w23, w22", + "mov x27, x20", + "lsl w0, w23, #16", + "cmn w0, w22, lsl #16", + "add w20, w23, w22", + "mov x26, x20" ] }, "xadd dword [rax], ebx": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0xc1", "ExpectedArm64ASM": [ - "mov w20, w7", - "ldaddal w20, w7, [x4]", - "eor w27, w7, w20", - "adds w26, w7, w20" + "mov x20, x4", + "mov x21, x7", + "mov w22, w21", + "ldaddal w22, w21, [x20]", + "mov x7, x21", + "eor w20, w21, w22", + "mov x27, x20", + "adds w20, w21, w22", + "mov x26, x20" ] }, "xadd qword [rax], rbx": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": "0x0f 0xc1", "ExpectedArm64ASM": [ - "mov x20, x7", - "ldaddal x20, x7, [x4]", - "eor w27, w7, w20", - "adds x26, x7, x20" + "mov x20, x4", + "mov x21, x7", + "ldaddal x21, x22, [x20]", + "mov x7, x22", + "eor w20, w22, w21", + "mov x27, x20", + "adds x20, x22, x21", + "mov x26, x20" ] }, "lock add byte [rax], 1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "GROUP1 0x80 /0", "ExpectedArm64ASM": [ "mov w20, #0x1", - "ldaddalb w20, w27, [x4]", - "lsl w0, w27, #24", + "mov x21, x4", + "ldaddalb w20, w22, [x21]", + "mov x27, x22", + "lsl w0, w22, #24", "cmn w0, w20, lsl #24", - "add w26, w27, #0x1 (1)" + "add w20, w22, #0x1 (1)", + "mov x26, x20" ] }, "lock add byte [rax], 0xFF": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": "GROUP1 0x80 /0", "ExpectedArm64ASM": [ "mov w20, #0xff", - "ldaddalb w20, w21, [x4]", - "mvn w27, w21", - "lsl w0, w21, #24", + "mov x21, x4", + "ldaddalb w20, w22, [x21]", + "mvn w21, w22", + "mov x27, x21", + "lsl w0, w22, #24", "cmn w0, w20, lsl #24", - "add w26, w21, #0xff (255)" + "add w20, w22, #0xff (255)", + "mov x26, x20" ] }, "lock add word [rax], 0x100": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "GROUP1 0x81 /0", "ExpectedArm64ASM": [ "mov w20, #0x100", - "ldaddalh w20, w27, [x4]", - "lsl w0, w27, #16", + "mov x21, x4", + "ldaddalh w20, w22, [x21]", + "mov x27, x22", + "lsl w0, w22, #16", "cmn w0, w20, lsl #16", - "add w26, w27, #0x100 (256)" + "add w20, w22, #0x100 (256)", + "mov x26, x20" ] }, "lock add word [rax], 0xFFFF": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": "GROUP1 0x81 /0", "ExpectedArm64ASM": [ "mov w20, #0xffff", - "ldaddalh w20, w21, [x4]", - "mvn w27, w21", - "lsl w0, w21, #16", + "mov x21, x4", + "ldaddalh w20, w22, [x21]", + "mvn w21, w22", + "mov x27, x21", + "lsl w0, w22, #16", "cmn w0, w20, lsl #16", - "add w26, w21, w20" + "add w21, w22, w20", + "mov x26, x21" ] }, "lock add dword [rax], 0x100": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /0", "ExpectedArm64ASM": [ "mov w20, #0x100", - "ldaddal w20, w27, [x4]", - "adds w26, w27, #0x100 (256)" + "mov x21, x4", + "ldaddal w20, w22, [x21]", + "mov x27, x22", + "adds w20, w22, #0x100 (256)", + "mov x26, x20" ] }, "lock add dword [rax], 0xFFFFFFFF": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x81 /0", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "ldaddal w20, w21, [x4]", - "mvn w27, w21", - "adds w26, w21, w20" + "mov x21, x4", + "ldaddal w20, w22, [x21]", + "mvn w21, w22", + "mov x27, x21", + "adds w21, w22, w20", + "mov x26, x21" ] }, "lock add qword [rax], 0x100": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /0", "ExpectedArm64ASM": [ "mov w20, #0x100", - "ldaddal x20, x27, [x4]", - "adds x26, x27, #0x100 (256)" + "mov x21, x4", + "ldaddal x20, x22, [x21]", + "mov x27, x22", + "adds x20, x22, #0x100 (256)", + "mov x26, x20" ] }, "lock add qword [rax], -2147483647": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /0", "ExpectedArm64ASM": [ "mov x20, #0xffffffff80000001", - "ldaddal x20, x27, [x4]", - "adds x26, x27, x20" + "mov x21, x4", + "ldaddal x20, x22, [x21]", + "mov x27, x22", + "adds x21, x22, x20", + "mov x26, x21" ] }, "lock add word [rax], 1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "GROUP1 0x83 /0", "ExpectedArm64ASM": [ "mov w20, #0x1", - "ldaddalh w20, w27, [x4]", - "lsl w0, w27, #16", + "mov x21, x4", + "ldaddalh w20, w22, [x21]", + "mov x27, x22", + "lsl w0, w22, #16", "cmn w0, w20, lsl #16", - "add w26, w27, #0x1 (1)" + "add w20, w22, #0x1 (1)", + "mov x26, x20" ] }, "lock add dword [rax], 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /0", "ExpectedArm64ASM": [ "mov w20, #0x1", - "ldaddal w20, w27, [x4]", - "adds w26, w27, #0x1 (1)" + "mov x21, x4", + "ldaddal w20, w22, [x21]", + "mov x27, x22", + "adds w20, w22, #0x1 (1)", + "mov x26, x20" ] }, "lock add qword [rax], 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /0", "ExpectedArm64ASM": [ "mov w20, #0x1", - "ldaddal x20, x27, [x4]", - "adds x26, x27, #0x1 (1)" + "mov x21, x4", + "ldaddal x20, x22, [x21]", + "mov x27, x22", + "adds x20, x22, #0x1 (1)", + "mov x26, x20" ] }, "lock or byte [rax], 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x80 /1", "ExpectedArm64ASM": [ "mov w20, #0x1", - "ldsetalb w20, w20, [x4]", - "orr w26, w20, #0x1", - "cmn wzr, w26, lsl #24" + "mov x21, x4", + "ldsetalb w20, w22, [x21]", + "orr w20, w22, #0x1", + "mov x26, x20", + "cmn wzr, w20, lsl #24" ] }, "lock or byte [rax], 0xFF": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x80 /1", "ExpectedArm64ASM": [ "mov w20, #0xff", - "ldsetalb w20, w20, [x4]", - "orr w26, w20, #0xff", - "cmn wzr, w26, lsl #24" + "mov x21, x4", + "ldsetalb w20, w22, [x21]", + "orr w20, w22, #0xff", + "mov x26, x20", + "cmn wzr, w20, lsl #24" ] }, "lock or word [rax], 0x100": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /1", "ExpectedArm64ASM": [ "mov w20, #0x100", - "ldsetalh w20, w20, [x4]", - "orr w26, w20, #0x100", - "cmn wzr, w26, lsl #16" + "mov x21, x4", + "ldsetalh w20, w22, [x21]", + "orr w20, w22, #0x100", + "mov x26, x20", + "cmn wzr, w20, lsl #16" ] }, "lock or word [rax], 0xFFFF": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /1", "ExpectedArm64ASM": [ "mov w20, #0xffff", - "ldsetalh w20, w20, [x4]", - "orr w26, w20, #0xffff", - "cmn wzr, w26, lsl #16" + "mov x21, x4", + "ldsetalh w20, w22, [x21]", + "orr w20, w22, #0xffff", + "mov x26, x20", + "cmn wzr, w20, lsl #16" ] }, "lock or dword [rax], 0x100": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /1", "ExpectedArm64ASM": [ "mov w20, #0x100", - "ldsetal w20, w20, [x4]", - "orr w26, w20, #0x100", - "tst w26, w26" + "mov x21, x4", + "ldsetal w20, w22, [x21]", + "orr w20, w22, #0x100", + "mov x26, x20", + "tst w20, w20" ] }, "lock or dword [rax], 0xFFFFFFFF": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /1", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "ldsetal w20, w21, [x4]", - "orr w26, w21, w20", - "tst w26, w26" + "mov x21, x4", + "ldsetal w20, w22, [x21]", + "orr w21, w22, w20", + "mov x26, x21", + "tst w21, w21" ] }, "lock or qword [rax], 0x100": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /1", "ExpectedArm64ASM": [ "mov w20, #0x100", - "ldsetal x20, x20, [x4]", - "orr x26, x20, #0x100", - "tst x26, x26" + "mov x21, x4", + "ldsetal x20, x22, [x21]", + "orr x20, x22, #0x100", + "mov x26, x20", + "tst x20, x20" ] }, "lock or qword [rax], -2147483647": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /1", "ExpectedArm64ASM": [ "mov x20, #0xffffffff80000001", - "ldsetal x20, x20, [x4]", - "orr x26, x20, #0xffffffff80000001", - "tst x26, x26" + "mov x21, x4", + "ldsetal x20, x22, [x21]", + "orr x20, x22, #0xffffffff80000001", + "mov x26, x20", + "tst x20, x20" ] }, "lock or word [rax], 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /1", "ExpectedArm64ASM": [ "mov w20, #0x1", - "ldsetalh w20, w20, [x4]", - "orr w26, w20, #0x1", - "cmn wzr, w26, lsl #16" + "mov x21, x4", + "ldsetalh w20, w22, [x21]", + "orr w20, w22, #0x1", + "mov x26, x20", + "cmn wzr, w20, lsl #16" ] }, "lock or dword [rax], 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /1", "ExpectedArm64ASM": [ "mov w20, #0x1", - "ldsetal w20, w20, [x4]", - "orr w26, w20, #0x1", - "tst w26, w26" + "mov x21, x4", + "ldsetal w20, w22, [x21]", + "orr w20, w22, #0x1", + "mov x26, x20", + "tst w20, w20" ] }, "lock or qword [rax], 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /1", "ExpectedArm64ASM": [ "mov w20, #0x1", - "ldsetal x20, x20, [x4]", - "orr x26, x20, #0x1", - "tst x26, x26" + "mov x21, x4", + "ldsetal x20, x22, [x21]", + "orr x20, x22, #0x1", + "mov x26, x20", + "tst x20, x20" ] }, "lock adc byte [rax], 1": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 22, "Comment": "GROUP1 0x80 /2", "ExpectedArm64ASM": [ "mov w20, #0x1", "adc w21, wzr, w20", - "ldaddalb w21, w27, [x4]", + "mov x22, x4", + "ldaddalb w21, w23, [x22]", + "mov x27, x23", "cset w21, hs", - "adc w20, w27, w20", - "uxtb w26, w20", - "cmp w26, #0x1 (1)", - "cset x20, lo", - "cmp w26, #0x1 (1)", - "cset x22, ls", + "adc w22, w23, w20", + "uxtb w20, w22", + "cmp w20, #0x1 (1)", + "cset x22, lo", + "cmp w20, #0x1 (1)", + "cset x24, ls", "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #24", + "csel x25, x24, x22, eq", + "cmn wzr, w20, lsl #24", "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "bic w21, w26, w27", - "ubfx x21, x21, #7, #1", - "orr w20, w20, w21, lsl #28", - "msr nzcv, x20" + "orr w22, w21, w25, lsl #29", + "bic w21, w20, w23", + "ubfx x23, x21, #7, #1", + "orr w21, w22, w23, lsl #28", + "mov x26, x20", + "msr nzcv, x21" ] }, "lock adc byte [rax], 0xFF": { - "ExpectedInstructionCount": 20, + "ExpectedInstructionCount": 23, "Comment": "GROUP1 0x80 /2", "ExpectedArm64ASM": [ "mov w20, #0xff", "adc w21, wzr, w20", - "ldaddalb w21, w21, [x4]", - "mvn w27, w21", - "cset w22, hs", - "adc w20, w21, w20", - "uxtb w26, w20", - "cmp w26, #0xff (255)", - "cset x20, lo", - "cmp w26, #0xff (255)", - "cset x23, ls", - "cmp x22, #0x1 (1)", - "csel x20, x23, x20, eq", - "cmn wzr, w26, lsl #24", - "mrs x22, nzcv", - "orr w20, w22, w20, lsl #29", - "bic w21, w21, w26", - "ubfx x21, x21, #7, #1", - "orr w20, w20, w21, lsl #28", - "msr nzcv, x20" + "mov x22, x4", + "ldaddalb w21, w23, [x22]", + "mvn w21, w23", + "mov x27, x21", + "cset w21, hs", + "adc w22, w23, w20", + "uxtb w20, w22", + "cmp w20, #0xff (255)", + "cset x22, lo", + "cmp w20, #0xff (255)", + "cset x24, ls", + "cmp x21, #0x1 (1)", + "csel x25, x24, x22, eq", + "cmn wzr, w20, lsl #24", + "mrs x21, nzcv", + "orr w22, w21, w25, lsl #29", + "bic w21, w23, w20", + "ubfx x23, x21, #7, #1", + "orr w21, w22, w23, lsl #28", + "mov x26, x20", + "msr nzcv, x21" ] }, "lock adc word [rax], 0x100": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 22, "Comment": "GROUP1 0x81 /2", "ExpectedArm64ASM": [ "mov w20, #0x100", "adc w21, wzr, w20", - "ldaddalh w21, w27, [x4]", + "mov x22, x4", + "ldaddalh w21, w23, [x22]", + "mov x27, x23", "cset w21, hs", - "adc w20, w27, w20", - "uxth w26, w20", - "cmp w26, #0x100 (256)", - "cset x20, lo", - "cmp w26, #0x100 (256)", - "cset x22, ls", + "adc w22, w23, w20", + "uxth w20, w22", + "cmp w20, #0x100 (256)", + "cset x22, lo", + "cmp w20, #0x100 (256)", + "cset x24, ls", "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #16", + "csel x25, x24, x22, eq", + "cmn wzr, w20, lsl #16", "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "bic w21, w26, w27", - "ubfx x21, x21, #15, #1", - "orr w20, w20, w21, lsl #28", - "msr nzcv, x20" + "orr w22, w21, w25, lsl #29", + "bic w21, w20, w23", + "ubfx x23, x21, #15, #1", + "orr w21, w22, w23, lsl #28", + "mov x26, x20", + "msr nzcv, x21" ] }, "lock adc word [rax], 0xFFFF": { - "ExpectedInstructionCount": 20, + "ExpectedInstructionCount": 23, "Comment": "GROUP1 0x81 /2", "ExpectedArm64ASM": [ "mov w20, #0xffff", "adc w21, wzr, w20", - "ldaddalh w21, w21, [x4]", - "mvn w27, w21", - "cset w22, hs", - "adc w23, w21, w20", - "uxth w26, w23", - "cmp w26, w20", - "cset x23, lo", - "cmp w26, w20", - "cset x20, ls", - "cmp x22, #0x1 (1)", - "csel x20, x20, x23, eq", - "cmn wzr, w26, lsl #16", - "mrs x22, nzcv", - "orr w20, w22, w20, lsl #29", - "bic w21, w21, w26", - "ubfx x21, x21, #15, #1", - "orr w20, w20, w21, lsl #28", + "mov x22, x4", + "ldaddalh w21, w23, [x22]", + "mvn w21, w23", + "mov x27, x21", + "cset w21, hs", + "adc w22, w23, w20", + "uxth w24, w22", + "cmp w24, w20", + "cset x22, lo", + "cmp w24, w20", + "cset x25, ls", + "cmp x21, #0x1 (1)", + "csel x20, x25, x22, eq", + "cmn wzr, w24, lsl #16", + "mrs x21, nzcv", + "orr w22, w21, w20, lsl #29", + "bic w20, w23, w24", + "ubfx x21, x20, #15, #1", + "orr w20, w22, w21, lsl #28", + "mov x26, x24", "msr nzcv, x20" ] }, "lock adc dword [rax], 0x100": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x81 /2", "ExpectedArm64ASM": [ "mov w20, #0x100", "adc w21, wzr, w20", - "ldaddal w21, w27, [x4]", - "adcs w26, w27, w20" + "mov x22, x4", + "ldaddal w21, w23, [x22]", + "mov x27, x23", + "adcs w21, w23, w20", + "mov x26, x21" ] }, "lock adc dword [rax], 0xFFFFFFFF": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "GROUP1 0x81 /2", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", "adc w21, wzr, w20", - "ldaddal w21, w21, [x4]", - "mvn w27, w21", - "adcs w26, w21, w20" + "mov x22, x4", + "ldaddal w21, w23, [x22]", + "mvn w21, w23", + "mov x27, x21", + "adcs w21, w23, w20", + "mov x26, x21" ] }, "lock adc qword [rax], 0x100": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x81 /2", "ExpectedArm64ASM": [ "mov w20, #0x100", "adc x21, xzr, x20", - "ldaddal x21, x27, [x4]", - "adcs x26, x27, x20" + "mov x22, x4", + "ldaddal x21, x23, [x22]", + "mov x27, x23", + "adcs x21, x23, x20", + "mov x26, x21" ] }, "lock adc qword [rax], -2147483647": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x81 /2", "ExpectedArm64ASM": [ "mov x20, #0xffffffff80000001", "adc x21, xzr, x20", - "ldaddal x21, x27, [x4]", - "adcs x26, x27, x20" + "mov x22, x4", + "ldaddal x21, x23, [x22]", + "mov x27, x23", + "adcs x21, x23, x20", + "mov x26, x21" ] }, "lock adc word [rax], 1": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 22, "Comment": "GROUP1 0x83 /2", "ExpectedArm64ASM": [ "mov w20, #0x1", "adc w21, wzr, w20", - "ldaddalh w21, w27, [x4]", + "mov x22, x4", + "ldaddalh w21, w23, [x22]", + "mov x27, x23", "cset w21, hs", - "adc w20, w27, w20", - "uxth w26, w20", - "cmp w26, #0x1 (1)", - "cset x20, lo", - "cmp w26, #0x1 (1)", - "cset x22, ls", + "adc w22, w23, w20", + "uxth w20, w22", + "cmp w20, #0x1 (1)", + "cset x22, lo", + "cmp w20, #0x1 (1)", + "cset x24, ls", "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #16", + "csel x25, x24, x22, eq", + "cmn wzr, w20, lsl #16", "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "bic w21, w26, w27", - "ubfx x21, x21, #15, #1", - "orr w20, w20, w21, lsl #28", - "msr nzcv, x20" + "orr w22, w21, w25, lsl #29", + "bic w21, w20, w23", + "ubfx x23, x21, #15, #1", + "orr w21, w22, w23, lsl #28", + "mov x26, x20", + "msr nzcv, x21" ] }, "lock adc dword [rax], 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x83 /2", "ExpectedArm64ASM": [ "mov w20, #0x1", "adc w21, wzr, w20", - "ldaddal w21, w27, [x4]", - "adcs w26, w27, w20" + "mov x22, x4", + "ldaddal w21, w23, [x22]", + "mov x27, x23", + "adcs w21, w23, w20", + "mov x26, x21" ] }, "lock adc qword [rax], 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x83 /2", "ExpectedArm64ASM": [ "mov w20, #0x1", "adc x21, xzr, x20", - "ldaddal x21, x27, [x4]", - "adcs x26, x27, x20" + "mov x22, x4", + "ldaddal x21, x23, [x22]", + "mov x27, x23", + "adcs x21, x23, x20", + "mov x26, x21" ] }, "lock sbb byte [rax], 1": { - "ExpectedInstructionCount": 21, + "ExpectedInstructionCount": 24, "Comment": "GROUP1 0x80 /3", "ExpectedArm64ASM": [ "mov w20, #0x1", - "adc w21, wzr, w20", - "neg w1, w21", - "ldaddalb w1, w27, [x4]", + "mov x21, x4", + "adc w22, wzr, w20", + "neg w1, w22", + "ldaddalb w1, w23, [x21]", + "mov x27, x23", "cset w21, hs", - "add w20, w20, w21", - "sub w20, w27, w20", - "uxtb w26, w20", - "cmp w26, w27", + "add w22, w20, w21", + "sub w20, w23, w22", + "uxtb w22, w20", + "cmp w22, w23", "cset x20, hi", - "cmp w26, w27", - "cset x22, hs", + "cmp w22, w23", + "cset x24, hs", "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #24", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "bic w21, w27, w26", - "ubfx x21, x21, #7, #1", - "orr w20, w20, w21, lsl #28", + "csel x25, x24, x20, eq", + "cmn wzr, w22, lsl #24", + "mrs x20, nzcv", + "orr w21, w20, w25, lsl #29", + "bic w20, w23, w22", + "ubfx x23, x20, #7, #1", + "orr w20, w21, w23, lsl #28", + "mov x26, x22", "msr nzcv, x20" ] }, "lock sbb byte [rax], 0xFF": { - "ExpectedInstructionCount": 22, + "ExpectedInstructionCount": 25, "Comment": "GROUP1 0x80 /3", "ExpectedArm64ASM": [ "mov w20, #0xff", - "adc w21, wzr, w20", - "neg w1, w21", - "ldaddalb w1, w21, [x4]", - "mvn w27, w21", - "cset w22, hs", - "add w20, w20, w22", - "sub w20, w21, w20", - "uxtb w26, w20", - "cmp w26, w21", + "mov x21, x4", + "adc w22, wzr, w20", + "neg w1, w22", + "ldaddalb w1, w23, [x21]", + "mvn w21, w23", + "mov x27, x21", + "cset w21, hs", + "add w22, w20, w21", + "sub w20, w23, w22", + "uxtb w22, w20", + "cmp w22, w23", "cset x20, hi", - "cmp w26, w21", - "cset x23, hs", - "cmp x22, #0x1 (1)", - "csel x20, x23, x20, eq", - "cmn wzr, w26, lsl #24", - "mrs x22, nzcv", - "orr w20, w22, w20, lsl #29", - "bic w21, w26, w21", - "ubfx x21, x21, #7, #1", - "orr w20, w20, w21, lsl #28", + "cmp w22, w23", + "cset x24, hs", + "cmp x21, #0x1 (1)", + "csel x25, x24, x20, eq", + "cmn wzr, w22, lsl #24", + "mrs x20, nzcv", + "orr w21, w20, w25, lsl #29", + "bic w20, w22, w23", + "ubfx x23, x20, #7, #1", + "orr w20, w21, w23, lsl #28", + "mov x26, x22", "msr nzcv, x20" ] }, "lock sbb word [rax], 0x100": { - "ExpectedInstructionCount": 21, + "ExpectedInstructionCount": 24, "Comment": "GROUP1 0x81 /3", "ExpectedArm64ASM": [ "mov w20, #0x100", - "adc w21, wzr, w20", - "neg w1, w21", - "ldaddalh w1, w27, [x4]", + "mov x21, x4", + "adc w22, wzr, w20", + "neg w1, w22", + "ldaddalh w1, w23, [x21]", + "mov x27, x23", "cset w21, hs", - "add w20, w20, w21", - "sub w20, w27, w20", - "uxth w26, w20", - "cmp w26, w27", + "add w22, w20, w21", + "sub w20, w23, w22", + "uxth w22, w20", + "cmp w22, w23", "cset x20, hi", - "cmp w26, w27", - "cset x22, hs", + "cmp w22, w23", + "cset x24, hs", "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #16", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "bic w21, w27, w26", - "ubfx x21, x21, #15, #1", - "orr w20, w20, w21, lsl #28", + "csel x25, x24, x20, eq", + "cmn wzr, w22, lsl #16", + "mrs x20, nzcv", + "orr w21, w20, w25, lsl #29", + "bic w20, w23, w22", + "ubfx x23, x20, #15, #1", + "orr w20, w21, w23, lsl #28", + "mov x26, x22", "msr nzcv, x20" ] }, "lock sbb word [rax], 0xFFFF": { - "ExpectedInstructionCount": 22, + "ExpectedInstructionCount": 25, "Comment": "GROUP1 0x81 /3", "ExpectedArm64ASM": [ "mov w20, #0xffff", - "adc w21, wzr, w20", - "neg w1, w21", - "ldaddalh w1, w21, [x4]", - "mvn w27, w21", - "cset w22, hs", - "add w20, w20, w22", - "sub w20, w21, w20", - "uxth w26, w20", - "cmp w26, w21", + "mov x21, x4", + "adc w22, wzr, w20", + "neg w1, w22", + "ldaddalh w1, w23, [x21]", + "mvn w21, w23", + "mov x27, x21", + "cset w21, hs", + "add w22, w20, w21", + "sub w20, w23, w22", + "uxth w22, w20", + "cmp w22, w23", "cset x20, hi", - "cmp w26, w21", - "cset x23, hs", - "cmp x22, #0x1 (1)", - "csel x20, x23, x20, eq", - "cmn wzr, w26, lsl #16", - "mrs x22, nzcv", - "orr w20, w22, w20, lsl #29", - "bic w21, w26, w21", - "ubfx x21, x21, #15, #1", - "orr w20, w20, w21, lsl #28", + "cmp w22, w23", + "cset x24, hs", + "cmp x21, #0x1 (1)", + "csel x25, x24, x20, eq", + "cmn wzr, w22, lsl #16", + "mrs x20, nzcv", + "orr w21, w20, w25, lsl #29", + "bic w20, w22, w23", + "ubfx x23, x20, #15, #1", + "orr w20, w21, w23, lsl #28", + "mov x26, x22", "msr nzcv, x20" ] }, "lock sbb dword [rax], 0x100": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 14, "Comment": "GROUP1 0x81 /3", "ExpectedArm64ASM": [ "mov w20, #0x100", - "adc w21, wzr, w20", - "neg w1, w21", - "ldaddal w1, w27, [x4]", + "mov x21, x4", + "adc w22, wzr, w20", + "neg w1, w22", + "ldaddal w1, w23, [x21]", + "mov x27, x23", "mrs x21, nzcv", - "eor w21, w21, #0x20000000", - "msr nzcv, x21", - "sbcs w26, w27, w20", + "eor w22, w21, #0x20000000", + "msr nzcv, x22", + "sbcs w21, w23, w20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w22, w20, #0x20000000", + "mov x26, x21", + "msr nzcv, x22" ] }, "lock sbb dword [rax], 0xFFFFFFFF": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 15, "Comment": "GROUP1 0x81 /3", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "adc w21, wzr, w20", - "neg w1, w21", - "ldaddal w1, w21, [x4]", - "mvn w27, w21", - "mrs x22, nzcv", - "eor w22, w22, #0x20000000", + "mov x21, x4", + "adc w22, wzr, w20", + "neg w1, w22", + "ldaddal w1, w23, [x21]", + "mvn w21, w23", + "mov x27, x21", + "mrs x21, nzcv", + "eor w22, w21, #0x20000000", "msr nzcv, x22", - "sbcs w26, w21, w20", + "sbcs w21, w23, w20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w22, w20, #0x20000000", + "mov x26, x21", + "msr nzcv, x22" ] }, "lock sbb qword [rax], 0x100": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 14, "Comment": "GROUP1 0x81 /3", "ExpectedArm64ASM": [ "mov w20, #0x100", - "adc x21, xzr, x20", - "neg x1, x21", - "ldaddal x1, x27, [x4]", + "mov x21, x4", + "adc x22, xzr, x20", + "neg x1, x22", + "ldaddal x1, x23, [x21]", + "mov x27, x23", "mrs x21, nzcv", - "eor w21, w21, #0x20000000", - "msr nzcv, x21", - "sbcs x26, x27, x20", + "eor w22, w21, #0x20000000", + "msr nzcv, x22", + "sbcs x21, x23, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w22, w20, #0x20000000", + "mov x26, x21", + "msr nzcv, x22" ] }, "lock sbb qword [rax], -2147483647": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 14, "Comment": "GROUP1 0x81 /3", "ExpectedArm64ASM": [ "mov x20, #0xffffffff80000001", - "adc x21, xzr, x20", - "neg x1, x21", - "ldaddal x1, x27, [x4]", + "mov x21, x4", + "adc x22, xzr, x20", + "neg x1, x22", + "ldaddal x1, x23, [x21]", + "mov x27, x23", "mrs x21, nzcv", - "eor w21, w21, #0x20000000", - "msr nzcv, x21", - "sbcs x26, x27, x20", + "eor w22, w21, #0x20000000", + "msr nzcv, x22", + "sbcs x21, x23, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w22, w20, #0x20000000", + "mov x26, x21", + "msr nzcv, x22" ] }, "lock sbb word [rax], 1": { - "ExpectedInstructionCount": 21, + "ExpectedInstructionCount": 24, "Comment": "GROUP1 0x83 /3", "ExpectedArm64ASM": [ "mov w20, #0x1", - "adc w21, wzr, w20", - "neg w1, w21", - "ldaddalh w1, w27, [x4]", + "mov x21, x4", + "adc w22, wzr, w20", + "neg w1, w22", + "ldaddalh w1, w23, [x21]", + "mov x27, x23", "cset w21, hs", - "add w20, w20, w21", - "sub w20, w27, w20", - "uxth w26, w20", - "cmp w26, w27", + "add w22, w20, w21", + "sub w20, w23, w22", + "uxth w22, w20", + "cmp w22, w23", "cset x20, hi", - "cmp w26, w27", - "cset x22, hs", + "cmp w22, w23", + "cset x24, hs", "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #16", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "bic w21, w27, w26", - "ubfx x21, x21, #15, #1", - "orr w20, w20, w21, lsl #28", + "csel x25, x24, x20, eq", + "cmn wzr, w22, lsl #16", + "mrs x20, nzcv", + "orr w21, w20, w25, lsl #29", + "bic w20, w23, w22", + "ubfx x23, x20, #15, #1", + "orr w20, w21, w23, lsl #28", + "mov x26, x22", "msr nzcv, x20" ] }, "lock sbb dword [rax], 1": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 14, "Comment": "GROUP1 0x83 /3", "ExpectedArm64ASM": [ "mov w20, #0x1", - "adc w21, wzr, w20", - "neg w1, w21", - "ldaddal w1, w27, [x4]", + "mov x21, x4", + "adc w22, wzr, w20", + "neg w1, w22", + "ldaddal w1, w23, [x21]", + "mov x27, x23", "mrs x21, nzcv", - "eor w21, w21, #0x20000000", - "msr nzcv, x21", - "sbcs w26, w27, w20", + "eor w22, w21, #0x20000000", + "msr nzcv, x22", + "sbcs w21, w23, w20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w22, w20, #0x20000000", + "mov x26, x21", + "msr nzcv, x22" ] }, "lock sbb qword [rax], 1": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 14, "Comment": "GROUP1 0x83 /3", "ExpectedArm64ASM": [ "mov w20, #0x1", - "adc x21, xzr, x20", - "neg x1, x21", - "ldaddal x1, x27, [x4]", + "mov x21, x4", + "adc x22, xzr, x20", + "neg x1, x22", + "ldaddal x1, x23, [x21]", + "mov x27, x23", "mrs x21, nzcv", - "eor w21, w21, #0x20000000", - "msr nzcv, x21", - "sbcs x26, x27, x20", + "eor w22, w21, #0x20000000", + "msr nzcv, x22", + "sbcs x21, x23, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w22, w20, #0x20000000", + "mov x26, x21", + "msr nzcv, x22" ] }, "lock and byte [rax], 1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x80 /4", "ExpectedArm64ASM": [ "mov w20, #0x1", + "mov x21, x4", "mvn w1, w20", - "ldclralb w1, w20, [x4]", - "and w26, w20, #0x1", - "cmn wzr, w26, lsl #24" + "ldclralb w1, w22, [x21]", + "and w20, w22, #0x1", + "cmn wzr, w20, lsl #24", + "mov x26, x20" ] }, "lock and byte [rax], 0xFF": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x80 /4", "ExpectedArm64ASM": [ "mov w20, #0xff", + "mov x21, x4", "mvn w1, w20", - "ldclralb w1, w20, [x4]", - "and w26, w20, #0xff", - "cmn wzr, w26, lsl #24" + "ldclralb w1, w22, [x21]", + "and w20, w22, #0xff", + "cmn wzr, w20, lsl #24", + "mov x26, x20" ] }, "lock and word [rax], 0x100": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x81 /4", "ExpectedArm64ASM": [ "mov w20, #0x100", + "mov x21, x4", "mvn w1, w20", - "ldclralh w1, w20, [x4]", - "and w26, w20, #0x100", - "cmn wzr, w26, lsl #16" + "ldclralh w1, w22, [x21]", + "and w20, w22, #0x100", + "cmn wzr, w20, lsl #16", + "mov x26, x20" ] }, "lock and word [rax], 0xFFFF": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x81 /4", "ExpectedArm64ASM": [ "mov w20, #0xffff", + "mov x21, x4", "mvn w1, w20", - "ldclralh w1, w20, [x4]", - "and w26, w20, #0xffff", - "cmn wzr, w26, lsl #16" + "ldclralh w1, w22, [x21]", + "and w20, w22, #0xffff", + "cmn wzr, w20, lsl #16", + "mov x26, x20" ] }, "lock and dword [rax], 0x100": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /4", "ExpectedArm64ASM": [ "mov w20, #0x100", + "mov x21, x4", "mvn w1, w20", - "ldclral w1, w20, [x4]", - "ands w26, w20, #0x100" + "ldclral w1, w22, [x21]", + "ands w20, w22, #0x100", + "mov x26, x20" ] }, "lock and dword [rax], 0xFFFFFFFF": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /4", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", + "mov x21, x4", "mvn w1, w20", - "ldclral w1, w21, [x4]", - "ands w26, w21, w20" + "ldclral w1, w22, [x21]", + "ands w21, w22, w20", + "mov x26, x21" ] }, "lock and qword [rax], 0x100": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /4", "ExpectedArm64ASM": [ "mov w20, #0x100", + "mov x21, x4", "mvn x1, x20", - "ldclral x1, x20, [x4]", - "ands x26, x20, #0x100" + "ldclral x1, x22, [x21]", + "ands x20, x22, #0x100", + "mov x26, x20" ] }, "lock and qword [rax], -2147483647": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /4", "ExpectedArm64ASM": [ "mov x20, #0xffffffff80000001", + "mov x21, x4", "mvn x1, x20", - "ldclral x1, x20, [x4]", - "ands x26, x20, #0xffffffff80000001" + "ldclral x1, x22, [x21]", + "ands x20, x22, #0xffffffff80000001", + "mov x26, x20" ] }, "lock and word [rax], 1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x83 /4", "ExpectedArm64ASM": [ "mov w20, #0x1", + "mov x21, x4", "mvn w1, w20", - "ldclralh w1, w20, [x4]", - "and w26, w20, #0x1", - "cmn wzr, w26, lsl #16" + "ldclralh w1, w22, [x21]", + "and w20, w22, #0x1", + "cmn wzr, w20, lsl #16", + "mov x26, x20" ] }, "lock and dword [rax], 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /4", "ExpectedArm64ASM": [ "mov w20, #0x1", + "mov x21, x4", "mvn w1, w20", - "ldclral w1, w20, [x4]", - "ands w26, w20, #0x1" + "ldclral w1, w22, [x21]", + "ands w20, w22, #0x1", + "mov x26, x20" ] }, "lock and qword [rax], 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /4", "ExpectedArm64ASM": [ "mov w20, #0x1", + "mov x21, x4", "mvn x1, x20", - "ldclral x1, x20, [x4]", - "ands x26, x20, #0x1" + "ldclral x1, x22, [x21]", + "ands x20, x22, #0x1", + "mov x26, x20" ] }, "lock sub byte [rax], 1": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 12, "Comment": "GROUP1 0x80 /5", "ExpectedArm64ASM": [ "mov w20, #0x1", + "mov x21, x4", "neg w1, w20", - "ldaddalb w1, w27, [x4]", - "lsl w0, w27, #24", + "ldaddalb w1, w22, [x21]", + "mov x27, x22", + "lsl w0, w22, #24", "cmp w0, w20, lsl #24", - "sub w26, w27, #0x1 (1)", + "sub w20, w22, #0x1 (1)", + "mov x26, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "lock sub byte [rax], 0xFF": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 13, "Comment": "GROUP1 0x80 /5", "ExpectedArm64ASM": [ "mov w20, #0xff", + "mov x21, x4", "neg w1, w20", - "ldaddalb w1, w21, [x4]", - "mvn w27, w21", - "lsl w0, w21, #24", + "ldaddalb w1, w22, [x21]", + "mvn w21, w22", + "mov x27, x21", + "lsl w0, w22, #24", "cmp w0, w20, lsl #24", - "sub w26, w21, #0xff (255)", + "sub w20, w22, #0xff (255)", + "mov x26, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "lock sub word [rax], 0x100": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 12, "Comment": "GROUP1 0x81 /5", "ExpectedArm64ASM": [ "mov w20, #0x100", + "mov x21, x4", "neg w1, w20", - "ldaddalh w1, w27, [x4]", - "lsl w0, w27, #16", + "ldaddalh w1, w22, [x21]", + "mov x27, x22", + "lsl w0, w22, #16", "cmp w0, w20, lsl #16", - "sub w26, w27, #0x100 (256)", + "sub w20, w22, #0x100 (256)", + "mov x26, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "lock sub word [rax], 0xFFFF": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 13, "Comment": "GROUP1 0x81 /5", "ExpectedArm64ASM": [ "mov w20, #0xffff", + "mov x21, x4", "neg w1, w20", - "ldaddalh w1, w21, [x4]", - "mvn w27, w21", - "lsl w0, w21, #16", + "ldaddalh w1, w22, [x21]", + "mvn w21, w22", + "mov x27, x21", + "lsl w0, w22, #16", "cmp w0, w20, lsl #16", - "sub w26, w21, w20", + "sub w21, w22, w20", + "mov x26, x21", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "lock sub dword [rax], 0x100": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "GROUP1 0x81 /5", "ExpectedArm64ASM": [ "mov w20, #0x100", + "mov x21, x4", "neg w1, w20", - "ldaddal w1, w27, [x4]", - "subs w26, w27, #0x100 (256)", + "ldaddal w1, w22, [x21]", + "mov x27, x22", + "subs w20, w22, #0x100 (256)", + "mov x26, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "lock sub dword [rax], 0xFFFFFFFF": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 11, "Comment": "GROUP1 0x81 /5", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", + "mov x21, x4", "neg w1, w20", - "ldaddal w1, w21, [x4]", - "mvn w27, w21", - "subs w26, w21, w20", + "ldaddal w1, w22, [x21]", + "mvn w21, w22", + "mov x27, x21", + "subs w21, w22, w20", + "mov x26, x21", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "lock sub qword [rax], 0x100": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "GROUP1 0x81 /5", "ExpectedArm64ASM": [ "mov w20, #0x100", + "mov x21, x4", "neg x1, x20", - "ldaddal x1, x27, [x4]", - "subs x26, x27, #0x100 (256)", + "ldaddal x1, x22, [x21]", + "mov x27, x22", + "subs x20, x22, #0x100 (256)", + "mov x26, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "lock sub qword [rax], -2147483647": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "GROUP1 0x81 /5", "ExpectedArm64ASM": [ "mov x20, #0xffffffff80000001", + "mov x21, x4", "neg x1, x20", - "ldaddal x1, x27, [x4]", - "subs x26, x27, x20", + "ldaddal x1, x22, [x21]", + "mov x27, x22", + "subs x21, x22, x20", + "mov x26, x21", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "lock sub word [rax], 1": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 12, "Comment": "GROUP1 0x83 /5", "ExpectedArm64ASM": [ "mov w20, #0x1", + "mov x21, x4", "neg w1, w20", - "ldaddalh w1, w27, [x4]", - "lsl w0, w27, #16", + "ldaddalh w1, w22, [x21]", + "mov x27, x22", + "lsl w0, w22, #16", "cmp w0, w20, lsl #16", - "sub w26, w27, #0x1 (1)", + "sub w20, w22, #0x1 (1)", + "mov x26, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "lock sub dword [rax], 1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "GROUP1 0x83 /5", "ExpectedArm64ASM": [ "mov w20, #0x1", + "mov x21, x4", "neg w1, w20", - "ldaddal w1, w27, [x4]", - "subs w26, w27, #0x1 (1)", + "ldaddal w1, w22, [x21]", + "mov x27, x22", + "subs w20, w22, #0x1 (1)", + "mov x26, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "lock sub qword [rax], 1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "GROUP1 0x83 /5", "ExpectedArm64ASM": [ "mov w20, #0x1", + "mov x21, x4", "neg x1, x20", - "ldaddal x1, x27, [x4]", - "subs x26, x27, #0x1 (1)", + "ldaddal x1, x22, [x21]", + "mov x27, x22", + "subs x20, x22, #0x1 (1)", + "mov x26, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "lock xor byte [rax], 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x80 /6", "ExpectedArm64ASM": [ "mov w20, #0x1", - "ldeoralb w20, w20, [x4]", - "eor w26, w20, #0x1", - "cmn wzr, w26, lsl #24" + "mov x21, x4", + "ldeoralb w20, w22, [x21]", + "eor w20, w22, #0x1", + "mov x26, x20", + "cmn wzr, w20, lsl #24" ] }, "lock xor byte [rax], 0xFF": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x80 /6", "ExpectedArm64ASM": [ "mov w20, #0xff", - "ldeoralb w20, w20, [x4]", - "eor w26, w20, #0xff", - "cmn wzr, w26, lsl #24" + "mov x21, x4", + "ldeoralb w20, w22, [x21]", + "eor w20, w22, #0xff", + "mov x26, x20", + "cmn wzr, w20, lsl #24" ] }, "lock xor word [rax], 0x100": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /6", "ExpectedArm64ASM": [ "mov w20, #0x100", - "ldeoralh w20, w20, [x4]", - "eor w26, w20, #0x100", - "cmn wzr, w26, lsl #16" + "mov x21, x4", + "ldeoralh w20, w22, [x21]", + "eor w20, w22, #0x100", + "mov x26, x20", + "cmn wzr, w20, lsl #16" ] }, "lock xor word [rax], 0xFFFF": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /6", "ExpectedArm64ASM": [ "mov w20, #0xffff", - "ldeoralh w20, w20, [x4]", - "eor w26, w20, #0xffff", - "cmn wzr, w26, lsl #16" + "mov x21, x4", + "ldeoralh w20, w22, [x21]", + "eor w20, w22, #0xffff", + "mov x26, x20", + "cmn wzr, w20, lsl #16" ] }, "lock xor dword [rax], 0x100": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /6", "ExpectedArm64ASM": [ "mov w20, #0x100", - "ldeoral w20, w20, [x4]", - "eor w26, w20, #0x100", - "tst w26, w26" + "mov x21, x4", + "ldeoral w20, w22, [x21]", + "eor w20, w22, #0x100", + "mov x26, x20", + "tst w20, w20" ] }, "lock xor dword [rax], 0xFFFFFFFF": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /6", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "ldeoral w20, w21, [x4]", - "eor w26, w21, w20", - "tst w26, w26" + "mov x21, x4", + "ldeoral w20, w22, [x21]", + "eor w21, w22, w20", + "mov x26, x21", + "tst w21, w21" ] }, "lock xor qword [rax], 0x100": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /6", "ExpectedArm64ASM": [ "mov w20, #0x100", - "ldeoral x20, x20, [x4]", - "eor x26, x20, #0x100", - "tst x26, x26" + "mov x21, x4", + "ldeoral x20, x22, [x21]", + "eor x20, x22, #0x100", + "mov x26, x20", + "tst x20, x20" ] }, "lock xor qword [rax], -2147483647": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /6", "ExpectedArm64ASM": [ "mov x20, #0xffffffff80000001", - "ldeoral x20, x20, [x4]", - "eor x26, x20, #0xffffffff80000001", - "tst x26, x26" + "mov x21, x4", + "ldeoral x20, x22, [x21]", + "eor x20, x22, #0xffffffff80000001", + "mov x26, x20", + "tst x20, x20" ] }, "lock xor word [rax], 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /6", "ExpectedArm64ASM": [ "mov w20, #0x1", - "ldeoralh w20, w20, [x4]", - "eor w26, w20, #0x1", - "cmn wzr, w26, lsl #16" + "mov x21, x4", + "ldeoralh w20, w22, [x21]", + "eor w20, w22, #0x1", + "mov x26, x20", + "cmn wzr, w20, lsl #16" ] }, "lock xor dword [rax], 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /6", "ExpectedArm64ASM": [ "mov w20, #0x1", - "ldeoral w20, w20, [x4]", - "eor w26, w20, #0x1", - "tst w26, w26" + "mov x21, x4", + "ldeoral w20, w22, [x21]", + "eor w20, w22, #0x1", + "mov x26, x20", + "tst w20, w20" ] }, "lock xor qword [rax], 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /6", "ExpectedArm64ASM": [ "mov w20, #0x1", - "ldeoral x20, x20, [x4]", - "eor x26, x20, #0x1", - "tst x26, x26" + "mov x21, x4", + "ldeoral x20, x22, [x21]", + "eor x20, x22, #0x1", + "mov x26, x20", + "tst x20, x20" ] }, "lock dec byte [rax]": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 14, "Comment": "GROUP3 0xfe /1", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov w21, #0xff", - "ldaddalb w21, w27, [x4]", + "mov x21, x4", + "mov w22, #0xff", + "ldaddalb w22, w23, [x21]", "cset w21, hs", - "lsl w0, w27, #24", + "mov x27, x23", + "lsl w0, w23, #24", "cmp w0, w20, lsl #24", - "sub w26, w27, #0x1 (1)", + "sub w20, w23, #0x1 (1)", + "mov x26, x20", "mrs x20, nzcv", - "bfi w20, w21, #29, #1", - "msr nzcv, x20" + "mov w22, w20", + "bfi w22, w21, #29, #1", + "msr nzcv, x22" ] }, "lock not byte [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "GROUP2 0xf6 /2", "ExpectedArm64ASM": [ "mov w20, #0xff", - "steorlb w20, [x4]" + "mov x21, x4", + "steorlb w20, [x21]" ] }, "lock not word [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "GROUP2 0xf7 /2", "ExpectedArm64ASM": [ "mov w20, #0xffff", - "steorlh w20, [x4]" + "mov x21, x4", + "steorlh w20, [x21]" ] }, "lock not dword [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "GROUP2 0xf7 /2", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "steorl w20, [x4]" + "mov x21, x4", + "steorl w20, [x21]" ] }, "lock not qword [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "GROUP2 0xf7 /2", "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", - "steorl x20, [x4]" + "mov x21, x4", + "steorl x20, [x21]" ] }, "lock neg byte [rax]": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 13, "Comment": "GROUP2 0xf6 /3", "ExpectedArm64ASM": [ - "ldaxrb w1, [x4]", + "mov x20, x4", + "ldaxrb w1, [x20]", "neg w2, w1", - "stlxrb w3, w2, [x4]", + "stlxrb w3, w2, [x20]", "cbnz w3, #-0xc", - "mov w27, w1", - "cmp wzr, w27, lsl #24", - "neg w26, w27", + "mov w21, w1", + "mov x27, x21", + "cmp wzr, w21, lsl #24", + "neg w20, w21", + "mov x26, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "lock neg word [rax]": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 13, "Comment": "GROUP2 0xf7 /3", "ExpectedArm64ASM": [ - "ldaxrh w1, [x4]", + "mov x20, x4", + "ldaxrh w1, [x20]", "neg w2, w1", - "stlxrh w3, w2, [x4]", + "stlxrh w3, w2, [x20]", "cbnz w3, #-0xc", - "mov w27, w1", - "cmp wzr, w27, lsl #16", - "neg w26, w27", + "mov w21, w1", + "mov x27, x21", + "cmp wzr, w21, lsl #16", + "neg w20, w21", + "mov x26, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "lock neg dword [rax]": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 12, "Comment": "GROUP2 0xf7 /3", "ExpectedArm64ASM": [ - "ldaxr w1, [x4]", + "mov x20, x4", + "ldaxr w1, [x20]", "neg w2, w1", - "stlxr w3, w2, [x4]", + "stlxr w3, w2, [x20]", "cbnz w3, #-0xc", - "mov w27, w1", - "negs w26, w27", + "mov w21, w1", + "mov x27, x21", + "negs w20, w21", + "mov x26, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "lock neg qword [rax]": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 12, "Comment": "GROUP2 0xf7 /3", "ExpectedArm64ASM": [ - "ldaxr x1, [x4]", + "mov x20, x4", + "ldaxr x1, [x20]", "neg x2, x1", - "stlxr w3, x2, [x4]", + "stlxr w3, x2, [x20]", "cbnz x3, #-0xc", - "mov x27, x1", - "negs x26, x27", + "mov x21, x1", + "mov x27, x21", + "negs x20, x21", + "mov x26, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "lock dec word [rax]": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 14, "Comment": "GROUP4 0xfe /1", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov w21, #0xffff", - "ldaddalh w21, w27, [x4]", + "mov x21, x4", + "mov w22, #0xffff", + "ldaddalh w22, w23, [x21]", "cset w21, hs", - "lsl w0, w27, #16", + "mov x27, x23", + "lsl w0, w23, #16", "cmp w0, w20, lsl #16", - "sub w26, w27, #0x1 (1)", + "sub w20, w23, #0x1 (1)", + "mov x26, x20", "mrs x20, nzcv", - "bfi w20, w21, #29, #1", - "msr nzcv, x20" + "mov w22, w20", + "bfi w22, w21, #29, #1", + "msr nzcv, x22" ] }, "lock dec dword [rax]": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 11, "Comment": "GROUP4 0xfe /1", "ExpectedArm64ASM": [ - "mov w20, #0xffffffff", - "ldaddal w20, w27, [x4]", + "mov x20, x4", + "mov w21, #0xffffffff", + "ldaddal w21, w22, [x20]", "cset w20, hs", - "subs w26, w27, #0x1 (1)", + "mov x27, x22", + "subs w21, w22, #0x1 (1)", + "mov x26, x21", "mrs x21, nzcv", - "mov w0, w21", - "bfi w0, w20, #29, #1", - "mov w20, w0", - "msr nzcv, x20" + "mov w22, w21", + "bfi w22, w20, #29, #1", + "msr nzcv, x22" ] }, "lock dec qword [rax]": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 11, "Comment": "GROUP4 0xfe /1", "ExpectedArm64ASM": [ - "mov x20, #0xffffffffffffffff", - "ldaddal x20, x27, [x4]", + "mov x20, x4", + "mov x21, #0xffffffffffffffff", + "ldaddal x21, x22, [x20]", "cset w20, hs", - "subs x26, x27, #0x1 (1)", + "mov x27, x22", + "subs x21, x22, #0x1 (1)", + "mov x26, x21", "mrs x21, nzcv", - "mov w0, w21", - "bfi w0, w20, #29, #1", - "mov w20, w0", - "msr nzcv, x20" + "mov w22, w21", + "bfi w22, w20, #29, #1", + "msr nzcv, x22" ] }, "lock inc byte [rax]": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 13, "Comment": "GROUP4 0xfe /0", "ExpectedArm64ASM": [ "mov w20, #0x1", - "ldaddalb w20, w27, [x4]", + "mov x21, x4", + "ldaddalb w20, w22, [x21]", "cset w21, hs", - "lsl w0, w27, #24", + "mov x27, x22", + "lsl w0, w22, #24", "cmn w0, w20, lsl #24", - "add w26, w27, #0x1 (1)", + "add w20, w22, #0x1 (1)", + "mov x26, x20", "mrs x20, nzcv", - "bfi w20, w21, #29, #1", - "msr nzcv, x20" + "mov w22, w20", + "bfi w22, w21, #29, #1", + "msr nzcv, x22" ] }, "lock inc word [rax]": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 13, "Comment": "GROUP4 0xfe /0", "ExpectedArm64ASM": [ "mov w20, #0x1", - "ldaddalh w20, w27, [x4]", + "mov x21, x4", + "ldaddalh w20, w22, [x21]", "cset w21, hs", - "lsl w0, w27, #16", + "mov x27, x22", + "lsl w0, w22, #16", "cmn w0, w20, lsl #16", - "add w26, w27, #0x1 (1)", + "add w20, w22, #0x1 (1)", + "mov x26, x20", "mrs x20, nzcv", - "bfi w20, w21, #29, #1", - "msr nzcv, x20" + "mov w22, w20", + "bfi w22, w21, #29, #1", + "msr nzcv, x22" ] }, "lock inc dword [rax]": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 11, "Comment": "GROUP4 0xfe /0", "ExpectedArm64ASM": [ "mov w20, #0x1", - "ldaddal w20, w27, [x4]", + "mov x21, x4", + "ldaddal w20, w22, [x21]", "cset w20, hs", - "adds w26, w27, #0x1 (1)", + "mov x27, x22", + "adds w21, w22, #0x1 (1)", + "mov x26, x21", "mrs x21, nzcv", - "mov w0, w21", - "bfi w0, w20, #29, #1", - "mov w20, w0", - "msr nzcv, x20" + "mov w22, w21", + "bfi w22, w20, #29, #1", + "msr nzcv, x22" ] }, "lock inc qword [rax]": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 11, "Comment": "GROUP4 0xfe /0", "ExpectedArm64ASM": [ "mov w20, #0x1", - "ldaddal x20, x27, [x4]", + "mov x21, x4", + "ldaddal x20, x22, [x21]", "cset w20, hs", - "adds x26, x27, #0x1 (1)", + "mov x27, x22", + "adds x21, x22, #0x1 (1)", + "mov x26, x21", "mrs x21, nzcv", - "mov w0, w21", - "bfi w0, w20, #29, #1", - "mov w20, w0", - "msr nzcv, x20" + "mov w22, w21", + "bfi w22, w20, #29, #1", + "msr nzcv, x22" ] } } diff --git a/unittests/InstructionCountCI/Crypto/H0F38.json b/unittests/InstructionCountCI/Crypto/H0F38.json index 4e573ad3e6..f3f73f03d7 100644 --- a/unittests/InstructionCountCI/Crypto/H0F38.json +++ b/unittests/InstructionCountCI/Crypto/H0F38.json @@ -12,126 +12,167 @@ }, "Instructions": { "sha1nexte xmm0, xmm1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": [ "0x66 0x0f 0x38 0xc8" ], "ExpectedArm64ASM": [ - "dup v2.4s, v16.s[3]", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v2.s[3]", "unimplemented (Unimplemented)", - "dup v2.4s, v2.s[0]", - "add v2.4s, v17.4s, v2.4s", - "mov v16.16b, v17.16b", - "mov v16.s[3], v2.s[3]" + "dup v4.4s, v2.s[0]", + "add v2.4s, v3.4s, v4.4s", + "mov v4.16b, v3.16b", + "mov v4.s[3], v2.s[3]", + "mov v16.16b, v4.16b" ] }, "sha256msg1 xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 6, "Comment": [ "0x66 0x0f 0x38 0xcc" ], "ExpectedArm64ASM": [ - "unimplemented (Unimplemented)" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v0.16b, v2.16b", + "unimplemented (Unimplemented)", + "mov v4.16b, v0.16b", + "mov v16.16b, v4.16b" ] }, "aesimc xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "0x66 0x0f 0x38 0xdb" ], "ExpectedArm64ASM": [ - "unimplemented (Unimplemented)" + "mov v2.16b, v17.16b", + "unimplemented (Unimplemented)", + "mov v16.16b, v3.16b" ] }, "aesenc xmm0, xmm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": [ "0x66 0x0f 0x38 0xdc" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "movi v4.2d, #0x0", + "mov v0.16b, v2.16b", "unimplemented (Unimplemented)", "unimplemented (Unimplemented)", - "eor v16.16b, v16.16b, v17.16b" + "eor v5.16b, v0.16b, v3.16b", + "mov v16.16b, v5.16b" ] }, "aesenclast xmm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0x66 0x0f 0x38 0xdd" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "movi v4.2d, #0x0", + "mov v0.16b, v2.16b", "unimplemented (Unimplemented)", - "eor v16.16b, v16.16b, v17.16b" + "eor v5.16b, v0.16b, v3.16b", + "mov v16.16b, v5.16b" ] }, "aesdec xmm0, xmm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": [ "0x66 0x0f 0x38 0xde" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "movi v4.2d, #0x0", + "mov v0.16b, v2.16b", "unimplemented (Unimplemented)", "unimplemented (Unimplemented)", - "eor v16.16b, v16.16b, v17.16b" + "eor v5.16b, v0.16b, v3.16b", + "mov v16.16b, v5.16b" ] }, "aesdeclast xmm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0x66 0x0f 0x38 0xdf" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "movi v4.2d, #0x0", + "mov v0.16b, v2.16b", "unimplemented (Unimplemented)", - "eor v16.16b, v16.16b, v17.16b" + "eor v5.16b, v0.16b, v3.16b", + "mov v16.16b, v5.16b" ] }, "crc32 eax, bl": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "0xf2 0x0f 0x38 0xf0" ], "ExpectedArm64ASM": [ - "crc32cb w4, w4, w7" + "mov x20, x4", + "mov x21, x7", + "crc32cb w22, w20, w21", + "mov x4, x22" ] }, "crc32 eax, bx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "0xf2 0x0f 0x38 0xf1" ], "ExpectedArm64ASM": [ - "crc32ch w4, w4, w7" + "mov x20, x4", + "mov x21, x7", + "crc32ch w22, w20, w21", + "mov x4, x22" ] }, "crc32 eax, ebx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "0xf2 0x0f 0x38 0xf1" ], "ExpectedArm64ASM": [ - "crc32cw w4, w4, w7" + "mov x20, x4", + "mov x21, x7", + "crc32cw w22, w20, w21", + "mov x4, x22" ] }, "crc32 rax, bl": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "0xf2 0x0f 0x38 0xf0" ], "ExpectedArm64ASM": [ - "crc32cb w4, w4, w7" + "mov x20, x4", + "mov x21, x7", + "crc32cb w22, w20, w21", + "mov x4, x22" ] }, "crc32 rax, rbx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "0xf2 0x0f 0x38 0xf1" ], "ExpectedArm64ASM": [ - "crc32cx w4, w4, x7" + "mov x20, x4", + "mov x21, x7", + "crc32cx w22, w20, x21", + "mov x4, x22" ] } } diff --git a/unittests/InstructionCountCI/Crypto/H0F3A.json b/unittests/InstructionCountCI/Crypto/H0F3A.json index f427a56faa..91948491b8 100644 --- a/unittests/InstructionCountCI/Crypto/H0F3A.json +++ b/unittests/InstructionCountCI/Crypto/H0F3A.json @@ -12,70 +12,86 @@ }, "Instructions": { "pclmulqdq xmm0, xmm1, 00000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "0x66 0x0f 0x3a 0x44" ], "ExpectedArm64ASM": [ - "pmull v16.1q, v16.1d, v17.1d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "pmull v4.1q, v2.1d, v3.1d", + "mov v16.16b, v4.16b" ] }, "pclmulqdq xmm0, xmm1, 00001b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x3a 0x44" ], "ExpectedArm64ASM": [ - "dup v0.2d, v16.d[1]", - "pmull v16.1q, v0.1d, v17.1d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v0.2d, v2.d[1]", + "pmull v4.1q, v0.1d, v3.1d", + "mov v16.16b, v4.16b" ] }, "pclmulqdq xmm0, xmm1, 10000b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x3a 0x44" ], "ExpectedArm64ASM": [ - "dup v0.2d, v17.d[1]", - "pmull v16.1q, v0.1d, v16.1d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v0.2d, v3.d[1]", + "pmull v4.1q, v0.1d, v2.1d", + "mov v16.16b, v4.16b" ] }, "pclmulqdq xmm0, xmm1, 10001b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "0x66 0x0f 0x3a 0x44" ], "ExpectedArm64ASM": [ - "pmull2 v16.1q, v16.2d, v17.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "pmull2 v4.1q, v2.2d, v3.2d", + "mov v16.16b, v4.16b" ] }, "aeskeygenassist xmm0, xmm1, 0": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": [ "0x66 0x0f 0x3a 0xdf" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2160]", - "movi v3.2d, #0x0", - "mov v16.16b, v17.16b", + "mov v2.16b, v17.16b", + "ldr q3, [x28, #2160]", + "movi v4.2d, #0x0", + "mov v5.16b, v2.16b", "unimplemented (Unimplemented)", - "tbl v16.16b, {v16.16b}, v2.16b" + "tbl v5.16b, {v5.16b}, v3.16b", + "mov v16.16b, v5.16b" ] }, "aeskeygenassist xmm0, xmm1, 0xFF": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 10, "Comment": [ "0x66 0x0f 0x3a 0xdf" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2160]", - "movi v3.2d, #0x0", - "mov v16.16b, v17.16b", + "mov v2.16b, v17.16b", + "ldr q3, [x28, #2160]", + "movi v4.2d, #0x0", + "mov v5.16b, v2.16b", "unimplemented (Unimplemented)", - "tbl v16.16b, {v16.16b}, v2.16b", + "tbl v5.16b, {v5.16b}, v3.16b", "mov x0, #0xff00000000", "dup v1.2d, x0", - "eor v16.16b, v16.16b, v1.16b" + "eor v5.16b, v5.16b, v1.16b", + "mov v16.16b, v5.16b" ] } } diff --git a/unittests/InstructionCountCI/DDD.json b/unittests/InstructionCountCI/DDD.json index d73d5630a6..4176940902 100644 --- a/unittests/InstructionCountCI/DDD.json +++ b/unittests/InstructionCountCI/DDD.json @@ -21,10 +21,10 @@ ], "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", - "uzp1 v2.4h, v2.4h, v2.4h", - "sxtl v2.4s, v2.4h", - "scvtf v2.2s, v2.2s", - "str d2, [x28, #768]" + "uzp1 v3.4h, v2.4h, v2.4h", + "sxtl v2.4s, v3.4h", + "scvtf v3.2s, v2.2s", + "str d3, [x28, #768]" ] }, "pi2fd mm0, mm1": { @@ -34,8 +34,8 @@ ], "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", - "scvtf v2.2s, v2.2s", - "str d2, [x28, #768]" + "scvtf v3.2s, v2.2s", + "str d3, [x28, #768]" ] }, "pf2iw mm0, mm1": { @@ -45,10 +45,10 @@ ], "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", - "fcvtzs v2.2s, v2.2s", - "uzp1 v2.4h, v2.4h, v2.4h", - "sxtl v2.4s, v2.4h", - "str d2, [x28, #768]" + "fcvtzs v3.2s, v2.2s", + "uzp1 v2.4h, v3.4h, v3.4h", + "sxtl v3.4s, v2.4h", + "str d3, [x28, #768]" ] }, "pf2id mm0, mm1": { @@ -58,8 +58,8 @@ ], "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", - "fcvtzs v2.2s, v2.2s", - "str d2, [x28, #768]" + "fcvtzs v3.2s, v2.2s", + "str d3, [x28, #768]" ] }, "pfrcpv mm0, mm1": { @@ -70,8 +70,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "fmov v0.4s, #0x70 (1.0000)", - "fdiv v2.4s, v0.4s, v2.4s", - "str d2, [x28, #768]" + "fdiv v3.4s, v0.4s, v2.4s", + "str d3, [x28, #768]" ] }, "pfrsqrtv mm0, mm1": { @@ -83,8 +83,8 @@ "ldr d2, [x28, #784]", "fmov v0.4s, #0x70 (1.0000)", "fsqrt v1.4s, v2.4s", - "fdiv v2.4s, v0.4s, v1.4s", - "str d2, [x28, #768]" + "fdiv v3.4s, v0.4s, v1.4s", + "str d3, [x28, #768]" ] }, "pfnacc mm0, mm1": { @@ -94,22 +94,23 @@ "ldr d2, [x28, #768]", "ldr d3, [x28, #784]", "uzp1 v4.2s, v2.2s, v3.2s", - "uzp2 v2.2s, v2.2s, v3.2s", - "fsub v2.4s, v4.4s, v2.4s", + "uzp2 v5.2s, v2.2s, v3.2s", + "fsub v2.4s, v4.4s, v5.4s", "str d2, [x28, #768]" ] }, "pfpnacc mm0, mm1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 8, "Comment": "0x0f 0x0f 0x8e", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", "ldr d3, [x28, #784]", "dup v4.2s, v2.s[1]", - "fsub s2, s2, s4", - "faddp v3.4s, v3.4s, v3.4s", - "mov v2.s[1], v3.s[0]", - "str d2, [x28, #768]" + "fsub s5, s2, s4", + "faddp v2.4s, v3.4s, v3.4s", + "mov v3.16b, v5.16b", + "mov v3.s[1], v2.s[0]", + "str d3, [x28, #768]" ] }, "pfcmpge mm0, mm1": { @@ -118,19 +119,20 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "fcmge v2.4s, v3.4s, v2.4s", - "str d2, [x28, #768]" + "fcmge v4.4s, v3.4s, v2.4s", + "str d4, [x28, #768]" ] }, "pfmin mm0, mm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x0f 0x94", "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "fcmgt v0.4s, v3.4s, v2.4s", - "bif v2.16b, v3.16b, v0.16b", - "str d2, [x28, #768]" + "fcmgt v0.4s, v2.4s, v3.4s", + "mov v4.16b, v3.16b", + "bif v4.16b, v2.16b, v0.16b", + "str d4, [x28, #768]" ] }, "pfrcp mm0, mm1": { @@ -141,8 +143,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "fmov s0, #0x70 (1.0000)", - "fdiv s2, s0, s2", - "dup v2.2s, v2.s[0]", + "fdiv s3, s0, s2", + "dup v2.2s, v3.s[0]", "str d2, [x28, #768]" ] }, @@ -155,8 +157,8 @@ "ldr d2, [x28, #784]", "fmov s0, #0x70 (1.0000)", "fsqrt s1, s2", - "fdiv s2, s0, s1", - "dup v2.2s, v2.s[0]", + "fdiv s3, s0, s1", + "dup v2.2s, v3.s[0]", "str d2, [x28, #768]" ] }, @@ -166,8 +168,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "fsub v2.4s, v3.4s, v2.4s", - "str d2, [x28, #768]" + "fsub v4.4s, v3.4s, v2.4s", + "str d4, [x28, #768]" ] }, "pfadd mm0, mm1": { @@ -176,8 +178,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "fadd v2.4s, v3.4s, v2.4s", - "str d2, [x28, #768]" + "fadd v4.4s, v3.4s, v2.4s", + "str d4, [x28, #768]" ] }, "pfcmpgt mm0, mm1": { @@ -186,19 +188,20 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "fcmgt v2.4s, v3.4s, v2.4s", - "str d2, [x28, #768]" + "fcmgt v4.4s, v3.4s, v2.4s", + "str d4, [x28, #768]" ] }, "pfmax mm0, mm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x0f 0xa4", "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "fcmgt v0.4s, v3.4s, v2.4s", - "bit v2.16b, v3.16b, v0.16b", - "str d2, [x28, #768]" + "fcmgt v0.4s, v2.4s, v3.4s", + "mov v4.16b, v3.16b", + "bit v4.16b, v2.16b, v0.16b", + "str d4, [x28, #768]" ] }, "pfrcpit1 mm0, mm1": { @@ -233,8 +236,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "fsub v2.4s, v2.4s, v3.4s", - "str d2, [x28, #768]" + "fsub v4.4s, v2.4s, v3.4s", + "str d4, [x28, #768]" ] }, "pfcmpeq mm0, mm1": { @@ -243,8 +246,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "fcmeq v2.4s, v3.4s, v2.4s", - "str d2, [x28, #768]" + "fcmeq v4.4s, v3.4s, v2.4s", + "str d4, [x28, #768]" ] }, "pfmul mm0, mm1": { @@ -253,8 +256,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "fmul v2.4s, v3.4s, v2.4s", - "str d2, [x28, #768]" + "fmul v4.4s, v3.4s, v2.4s", + "str d4, [x28, #768]" ] }, "pfrcpit2 mm0, mm1": { @@ -280,10 +283,10 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", "ldr d3, [x28, #784]", - "smull v2.4s, v2.4h, v3.4h", - "movi v3.4s, #0x80, lsl #8", - "add v2.4s, v2.4s, v3.4s", - "shrn v2.4h, v2.4s, #16", + "smull v4.4s, v2.4h, v3.4h", + "movi v2.4s, #0x80, lsl #8", + "add v3.4s, v4.4s, v2.4s", + "shrn v2.4h, v3.4s, #16", "str d2, [x28, #768]" ] }, @@ -292,8 +295,8 @@ "Comment": "0x0f 0x0f 0xbb", "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", - "rev64 v2.2s, v2.2s", - "str d2, [x28, #768]" + "rev64 v3.2s, v2.2s", + "str d3, [x28, #768]" ] }, "pavgusb mm0, mm1": { @@ -302,8 +305,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "urhadd v2.16b, v3.16b, v2.16b", - "str d2, [x28, #768]" + "urhadd v4.16b, v3.16b, v2.16b", + "str d4, [x28, #768]" ] } } diff --git a/unittests/InstructionCountCI/FEXOpt/AddressingLimitations.json b/unittests/InstructionCountCI/FEXOpt/AddressingLimitations.json index c23b03d5c1..25f73eabc3 100644 --- a/unittests/InstructionCountCI/FEXOpt/AddressingLimitations.json +++ b/unittests/InstructionCountCI/FEXOpt/AddressingLimitations.json @@ -14,911 +14,1149 @@ ], "Instructions": { "movzx rax, byte [ecx - 257]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "sub x20, x5, #0x101 (257)", - "mov w20, w20", - "ldrb w4, [x20]" + "mov x20, x5", + "sub x21, x20, #0x101 (257)", + "mov w20, w21", + "ldrb w21, [x20]", + "mov x4, x21" ] }, "movzx rax, byte [ecx - 256]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "sub x20, x5, #0x100 (256)", - "mov w20, w20", - "ldrb w4, [x20]" + "mov x20, x5", + "sub x21, x20, #0x100 (256)", + "mov w20, w21", + "ldrb w21, [x20]", + "mov x4, x21" ] }, "movzx rax, byte [ecx + 255]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "add x20, x5, #0xff (255)", - "mov w20, w20", - "ldrb w4, [x20]" + "mov x20, x5", + "add x21, x20, #0xff (255)", + "mov w20, w21", + "ldrb w21, [x20]", + "mov x4, x21" ] }, "movzx rax, byte [ecx + 256]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "add x20, x5, #0x100 (256)", - "mov w20, w20", - "ldrb w4, [x20]" + "mov x20, x5", + "add x21, x20, #0x100 (256)", + "mov w20, w21", + "ldrb w21, [x20]", + "mov x4, x21" ] }, "movzx rax, byte [ecx + 4095]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "add x20, x5, #0xfff (4095)", - "mov w20, w20", - "ldrb w4, [x20]" + "mov x20, x5", + "add x21, x20, #0xfff (4095)", + "mov w20, w21", + "ldrb w21, [x20]", + "mov x4, x21" ] }, "movzx rax, byte [ecx + 4096]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "add x20, x5, #0x1000 (4096)", - "mov w20, w20", - "ldrb w4, [x20]" + "mov x20, x5", + "add x21, x20, #0x1000 (4096)", + "mov w20, w21", + "ldrb w21, [x20]", + "mov x4, x21" ] }, "movzx rax, word [ecx - 257]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "sub x20, x5, #0x101 (257)", - "mov w20, w20", - "ldrh w4, [x20]" + "mov x20, x5", + "sub x21, x20, #0x101 (257)", + "mov w20, w21", + "ldrh w21, [x20]", + "mov x4, x21" ] }, "movzx rax, word [ecx - 256]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "sub x20, x5, #0x100 (256)", - "mov w20, w20", - "ldrh w4, [x20]" + "mov x20, x5", + "sub x21, x20, #0x100 (256)", + "mov w20, w21", + "ldrh w21, [x20]", + "mov x4, x21" ] }, "movzx rax, word [ecx + 255]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "add x20, x5, #0xff (255)", - "mov w20, w20", - "ldrh w4, [x20]" + "mov x20, x5", + "add x21, x20, #0xff (255)", + "mov w20, w21", + "ldrh w21, [x20]", + "mov x4, x21" ] }, "movzx rax, word [ecx + 256]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "add x20, x5, #0x100 (256)", - "mov w20, w20", - "ldrh w4, [x20]" + "mov x20, x5", + "add x21, x20, #0x100 (256)", + "mov w20, w21", + "ldrh w21, [x20]", + "mov x4, x21" ] }, "movzx rax, word [ecx + 8190]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "ExpectedArm64ASM": [ - "mov w20, #0x1ffe", - "add x20, x5, x20", - "mov w20, w20", - "ldrh w4, [x20]" + "mov x20, x5", + "mov w21, #0x1ffe", + "add x22, x20, x21", + "mov w20, w22", + "ldrh w21, [x20]", + "mov x4, x21" ] }, "movzx rax, word [ecx + 8191]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "ExpectedArm64ASM": [ - "mov w20, #0x1fff", - "add x20, x5, x20", - "mov w20, w20", - "ldrh w4, [x20]" + "mov x20, x5", + "mov w21, #0x1fff", + "add x22, x20, x21", + "mov w20, w22", + "ldrh w21, [x20]", + "mov x4, x21" ] }, "movzx rax, word [ecx + 8192]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "add x20, x5, #0x2000 (8192)", - "mov w20, w20", - "ldrh w4, [x20]" + "mov x20, x5", + "add x21, x20, #0x2000 (8192)", + "mov w20, w21", + "ldrh w21, [x20]", + "mov x4, x21" ] }, "mov eax, dword [ecx - 257]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "sub x20, x5, #0x101 (257)", - "mov w20, w20", - "ldr w4, [x20]" + "mov x20, x5", + "sub x21, x20, #0x101 (257)", + "mov w20, w21", + "ldr w21, [x20]", + "mov x4, x21" ] }, "mov eax, dword [ecx - 256]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "sub x20, x5, #0x100 (256)", - "mov w20, w20", - "ldr w4, [x20]" + "mov x20, x5", + "sub x21, x20, #0x100 (256)", + "mov w20, w21", + "ldr w21, [x20]", + "mov x4, x21" ] }, "mov eax, dword [ecx + 255]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "add x20, x5, #0xff (255)", - "mov w20, w20", - "ldr w4, [x20]" + "mov x20, x5", + "add x21, x20, #0xff (255)", + "mov w20, w21", + "ldr w21, [x20]", + "mov x4, x21" ] }, "mov eax, dword [ecx + 256]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "add x20, x5, #0x100 (256)", - "mov w20, w20", - "ldr w4, [x20]" + "mov x20, x5", + "add x21, x20, #0x100 (256)", + "mov w20, w21", + "ldr w21, [x20]", + "mov x4, x21" ] }, "mov eax, dword [ecx + 16380]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "ExpectedArm64ASM": [ - "mov w20, #0x3ffc", - "add x20, x5, x20", - "mov w20, w20", - "ldr w4, [x20]" + "mov x20, x5", + "mov w21, #0x3ffc", + "add x22, x20, x21", + "mov w20, w22", + "ldr w21, [x20]", + "mov x4, x21" ] }, "mov eax, dword [ecx + 16381]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "ExpectedArm64ASM": [ - "mov w20, #0x3ffd", - "add x20, x5, x20", - "mov w20, w20", - "ldr w4, [x20]" + "mov x20, x5", + "mov w21, #0x3ffd", + "add x22, x20, x21", + "mov w20, w22", + "ldr w21, [x20]", + "mov x4, x21" ] }, "mov eax, dword [ecx + 16382]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "ExpectedArm64ASM": [ - "mov w20, #0x3ffe", - "add x20, x5, x20", - "mov w20, w20", - "ldr w4, [x20]" + "mov x20, x5", + "mov w21, #0x3ffe", + "add x22, x20, x21", + "mov w20, w22", + "ldr w21, [x20]", + "mov x4, x21" ] }, "mov eax, dword [ecx + 16383]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "ExpectedArm64ASM": [ - "mov w20, #0x3fff", - "add x20, x5, x20", - "mov w20, w20", - "ldr w4, [x20]" + "mov x20, x5", + "mov w21, #0x3fff", + "add x22, x20, x21", + "mov w20, w22", + "ldr w21, [x20]", + "mov x4, x21" ] }, "mov eax, dword [ecx + 16384]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "add x20, x5, #0x4000 (16384)", - "mov w20, w20", - "ldr w4, [x20]" + "mov x20, x5", + "add x21, x20, #0x4000 (16384)", + "mov w20, w21", + "ldr w21, [x20]", + "mov x4, x21" ] }, "mov rax, qword [ecx - 257]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "sub x20, x5, #0x101 (257)", - "mov w20, w20", - "ldr x4, [x20]" + "mov x20, x5", + "sub x21, x20, #0x101 (257)", + "mov w20, w21", + "ldr x21, [x20]", + "mov x4, x21" ] }, "mov rax, qword [ecx - 256]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "sub x20, x5, #0x100 (256)", - "mov w20, w20", - "ldr x4, [x20]" + "mov x20, x5", + "sub x21, x20, #0x100 (256)", + "mov w20, w21", + "ldr x21, [x20]", + "mov x4, x21" ] }, "mov rax, qword [ecx + 255]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "add x20, x5, #0xff (255)", - "mov w20, w20", - "ldr x4, [x20]" + "mov x20, x5", + "add x21, x20, #0xff (255)", + "mov w20, w21", + "ldr x21, [x20]", + "mov x4, x21" ] }, "mov rax, qword [ecx + 256]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "add x20, x5, #0x100 (256)", - "mov w20, w20", - "ldr x4, [x20]" + "mov x20, x5", + "add x21, x20, #0x100 (256)", + "mov w20, w21", + "ldr x21, [x20]", + "mov x4, x21" ] }, "mov rax, qword [ecx + 32760]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "ExpectedArm64ASM": [ - "mov w20, #0x7ff8", - "add x20, x5, x20", - "mov w20, w20", - "ldr x4, [x20]" + "mov x20, x5", + "mov w21, #0x7ff8", + "add x22, x20, x21", + "mov w20, w22", + "ldr x21, [x20]", + "mov x4, x21" ] }, "mov rax, qword [ecx + 32761]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "ExpectedArm64ASM": [ - "mov w20, #0x7ff9", - "add x20, x5, x20", - "mov w20, w20", - "ldr x4, [x20]" + "mov x20, x5", + "mov w21, #0x7ff9", + "add x22, x20, x21", + "mov w20, w22", + "ldr x21, [x20]", + "mov x4, x21" ] }, "mov rax, qword [ecx + 32762]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "ExpectedArm64ASM": [ - "mov w20, #0x7ffa", - "add x20, x5, x20", - "mov w20, w20", - "ldr x4, [x20]" + "mov x20, x5", + "mov w21, #0x7ffa", + "add x22, x20, x21", + "mov w20, w22", + "ldr x21, [x20]", + "mov x4, x21" ] }, "mov rax, qword [ecx + 32763]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "ExpectedArm64ASM": [ - "mov w20, #0x7ffb", - "add x20, x5, x20", - "mov w20, w20", - "ldr x4, [x20]" + "mov x20, x5", + "mov w21, #0x7ffb", + "add x22, x20, x21", + "mov w20, w22", + "ldr x21, [x20]", + "mov x4, x21" ] }, "mov rax, qword [ecx + 32764]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "ExpectedArm64ASM": [ - "mov w20, #0x7ffc", - "add x20, x5, x20", - "mov w20, w20", - "ldr x4, [x20]" + "mov x20, x5", + "mov w21, #0x7ffc", + "add x22, x20, x21", + "mov w20, w22", + "ldr x21, [x20]", + "mov x4, x21" ] }, "mov rax, qword [ecx + 32765]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "ExpectedArm64ASM": [ - "mov w20, #0x7ffd", - "add x20, x5, x20", - "mov w20, w20", - "ldr x4, [x20]" + "mov x20, x5", + "mov w21, #0x7ffd", + "add x22, x20, x21", + "mov w20, w22", + "ldr x21, [x20]", + "mov x4, x21" ] }, "mov rax, qword [ecx + 32766]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "ExpectedArm64ASM": [ - "mov w20, #0x7ffe", - "add x20, x5, x20", - "mov w20, w20", - "ldr x4, [x20]" + "mov x20, x5", + "mov w21, #0x7ffe", + "add x22, x20, x21", + "mov w20, w22", + "ldr x21, [x20]", + "mov x4, x21" ] }, "mov rax, qword [ecx + 32767]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "ExpectedArm64ASM": [ - "mov w20, #0x7fff", - "add x20, x5, x20", - "mov w20, w20", - "ldr x4, [x20]" + "mov x20, x5", + "mov w21, #0x7fff", + "add x22, x20, x21", + "mov w20, w22", + "ldr x21, [x20]", + "mov x4, x21" ] }, "mov rax, qword [ecx + 32768]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "add x20, x5, #0x8000 (32768)", - "mov w20, w20", - "ldr x4, [x20]" + "mov x20, x5", + "add x21, x20, #0x8000 (32768)", + "mov w20, w21", + "ldr x21, [x20]", + "mov x4, x21" ] }, "movzx rax, byte [rcx - 257]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "sub x20, x5, #0x101 (257)", - "ldrb w4, [x20]" + "mov x20, x5", + "sub x21, x20, #0x101 (257)", + "ldrb w20, [x21]", + "mov x4, x20" ] }, "movzx rax, byte [rcx - 256]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "sub x20, x5, #0x100 (256)", - "ldrb w4, [x20]" + "mov x20, x5", + "sub x21, x20, #0x100 (256)", + "ldrb w20, [x21]", + "mov x4, x20" ] }, "movzx rax, byte [rcx + 255]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldrb w4, [x5, #255]" + "mov x20, x5", + "ldrb w21, [x20, #255]", + "mov x4, x21" ] }, "movzx rax, byte [rcx + 256]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldrb w4, [x5, #256]" + "mov x20, x5", + "ldrb w21, [x20, #256]", + "mov x4, x21" ] }, "movzx rax, byte [rcx + 4095]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldrb w4, [x5, #4095]" + "mov x20, x5", + "ldrb w21, [x20, #4095]", + "mov x4, x21" ] }, "movzx rax, byte [rcx + 4096]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0x1000", - "ldrb w4, [x5, x20, sxtx]" + "mov x20, x5", + "mov w21, #0x1000", + "ldrb w22, [x20, x21, sxtx]", + "mov x4, x22" ] }, "movzx rax, word [rcx - 257]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "sub x20, x5, #0x101 (257)", - "ldrh w4, [x20]" + "mov x20, x5", + "sub x21, x20, #0x101 (257)", + "ldrh w20, [x21]", + "mov x4, x20" ] }, "movzx rax, word [rcx - 256]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "sub x20, x5, #0x100 (256)", - "ldrh w4, [x20]" + "mov x20, x5", + "sub x21, x20, #0x100 (256)", + "ldrh w20, [x21]", + "mov x4, x20" ] }, "movzx rax, word [rcx + 255]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldurh w4, [x5, #255]" + "mov x20, x5", + "ldurh w21, [x20, #255]", + "mov x4, x21" ] }, "movzx rax, word [rcx + 256]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldrh w4, [x5, #256]" + "mov x20, x5", + "ldrh w21, [x20, #256]", + "mov x4, x21" ] }, "movzx rax, word [rcx + 8190]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldrh w4, [x5, #8190]" + "mov x20, x5", + "ldrh w21, [x20, #8190]", + "mov x4, x21" ] }, "movzx rax, word [rcx + 8191]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0x1fff", - "ldrh w4, [x5, x20, sxtx]" + "mov x20, x5", + "mov w21, #0x1fff", + "ldrh w22, [x20, x21, sxtx]", + "mov x4, x22" ] }, "movzx rax, word [rcx + 8192]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0x2000", - "ldrh w4, [x5, x20, sxtx]" + "mov x20, x5", + "mov w21, #0x2000", + "ldrh w22, [x20, x21, sxtx]", + "mov x4, x22" ] }, "mov eax, dword [rcx - 257]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "sub x20, x5, #0x101 (257)", - "ldr w4, [x20]" + "mov x20, x5", + "sub x21, x20, #0x101 (257)", + "ldr w20, [x21]", + "mov x4, x20" ] }, "mov eax, dword [rcx - 256]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "sub x20, x5, #0x100 (256)", - "ldr w4, [x20]" + "mov x20, x5", + "sub x21, x20, #0x100 (256)", + "ldr w20, [x21]", + "mov x4, x20" ] }, "mov eax, dword [rcx + 255]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldur w4, [x5, #255]" + "mov x20, x5", + "ldur w21, [x20, #255]", + "mov x4, x21" ] }, "mov eax, dword [rcx + 256]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldr w4, [x5, #256]" + "mov x20, x5", + "ldr w21, [x20, #256]", + "mov x4, x21" ] }, "mov eax, dword [rcx + 16380]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldr w4, [x5, #16380]" + "mov x20, x5", + "ldr w21, [x20, #16380]", + "mov x4, x21" ] }, "mov eax, dword [rcx + 16381]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0x3ffd", - "ldr w4, [x5, x20, sxtx]" + "mov x20, x5", + "mov w21, #0x3ffd", + "ldr w22, [x20, x21, sxtx]", + "mov x4, x22" ] }, "mov eax, dword [rcx + 16382]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0x3ffe", - "ldr w4, [x5, x20, sxtx]" + "mov x20, x5", + "mov w21, #0x3ffe", + "ldr w22, [x20, x21, sxtx]", + "mov x4, x22" ] }, "mov eax, dword [rcx + 16383]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0x3fff", - "ldr w4, [x5, x20, sxtx]" + "mov x20, x5", + "mov w21, #0x3fff", + "ldr w22, [x20, x21, sxtx]", + "mov x4, x22" ] }, "mov eax, dword [rcx + 16384]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0x4000", - "ldr w4, [x5, x20, sxtx]" + "mov x20, x5", + "mov w21, #0x4000", + "ldr w22, [x20, x21, sxtx]", + "mov x4, x22" ] }, "mov rax, qword [rcx - 257]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "sub x20, x5, #0x101 (257)", - "ldr x4, [x20]" + "mov x20, x5", + "sub x21, x20, #0x101 (257)", + "ldr x20, [x21]", + "mov x4, x20" ] }, "mov rax, qword [rcx - 256]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "sub x20, x5, #0x100 (256)", - "ldr x4, [x20]" + "mov x20, x5", + "sub x21, x20, #0x100 (256)", + "ldr x20, [x21]", + "mov x4, x20" ] }, "mov rax, qword [rcx + 255]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldur x4, [x5, #255]" + "mov x20, x5", + "ldur x21, [x20, #255]", + "mov x4, x21" ] }, "mov rax, qword [rcx + 256]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldr x4, [x5, #256]" + "mov x20, x5", + "ldr x21, [x20, #256]", + "mov x4, x21" ] }, "mov rax, qword [rcx + 32760]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldr x4, [x5, #32760]" + "mov x20, x5", + "ldr x21, [x20, #32760]", + "mov x4, x21" ] }, "mov rax, qword [rcx + 32761]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0x7ff9", - "ldr x4, [x5, x20, sxtx]" + "mov x20, x5", + "mov w21, #0x7ff9", + "ldr x22, [x20, x21, sxtx]", + "mov x4, x22" ] }, "mov rax, qword [rcx + 32762]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0x7ffa", - "ldr x4, [x5, x20, sxtx]" + "mov x20, x5", + "mov w21, #0x7ffa", + "ldr x22, [x20, x21, sxtx]", + "mov x4, x22" ] }, "mov rax, qword [rcx + 32763]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0x7ffb", - "ldr x4, [x5, x20, sxtx]" + "mov x20, x5", + "mov w21, #0x7ffb", + "ldr x22, [x20, x21, sxtx]", + "mov x4, x22" ] }, "mov rax, qword [rcx + 32764]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0x7ffc", - "ldr x4, [x5, x20, sxtx]" + "mov x20, x5", + "mov w21, #0x7ffc", + "ldr x22, [x20, x21, sxtx]", + "mov x4, x22" ] }, "mov rax, qword [rcx + 32765]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0x7ffd", - "ldr x4, [x5, x20, sxtx]" + "mov x20, x5", + "mov w21, #0x7ffd", + "ldr x22, [x20, x21, sxtx]", + "mov x4, x22" ] }, "mov rax, qword [rcx + 32766]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0x7ffe", - "ldr x4, [x5, x20, sxtx]" + "mov x20, x5", + "mov w21, #0x7ffe", + "ldr x22, [x20, x21, sxtx]", + "mov x4, x22" ] }, "mov rax, qword [rcx + 32767]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0x7fff", - "ldr x4, [x5, x20, sxtx]" + "mov x20, x5", + "mov w21, #0x7fff", + "ldr x22, [x20, x21, sxtx]", + "mov x4, x22" ] }, "mov rax, qword [rcx + 32768]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0x8000", - "ldr x4, [x5, x20, sxtx]" + "mov x20, x5", + "mov w21, #0x8000", + "ldr x22, [x20, x21, sxtx]", + "mov x4, x22" ] }, "movss xmm0, [rcx + 16379]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0x3ffb", - "ldr s16, [x5, x20, sxtx]" + "mov x20, x5", + "mov w21, #0x3ffb", + "ldr s2, [x20, x21, sxtx]", + "mov v16.16b, v2.16b" ] }, "movss xmm0, [rcx + 16380]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldr s16, [x5, #16380]" + "mov x20, x5", + "ldr s2, [x20, #16380]", + "mov v16.16b, v2.16b" ] }, "movss xmm0, [rcx + 16381]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0x3ffd", - "ldr s16, [x5, x20, sxtx]" + "mov x20, x5", + "mov w21, #0x3ffd", + "ldr s2, [x20, x21, sxtx]", + "mov v16.16b, v2.16b" ] }, "movss xmm0, [rcx - 257]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "sub x20, x5, #0x101 (257)", - "ldr s16, [x20]" + "mov x20, x5", + "sub x21, x20, #0x101 (257)", + "ldr s2, [x21]", + "mov v16.16b, v2.16b" ] }, "movss xmm0, [rcx - 256]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "sub x20, x5, #0x100 (256)", - "ldr s16, [x20]" + "mov x20, x5", + "sub x21, x20, #0x100 (256)", + "ldr s2, [x21]", + "mov v16.16b, v2.16b" ] }, "movss xmm0, [rcx + 255]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldur s16, [x5, #255]" + "mov x20, x5", + "ldur s2, [x20, #255]", + "mov v16.16b, v2.16b" ] }, "movss xmm0, [rcx + 256]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldr s16, [x5, #256]" + "mov x20, x5", + "ldr s2, [x20, #256]", + "mov v16.16b, v2.16b" ] }, "movsd xmm0, [rcx + 32759]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0x7ff7", - "ldr d16, [x5, x20, sxtx]" + "mov x20, x5", + "mov w21, #0x7ff7", + "ldr d2, [x20, x21, sxtx]", + "mov v16.16b, v2.16b" ] }, "movsd xmm0, [rcx + 32760]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldr d16, [x5, #32760]" + "mov x20, x5", + "ldr d2, [x20, #32760]", + "mov v16.16b, v2.16b" ] }, "movsd xmm0, [rcx + 32761]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0x7ff9", - "ldr d16, [x5, x20, sxtx]" + "mov x20, x5", + "mov w21, #0x7ff9", + "ldr d2, [x20, x21, sxtx]", + "mov v16.16b, v2.16b" ] }, "movsd xmm0, [rcx - 257]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "sub x20, x5, #0x101 (257)", - "ldr d16, [x20]" + "mov x20, x5", + "sub x21, x20, #0x101 (257)", + "ldr d2, [x21]", + "mov v16.16b, v2.16b" ] }, "movsd xmm0, [rcx - 256]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "sub x20, x5, #0x100 (256)", - "ldr d16, [x20]" + "mov x20, x5", + "sub x21, x20, #0x100 (256)", + "ldr d2, [x21]", + "mov v16.16b, v2.16b" ] }, "movsd xmm0, [rcx + 255]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldur d16, [x5, #255]" + "mov x20, x5", + "ldur d2, [x20, #255]", + "mov v16.16b, v2.16b" ] }, "movsd xmm0, [rcx + 256]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldr d16, [x5, #256]" + "mov x20, x5", + "ldr d2, [x20, #256]", + "mov v16.16b, v2.16b" ] }, "movq xmm0, [rcx + 65519]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0xffef", - "ldr d16, [x5, x20, sxtx]" + "mov x20, x5", + "mov w21, #0xffef", + "ldr d2, [x20, x21, sxtx]", + "mov v16.16b, v2.16b" ] }, "movq xmm0, [rcx + 65520]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0xfff0", - "ldr d16, [x5, x20, sxtx]" + "mov x20, x5", + "mov w21, #0xfff0", + "ldr d2, [x20, x21, sxtx]", + "mov v16.16b, v2.16b" ] }, "movq xmm0, [rcx + 65521]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0xfff1", - "ldr d16, [x5, x20, sxtx]" + "mov x20, x5", + "mov w21, #0xfff1", + "ldr d2, [x20, x21, sxtx]", + "mov v16.16b, v2.16b" ] }, "movq xmm0, [rcx - 257]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "sub x20, x5, #0x101 (257)", - "ldr d16, [x20]" + "mov x20, x5", + "sub x21, x20, #0x101 (257)", + "ldr d2, [x21]", + "mov v16.16b, v2.16b" ] }, "movq xmm0, [rcx - 256]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "sub x20, x5, #0x100 (256)", - "ldr d16, [x20]" + "mov x20, x5", + "sub x21, x20, #0x100 (256)", + "ldr d2, [x21]", + "mov v16.16b, v2.16b" ] }, "movq xmm0, [rcx + 255]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldur d16, [x5, #255]" + "mov x20, x5", + "ldur d2, [x20, #255]", + "mov v16.16b, v2.16b" ] }, "movq xmm0, [rcx + 256]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldr d16, [x5, #256]" + "mov x20, x5", + "ldr d2, [x20, #256]", + "mov v16.16b, v2.16b" ] }, "movss xmm0, [ecx + 16379]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "ExpectedArm64ASM": [ - "mov w20, #0x3ffb", - "add x20, x5, x20", - "mov w20, w20", - "ldr s16, [x20]" + "mov x20, x5", + "mov w21, #0x3ffb", + "add x22, x20, x21", + "mov w20, w22", + "ldr s2, [x20]", + "mov v16.16b, v2.16b" ] }, "movss xmm0, [ecx + 16380]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "ExpectedArm64ASM": [ - "mov w20, #0x3ffc", - "add x20, x5, x20", - "mov w20, w20", - "ldr s16, [x20]" + "mov x20, x5", + "mov w21, #0x3ffc", + "add x22, x20, x21", + "mov w20, w22", + "ldr s2, [x20]", + "mov v16.16b, v2.16b" ] }, "movss xmm0, [ecx + 16381]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "ExpectedArm64ASM": [ - "mov w20, #0x3ffd", - "add x20, x5, x20", - "mov w20, w20", - "ldr s16, [x20]" + "mov x20, x5", + "mov w21, #0x3ffd", + "add x22, x20, x21", + "mov w20, w22", + "ldr s2, [x20]", + "mov v16.16b, v2.16b" ] }, "movss xmm0, [ecx - 257]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "sub x20, x5, #0x101 (257)", - "mov w20, w20", - "ldr s16, [x20]" + "mov x20, x5", + "sub x21, x20, #0x101 (257)", + "mov w20, w21", + "ldr s2, [x20]", + "mov v16.16b, v2.16b" ] }, "movss xmm0, [ecx - 256]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "sub x20, x5, #0x100 (256)", - "mov w20, w20", - "ldr s16, [x20]" + "mov x20, x5", + "sub x21, x20, #0x100 (256)", + "mov w20, w21", + "ldr s2, [x20]", + "mov v16.16b, v2.16b" ] }, "movss xmm0, [ecx + 255]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "add x20, x5, #0xff (255)", - "mov w20, w20", - "ldr s16, [x20]" + "mov x20, x5", + "add x21, x20, #0xff (255)", + "mov w20, w21", + "ldr s2, [x20]", + "mov v16.16b, v2.16b" ] }, "movss xmm0, [ecx + 256]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "add x20, x5, #0x100 (256)", - "mov w20, w20", - "ldr s16, [x20]" + "mov x20, x5", + "add x21, x20, #0x100 (256)", + "mov w20, w21", + "ldr s2, [x20]", + "mov v16.16b, v2.16b" ] }, "movsd xmm0, [ecx + 32759]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "ExpectedArm64ASM": [ - "mov w20, #0x7ff7", - "add x20, x5, x20", - "mov w20, w20", - "ldr d16, [x20]" + "mov x20, x5", + "mov w21, #0x7ff7", + "add x22, x20, x21", + "mov w20, w22", + "ldr d2, [x20]", + "mov v16.16b, v2.16b" ] }, "movsd xmm0, [ecx + 32760]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "ExpectedArm64ASM": [ - "mov w20, #0x7ff8", - "add x20, x5, x20", - "mov w20, w20", - "ldr d16, [x20]" + "mov x20, x5", + "mov w21, #0x7ff8", + "add x22, x20, x21", + "mov w20, w22", + "ldr d2, [x20]", + "mov v16.16b, v2.16b" ] }, "movsd xmm0, [ecx + 32761]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "ExpectedArm64ASM": [ - "mov w20, #0x7ff9", - "add x20, x5, x20", - "mov w20, w20", - "ldr d16, [x20]" + "mov x20, x5", + "mov w21, #0x7ff9", + "add x22, x20, x21", + "mov w20, w22", + "ldr d2, [x20]", + "mov v16.16b, v2.16b" ] }, "movsd xmm0, [ecx - 257]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "sub x20, x5, #0x101 (257)", - "mov w20, w20", - "ldr d16, [x20]" + "mov x20, x5", + "sub x21, x20, #0x101 (257)", + "mov w20, w21", + "ldr d2, [x20]", + "mov v16.16b, v2.16b" ] }, "movsd xmm0, [ecx - 256]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "sub x20, x5, #0x100 (256)", - "mov w20, w20", - "ldr d16, [x20]" + "mov x20, x5", + "sub x21, x20, #0x100 (256)", + "mov w20, w21", + "ldr d2, [x20]", + "mov v16.16b, v2.16b" ] }, "movsd xmm0, [ecx + 255]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "add x20, x5, #0xff (255)", - "mov w20, w20", - "ldr d16, [x20]" + "mov x20, x5", + "add x21, x20, #0xff (255)", + "mov w20, w21", + "ldr d2, [x20]", + "mov v16.16b, v2.16b" ] }, "movsd xmm0, [ecx + 256]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "add x20, x5, #0x100 (256)", - "mov w20, w20", - "ldr d16, [x20]" + "mov x20, x5", + "add x21, x20, #0x100 (256)", + "mov w20, w21", + "ldr d2, [x20]", + "mov v16.16b, v2.16b" ] }, "movq xmm0, [ecx + 65519]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "ExpectedArm64ASM": [ - "mov w20, #0xffef", - "add x20, x5, x20", - "mov w20, w20", - "ldr d16, [x20]" + "mov x20, x5", + "mov w21, #0xffef", + "add x22, x20, x21", + "mov w20, w22", + "ldr d2, [x20]", + "mov v16.16b, v2.16b" ] }, "movq xmm0, [ecx + 65520]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "ExpectedArm64ASM": [ - "mov w20, #0xfff0", - "add x20, x5, x20", - "mov w20, w20", - "ldr d16, [x20]" + "mov x20, x5", + "mov w21, #0xfff0", + "add x22, x20, x21", + "mov w20, w22", + "ldr d2, [x20]", + "mov v16.16b, v2.16b" ] }, "movq xmm0, [ecx + 65521]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "ExpectedArm64ASM": [ - "mov w20, #0xfff1", - "add x20, x5, x20", - "mov w20, w20", - "ldr d16, [x20]" + "mov x20, x5", + "mov w21, #0xfff1", + "add x22, x20, x21", + "mov w20, w22", + "ldr d2, [x20]", + "mov v16.16b, v2.16b" ] }, "movq xmm0, [ecx - 257]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "sub x20, x5, #0x101 (257)", - "mov w20, w20", - "ldr d16, [x20]" + "mov x20, x5", + "sub x21, x20, #0x101 (257)", + "mov w20, w21", + "ldr d2, [x20]", + "mov v16.16b, v2.16b" ] }, "movq xmm0, [ecx - 256]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "sub x20, x5, #0x100 (256)", - "mov w20, w20", - "ldr d16, [x20]" + "mov x20, x5", + "sub x21, x20, #0x100 (256)", + "mov w20, w21", + "ldr d2, [x20]", + "mov v16.16b, v2.16b" ] }, "movq xmm0, [ecx + 255]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "add x20, x5, #0xff (255)", - "mov w20, w20", - "ldr d16, [x20]" + "mov x20, x5", + "add x21, x20, #0xff (255)", + "mov w20, w21", + "ldr d2, [x20]", + "mov v16.16b, v2.16b" ] }, "movq xmm0, [ecx + 256]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "add x20, x5, #0x100 (256)", - "mov w20, w20", - "ldr d16, [x20]" + "mov x20, x5", + "add x21, x20, #0x100 (256)", + "mov w20, w21", + "ldr d2, [x20]", + "mov v16.16b, v2.16b" ] }, "prefetch [rcx - 257]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "sub x20, x5, #0x101 (257)", - "prfm pldl1keep, [x20]" + "mov x20, x5", + "sub x21, x20, #0x101 (257)", + "prfm pldl1keep, [x21]" ] }, "prefetch [rcx - 256]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "sub x20, x5, #0x100 (256)", - "prfm pldl1keep, [x20]" + "mov x20, x5", + "sub x21, x20, #0x100 (256)", + "prfm pldl1keep, [x21]" ] }, "prefetch [rcx + 255]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "ExpectedArm64ASM": [ - "prfum pldl1keep, [x5, #255]" + "mov x20, x5", + "prfum pldl1keep, [x20, #255]" ] }, "prefetch [rcx + 256]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "ExpectedArm64ASM": [ - "prfm pldl1keep, [x5, #256]" + "mov x20, x5", + "prfm pldl1keep, [x20, #256]" ] }, "prefetch [rcx + 32760]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "ExpectedArm64ASM": [ - "prfm pldl1keep, [x5, #32760]" + "mov x20, x5", + "prfm pldl1keep, [x20, #32760]" ] }, "prefetch [rcx + 32761]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "mov w20, #0x7ff9", - "prfm pldl1keep, [x5, x20, sxtx]" + "mov x20, x5", + "mov w21, #0x7ff9", + "prfm pldl1keep, [x20, x21, sxtx]" ] }, "prefetch [rax + rcx*1]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "prfm pldl1keep, [x4, x5, sxtx]" + "mov x20, x4", + "mov x21, x5", + "prfm pldl1keep, [x20, x21, sxtx]" ] }, "prefetch [rax + rcx*2]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "add x20, x4, x5, lsl #1", - "prfm pldl1keep, [x20]" + "mov x20, x4", + "mov x21, x5", + "add x22, x20, x21, lsl #1", + "prfm pldl1keep, [x22]" ] }, "prefetch [rax + rcx*4]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "add x20, x4, x5, lsl #2", - "prfm pldl1keep, [x20]" + "mov x20, x4", + "mov x21, x5", + "add x22, x20, x21, lsl #2", + "prfm pldl1keep, [x22]" ] }, "prefetch [rax + rcx*8]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "prfm pldl1keep, [x5, x4, sxtx #3]" + "mov x20, x4", + "mov x21, x5", + "prfm pldl1keep, [x21, x20, sxtx #3]" ] } } diff --git a/unittests/InstructionCountCI/FEXOpt/AddressingLimitations_32Bit.json b/unittests/InstructionCountCI/FEXOpt/AddressingLimitations_32Bit.json index c1f9b09346..43b73e192b 100644 --- a/unittests/InstructionCountCI/FEXOpt/AddressingLimitations_32Bit.json +++ b/unittests/InstructionCountCI/FEXOpt/AddressingLimitations_32Bit.json @@ -14,294 +14,380 @@ ], "Instructions": { "movzx eax, byte [ecx - 257]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0xfffffeff", - "ldrb w4, [x5, w20, sxtw]" + "mov w20, w5", + "mov w21, #0xfffffeff", + "ldrb w22, [x20, w21, sxtw]", + "mov w4, w22" ] }, "movzx eax, byte [ecx - 256]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0xffffff00", - "ldrb w4, [x5, w20, sxtw]" + "mov w20, w5", + "mov w21, #0xffffff00", + "ldrb w22, [x20, w21, sxtw]", + "mov w4, w22" ] }, "movzx eax, byte [ecx + 255]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldrb w4, [x5, #255]" + "mov w20, w5", + "ldrb w21, [x20, #255]", + "mov w4, w21" ] }, "movzx eax, byte [ecx + 256]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldrb w4, [x5, #256]" + "mov w20, w5", + "ldrb w21, [x20, #256]", + "mov w4, w21" ] }, "movzx eax, byte [ecx + 4095]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldrb w4, [x5, #4095]" + "mov w20, w5", + "ldrb w21, [x20, #4095]", + "mov w4, w21" ] }, "movzx eax, byte [ecx + 4096]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0x1000", - "ldrb w4, [x5, x20, sxtx]" + "mov w20, w5", + "mov w21, #0x1000", + "ldrb w22, [x20, x21, sxtx]", + "mov w4, w22" ] }, "movzx eax, word [ecx - 257]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0xfffffeff", - "ldrh w4, [x5, w20, sxtw]" + "mov w20, w5", + "mov w21, #0xfffffeff", + "ldrh w22, [x20, w21, sxtw]", + "mov w4, w22" ] }, "movzx eax, word [ecx - 256]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0xffffff00", - "ldrh w4, [x5, w20, sxtw]" + "mov w20, w5", + "mov w21, #0xffffff00", + "ldrh w22, [x20, w21, sxtw]", + "mov w4, w22" ] }, "movzx eax, word [ecx + 255]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldurh w4, [x5, #255]" + "mov w20, w5", + "ldurh w21, [x20, #255]", + "mov w4, w21" ] }, "movzx eax, word [ecx + 256]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldrh w4, [x5, #256]" + "mov w20, w5", + "ldrh w21, [x20, #256]", + "mov w4, w21" ] }, "movzx eax, word [ecx + 8190]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldrh w4, [x5, #8190]" + "mov w20, w5", + "ldrh w21, [x20, #8190]", + "mov w4, w21" ] }, "movzx eax, word [ecx + 8191]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0x1fff", - "ldrh w4, [x5, x20, sxtx]" + "mov w20, w5", + "mov w21, #0x1fff", + "ldrh w22, [x20, x21, sxtx]", + "mov w4, w22" ] }, "movzx eax, word [ecx + 8192]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0x2000", - "ldrh w4, [x5, x20, sxtx]" + "mov w20, w5", + "mov w21, #0x2000", + "ldrh w22, [x20, x21, sxtx]", + "mov w4, w22" ] }, "mov eax, dword [ecx - 257]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0xfffffeff", - "ldr w4, [x5, w20, sxtw]" + "mov w20, w5", + "mov w21, #0xfffffeff", + "ldr w22, [x20, w21, sxtw]", + "mov w4, w22" ] }, "mov eax, dword [ecx - 256]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0xffffff00", - "ldr w4, [x5, w20, sxtw]" + "mov w20, w5", + "mov w21, #0xffffff00", + "ldr w22, [x20, w21, sxtw]", + "mov w4, w22" ] }, "mov eax, dword [ecx + 255]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldur w4, [x5, #255]" + "mov w20, w5", + "ldur w21, [x20, #255]", + "mov w4, w21" ] }, "mov eax, dword [ecx + 256]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldr w4, [x5, #256]" + "mov w20, w5", + "ldr w21, [x20, #256]", + "mov w4, w21" ] }, "mov eax, dword [ecx + 16380]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldr w4, [x5, #16380]" + "mov w20, w5", + "ldr w21, [x20, #16380]", + "mov w4, w21" ] }, "mov eax, dword [ecx + 16381]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0x3ffd", - "ldr w4, [x5, x20, sxtx]" + "mov w20, w5", + "mov w21, #0x3ffd", + "ldr w22, [x20, x21, sxtx]", + "mov w4, w22" ] }, "mov eax, dword [ecx + 16382]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0x3ffe", - "ldr w4, [x5, x20, sxtx]" + "mov w20, w5", + "mov w21, #0x3ffe", + "ldr w22, [x20, x21, sxtx]", + "mov w4, w22" ] }, "mov eax, dword [ecx + 16383]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0x3fff", - "ldr w4, [x5, x20, sxtx]" + "mov w20, w5", + "mov w21, #0x3fff", + "ldr w22, [x20, x21, sxtx]", + "mov w4, w22" ] }, "mov eax, dword [ecx + 16384]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "add w20, w5, #0x4000 (16384)", - "ldr w4, [x20]" + "mov w20, w5", + "add w21, w20, #0x4000 (16384)", + "ldr w20, [x21]", + "mov w4, w20" ] }, "movss xmm0, [ecx + 16379]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0x3ffb", - "ldr s16, [x5, x20, sxtx]" + "mov w20, w5", + "mov w21, #0x3ffb", + "ldr s2, [x20, x21, sxtx]", + "mov v16.16b, v2.16b" ] }, "movss xmm0, [ecx + 16380]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldr s16, [x5, #16380]" + "mov w20, w5", + "ldr s2, [x20, #16380]", + "mov v16.16b, v2.16b" ] }, "movss xmm0, [ecx + 16381]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0x3ffd", - "ldr s16, [x5, x20, sxtx]" + "mov w20, w5", + "mov w21, #0x3ffd", + "ldr s2, [x20, x21, sxtx]", + "mov v16.16b, v2.16b" ] }, "movss xmm0, [ecx - 257]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0xfffffeff", - "ldr s16, [x5, w20, sxtw]" + "mov w20, w5", + "mov w21, #0xfffffeff", + "ldr s2, [x20, w21, sxtw]", + "mov v16.16b, v2.16b" ] }, "movss xmm0, [ecx - 256]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0xffffff00", - "ldr s16, [x5, w20, sxtw]" + "mov w20, w5", + "mov w21, #0xffffff00", + "ldr s2, [x20, w21, sxtw]", + "mov v16.16b, v2.16b" ] }, "movss xmm0, [ecx + 255]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldur s16, [x5, #255]" + "mov w20, w5", + "ldur s2, [x20, #255]", + "mov v16.16b, v2.16b" ] }, "movss xmm0, [ecx + 256]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldr s16, [x5, #256]" + "mov w20, w5", + "ldr s2, [x20, #256]", + "mov v16.16b, v2.16b" ] }, "movsd xmm0, [ecx + 32759]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "mov w20, #0x7ff7", - "add w20, w5, w20", - "ldr d16, [x20]" + "mov w20, w5", + "mov w21, #0x7ff7", + "add w22, w20, w21", + "ldr d2, [x22]", + "mov v16.16b, v2.16b" ] }, "movsd xmm0, [ecx + 32760]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "mov w20, #0x7ff8", - "add w20, w5, w20", - "ldr d16, [x20]" + "mov w20, w5", + "mov w21, #0x7ff8", + "add w22, w20, w21", + "ldr d2, [x22]", + "mov v16.16b, v2.16b" ] }, "movsd xmm0, [ecx + 32761]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "mov w20, #0x7ff9", - "add w20, w5, w20", - "ldr d16, [x20]" + "mov w20, w5", + "mov w21, #0x7ff9", + "add w22, w20, w21", + "ldr d2, [x22]", + "mov v16.16b, v2.16b" ] }, "movsd xmm0, [ecx - 257]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0xfffffeff", - "ldr d16, [x5, w20, sxtw]" + "mov w20, w5", + "mov w21, #0xfffffeff", + "ldr d2, [x20, w21, sxtw]", + "mov v16.16b, v2.16b" ] }, "movsd xmm0, [ecx - 256]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0xffffff00", - "ldr d16, [x5, w20, sxtw]" + "mov w20, w5", + "mov w21, #0xffffff00", + "ldr d2, [x20, w21, sxtw]", + "mov v16.16b, v2.16b" ] }, "movsd xmm0, [ecx + 255]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldur d16, [x5, #255]" + "mov w20, w5", + "ldur d2, [x20, #255]", + "mov v16.16b, v2.16b" ] }, "movsd xmm0, [ecx + 256]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldr d16, [x5, #256]" + "mov w20, w5", + "ldr d2, [x20, #256]", + "mov v16.16b, v2.16b" ] }, "movq xmm0, [ecx + 65519]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "mov w20, #0xffef", - "add w20, w5, w20", - "ldr d16, [x20]" + "mov w20, w5", + "mov w21, #0xffef", + "add w22, w20, w21", + "ldr d2, [x22]", + "mov v16.16b, v2.16b" ] }, "movq xmm0, [ecx + 65520]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "mov w20, #0xfff0", - "add w20, w5, w20", - "ldr d16, [x20]" + "mov w20, w5", + "mov w21, #0xfff0", + "add w22, w20, w21", + "ldr d2, [x22]", + "mov v16.16b, v2.16b" ] }, "movq xmm0, [ecx + 65521]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "ExpectedArm64ASM": [ - "mov w20, #0xfff1", - "add w20, w5, w20", - "ldr d16, [x20]" + "mov w20, w5", + "mov w21, #0xfff1", + "add w22, w20, w21", + "ldr d2, [x22]", + "mov v16.16b, v2.16b" ] }, "movq xmm0, [ecx - 257]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0xfffffeff", - "ldr d16, [x5, w20, sxtw]" + "mov w20, w5", + "mov w21, #0xfffffeff", + "ldr d2, [x20, w21, sxtw]", + "mov v16.16b, v2.16b" ] }, "movq xmm0, [ecx - 256]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "ExpectedArm64ASM": [ - "mov w20, #0xffffff00", - "ldr d16, [x5, w20, sxtw]" + "mov w20, w5", + "mov w21, #0xffffff00", + "ldr d2, [x20, w21, sxtw]", + "mov v16.16b, v2.16b" ] }, "movq xmm0, [ecx + 255]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldur d16, [x5, #255]" + "mov w20, w5", + "ldur d2, [x20, #255]", + "mov v16.16b, v2.16b" ] }, "movq xmm0, [ecx + 256]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ldr d16, [x5, #256]" + "mov w20, w5", + "ldr d2, [x20, #256]", + "mov v16.16b, v2.16b" ] } } diff --git a/unittests/InstructionCountCI/FEXOpt/MultiInst.json b/unittests/InstructionCountCI/FEXOpt/MultiInst.json index 53237c8ea2..a90b9e7cdf 100644 --- a/unittests/InstructionCountCI/FEXOpt/MultiInst.json +++ b/unittests/InstructionCountCI/FEXOpt/MultiInst.json @@ -14,7 +14,7 @@ ], "Instructions": { "push ax, bx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 9, "Comment": [ "Mergable 16-bit pushes. May or may not be an optimization." ], @@ -23,12 +23,19 @@ "push bx" ], "ExpectedArm64ASM": [ - "strh w4, [x8, #-2]!", - "strh w7, [x8, #-2]!" + "mov x20, x4", + "mov x21, x8", + "mov x22, x21", + "strh w20, [x22, #-2]!", + "mov x8, x22", + "mov x20, x7", + "mov x21, x22", + "strh w20, [x21, #-2]!", + "mov x8, x21" ] }, "push rax, rbx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 9, "Comment": [ "Mergable 64-bit pushes" ], @@ -37,12 +44,19 @@ "push rbx" ], "ExpectedArm64ASM": [ - "str x4, [x8, #-8]!", - "str x7, [x8, #-8]!" + "mov x20, x4", + "mov x21, x8", + "mov x22, x21", + "str x20, [x22, #-8]!", + "mov x8, x22", + "mov x20, x7", + "mov x21, x22", + "str x20, [x21, #-8]!", + "mov x8, x21" ] }, "adds xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 11, "Comment": [ "Redundant scalar adds that can get eliminated without AFP." ], @@ -51,14 +65,21 @@ "addss xmm0, xmm2" ], "ExpectedArm64ASM": [ - "fadd s0, s16, s17", - "mov v16.s[0], v0.s[0]", - "fadd s0, s16, s18", - "mov v16.s[0], v0.s[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fadd s0, s2, s3", + "mov v4.s[0], v0.s[0]", + "mov v16.16b, v4.16b", + "mov v2.16b, v18.16b", + "mov v3.16b, v4.16b", + "fadd s0, s4, s2", + "mov v3.s[0], v0.s[0]", + "mov v16.16b, v3.16b" ] }, "positive movsb": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": [ "When direction flag is a compile time constant we can optimize", "loads and stores can turn in to post-increment when known" @@ -70,14 +91,18 @@ "ExpectedArm64ASM": [ "mov w20, #0x1", "strb w20, [x28, #714]", - "ldrb w20, [x10]", - "strb w20, [x11]", - "add x10, x10, #0x1 (1)", - "add x11, x11, #0x1 (1)" + "mov x20, x10", + "mov x21, x11", + "ldrb w22, [x20]", + "strb w22, [x21]", + "add x22, x20, #0x1 (1)", + "add x20, x21, #0x1 (1)", + "mov x10, x22", + "mov x11, x20" ] }, "positive movsw": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": [ "When direction flag is a compile time constant we can optimize", "loads and stores can turn in to post-increment when known" @@ -89,14 +114,18 @@ "ExpectedArm64ASM": [ "mov w20, #0x1", "strb w20, [x28, #714]", - "ldrh w20, [x10]", - "strh w20, [x11]", - "add x10, x10, #0x2 (2)", - "add x11, x11, #0x2 (2)" + "mov x20, x10", + "mov x21, x11", + "ldrh w22, [x20]", + "strh w22, [x21]", + "add x22, x20, #0x2 (2)", + "add x20, x21, #0x2 (2)", + "mov x10, x22", + "mov x11, x20" ] }, "positive movsd": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": [ "When direction flag is a compile time constant we can optimize", "loads and stores can turn in to post-increment when known" @@ -108,14 +137,18 @@ "ExpectedArm64ASM": [ "mov w20, #0x1", "strb w20, [x28, #714]", - "ldr w20, [x10]", - "str w20, [x11]", - "add x10, x10, #0x4 (4)", - "add x11, x11, #0x4 (4)" + "mov x20, x10", + "mov x21, x11", + "ldr w22, [x20]", + "str w22, [x21]", + "add x22, x20, #0x4 (4)", + "add x20, x21, #0x4 (4)", + "mov x10, x22", + "mov x11, x20" ] }, "positive movsq": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": [ "When direction flag is a compile time constant we can optimize", "loads and stores can turn in to post-increment when known" @@ -127,14 +160,18 @@ "ExpectedArm64ASM": [ "mov w20, #0x1", "strb w20, [x28, #714]", - "ldr x20, [x10]", - "str x20, [x11]", - "add x10, x10, #0x8 (8)", - "add x11, x11, #0x8 (8)" + "mov x20, x10", + "mov x21, x11", + "ldr x22, [x20]", + "str x22, [x21]", + "add x22, x20, #0x8 (8)", + "add x20, x21, #0x8 (8)", + "mov x10, x22", + "mov x11, x20" ] }, "negative movsb": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": [ "When direction flag is a compile time constant we can optimize", "loads and stores can turn in to post-increment when known" @@ -146,14 +183,18 @@ "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "strb w20, [x28, #714]", - "ldrb w20, [x10]", - "strb w20, [x11]", - "sub x10, x10, #0x1 (1)", - "sub x11, x11, #0x1 (1)" + "mov x20, x10", + "mov x21, x11", + "ldrb w22, [x20]", + "strb w22, [x21]", + "sub x22, x20, #0x1 (1)", + "sub x20, x21, #0x1 (1)", + "mov x10, x22", + "mov x11, x20" ] }, "negative movsw": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": [ "When direction flag is a compile time constant we can optimize", "loads and stores can turn in to post-increment when known" @@ -165,14 +206,18 @@ "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "strb w20, [x28, #714]", - "ldrh w20, [x10]", - "strh w20, [x11]", - "sub x10, x10, #0x2 (2)", - "sub x11, x11, #0x2 (2)" + "mov x20, x10", + "mov x21, x11", + "ldrh w22, [x20]", + "strh w22, [x21]", + "sub x22, x20, #0x2 (2)", + "sub x20, x21, #0x2 (2)", + "mov x10, x22", + "mov x11, x20" ] }, "negative movsd": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": [ "When direction flag is a compile time constant we can optimize", "loads and stores can turn in to post-increment when known" @@ -184,14 +229,18 @@ "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "strb w20, [x28, #714]", - "ldr w20, [x10]", - "str w20, [x11]", - "sub x10, x10, #0x4 (4)", - "sub x11, x11, #0x4 (4)" + "mov x20, x10", + "mov x21, x11", + "ldr w22, [x20]", + "str w22, [x21]", + "sub x22, x20, #0x4 (4)", + "sub x20, x21, #0x4 (4)", + "mov x10, x22", + "mov x11, x20" ] }, "negative movsq": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": [ "When direction flag is a compile time constant we can optimize", "loads and stores can turn in to post-increment when known" @@ -203,14 +252,18 @@ "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "strb w20, [x28, #714]", - "ldr x20, [x10]", - "str x20, [x11]", - "sub x10, x10, #0x8 (8)", - "sub x11, x11, #0x8 (8)" + "mov x20, x10", + "mov x21, x11", + "ldr x22, [x20]", + "str x22, [x21]", + "sub x22, x20, #0x8 (8)", + "sub x20, x21, #0x8 (8)", + "mov x10, x22", + "mov x11, x20" ] }, "positive rep movsb": { - "ExpectedInstructionCount": 44, + "ExpectedInstructionCount": 49, "Comment": [ "When direction flag is a compile time constant we can optimize", "loads and stores can turn in to post-increment when known" @@ -223,9 +276,12 @@ "mov w20, #0x0", "mov w21, #0x1", "strb w21, [x28, #714]", - "mov x0, x5", - "mov x1, x11", - "mov x2, x10", + "mov x21, x10", + "mov x22, x11", + "mov x23, x5", + "mov x0, x23", + "mov x1, x22", + "mov x2, x21", "cbz x0, #+0x78", "sub x3, x1, x2", "tbz x3, #63, #+0x8", @@ -256,18 +312,20 @@ "strb w3, [x1], #1", "sub x0, x0, #0x1 (1)", "cbnz x0, #-0xc", - "mov x0, x11", - "mov x1, x10", - "mov x2, x5", - "add x22, x0, x2", - "add x23, x1, x2", - "mov x11, x22", - "mov x10, x23", - "mov x5, x20" + "mov x0, x22", + "mov x1, x21", + "mov x2, x23", + "add x24, x0, x2", + "add x25, x1, x2", + "mov x21, x24", + "mov x22, x25", + "mov x5, x20", + "mov x11, x21", + "mov x10, x22" ] }, "positive rep movsw": { - "ExpectedInstructionCount": 44, + "ExpectedInstructionCount": 49, "Comment": [ "When direction flag is a compile time constant we can optimize", "loads and stores can turn in to post-increment when known" @@ -280,9 +338,12 @@ "mov w20, #0x0", "mov w21, #0x1", "strb w21, [x28, #714]", - "mov x0, x5", - "mov x1, x11", - "mov x2, x10", + "mov x21, x10", + "mov x22, x11", + "mov x23, x5", + "mov x0, x23", + "mov x1, x22", + "mov x2, x21", "cbz x0, #+0x78", "sub x3, x1, x2", "tbz x3, #63, #+0x8", @@ -313,18 +374,20 @@ "strh w3, [x1], #2", "sub x0, x0, #0x1 (1)", "cbnz x0, #-0xc", - "mov x0, x11", - "mov x1, x10", - "mov x2, x5", - "add x22, x0, x2, lsl #1", - "add x23, x1, x2, lsl #1", - "mov x11, x22", - "mov x10, x23", - "mov x5, x20" + "mov x0, x22", + "mov x1, x21", + "mov x2, x23", + "add x24, x0, x2, lsl #1", + "add x25, x1, x2, lsl #1", + "mov x21, x24", + "mov x22, x25", + "mov x5, x20", + "mov x11, x21", + "mov x10, x22" ] }, "positive rep movsd": { - "ExpectedInstructionCount": 44, + "ExpectedInstructionCount": 49, "Comment": [ "When direction flag is a compile time constant we can optimize", "loads and stores can turn in to post-increment when known" @@ -337,9 +400,12 @@ "mov w20, #0x0", "mov w21, #0x1", "strb w21, [x28, #714]", - "mov x0, x5", - "mov x1, x11", - "mov x2, x10", + "mov x21, x10", + "mov x22, x11", + "mov x23, x5", + "mov x0, x23", + "mov x1, x22", + "mov x2, x21", "cbz x0, #+0x78", "sub x3, x1, x2", "tbz x3, #63, #+0x8", @@ -370,18 +436,20 @@ "str w3, [x1], #4", "sub x0, x0, #0x1 (1)", "cbnz x0, #-0xc", - "mov x0, x11", - "mov x1, x10", - "mov x2, x5", - "add x22, x0, x2, lsl #2", - "add x23, x1, x2, lsl #2", - "mov x11, x22", - "mov x10, x23", - "mov x5, x20" + "mov x0, x22", + "mov x1, x21", + "mov x2, x23", + "add x24, x0, x2, lsl #2", + "add x25, x1, x2, lsl #2", + "mov x21, x24", + "mov x22, x25", + "mov x5, x20", + "mov x11, x21", + "mov x10, x22" ] }, "positive rep movsq": { - "ExpectedInstructionCount": 44, + "ExpectedInstructionCount": 49, "Comment": [ "When direction flag is a compile time constant we can optimize", "loads and stores can turn in to post-increment when known" @@ -394,9 +462,12 @@ "mov w20, #0x0", "mov w21, #0x1", "strb w21, [x28, #714]", - "mov x0, x5", - "mov x1, x11", - "mov x2, x10", + "mov x21, x10", + "mov x22, x11", + "mov x23, x5", + "mov x0, x23", + "mov x1, x22", + "mov x2, x21", "cbz x0, #+0x78", "sub x3, x1, x2", "tbz x3, #63, #+0x8", @@ -427,18 +498,20 @@ "str x3, [x1], #8", "sub x0, x0, #0x1 (1)", "cbnz x0, #-0xc", - "mov x0, x11", - "mov x1, x10", - "mov x2, x5", - "add x22, x0, x2, lsl #3", - "add x23, x1, x2, lsl #3", - "mov x11, x22", - "mov x10, x23", - "mov x5, x20" + "mov x0, x22", + "mov x1, x21", + "mov x2, x23", + "add x24, x0, x2, lsl #3", + "add x25, x1, x2, lsl #3", + "mov x21, x24", + "mov x22, x25", + "mov x5, x20", + "mov x11, x21", + "mov x10, x22" ] }, "negative rep movsb": { - "ExpectedInstructionCount": 47, + "ExpectedInstructionCount": 53, "Comment": [ "When direction flag is a compile time constant we can optimize", "loads and stores can turn in to post-increment when known" @@ -450,9 +523,12 @@ "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "strb w20, [x28, #714]", - "mov x0, x5", - "mov x1, x11", - "mov x2, x10", + "mov x20, x10", + "mov x21, x11", + "mov x22, x5", + "mov x0, x22", + "mov x1, x21", + "mov x2, x20", "cbz x0, #+0x88", "sub x3, x1, x2", "tbz x3, #63, #+0x8", @@ -487,18 +563,21 @@ "strb w3, [x1], #-1", "sub x0, x0, #0x1 (1)", "cbnz x0, #-0xc", - "mov x0, x11", - "mov x1, x10", - "mov x2, x5", - "sub x20, x0, x2", - "sub x21, x1, x2", + "mov x0, x21", + "mov x1, x20", + "mov x2, x22", + "sub x24, x0, x2", + "sub x25, x1, x2", + "mov x20, x24", + "mov x21, x25", + "mov w22, #0x0", + "mov x5, x22", "mov x11, x20", - "mov x10, x21", - "mov w5, #0x0" + "mov x10, x21" ] }, "negative rep movsw": { - "ExpectedInstructionCount": 47, + "ExpectedInstructionCount": 53, "Comment": [ "When direction flag is a compile time constant we can optimize", "loads and stores can turn in to post-increment when known" @@ -510,9 +589,12 @@ "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "strb w20, [x28, #714]", - "mov x0, x5", - "mov x1, x11", - "mov x2, x10", + "mov x20, x10", + "mov x21, x11", + "mov x22, x5", + "mov x0, x22", + "mov x1, x21", + "mov x2, x20", "cbz x0, #+0x88", "sub x3, x1, x2", "tbz x3, #63, #+0x8", @@ -547,18 +629,21 @@ "strh w3, [x1], #-2", "sub x0, x0, #0x1 (1)", "cbnz x0, #-0xc", - "mov x0, x11", - "mov x1, x10", - "mov x2, x5", - "sub x20, x0, x2, lsl #1", - "sub x21, x1, x2, lsl #1", + "mov x0, x21", + "mov x1, x20", + "mov x2, x22", + "sub x24, x0, x2, lsl #1", + "sub x25, x1, x2, lsl #1", + "mov x20, x24", + "mov x21, x25", + "mov w22, #0x0", + "mov x5, x22", "mov x11, x20", - "mov x10, x21", - "mov w5, #0x0" + "mov x10, x21" ] }, "negative rep movsd": { - "ExpectedInstructionCount": 47, + "ExpectedInstructionCount": 53, "Comment": [ "When direction flag is a compile time constant we can optimize", "loads and stores can turn in to post-increment when known" @@ -570,9 +655,12 @@ "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "strb w20, [x28, #714]", - "mov x0, x5", - "mov x1, x11", - "mov x2, x10", + "mov x20, x10", + "mov x21, x11", + "mov x22, x5", + "mov x0, x22", + "mov x1, x21", + "mov x2, x20", "cbz x0, #+0x88", "sub x3, x1, x2", "tbz x3, #63, #+0x8", @@ -607,18 +695,21 @@ "str w3, [x1], #-4", "sub x0, x0, #0x1 (1)", "cbnz x0, #-0xc", - "mov x0, x11", - "mov x1, x10", - "mov x2, x5", - "sub x20, x0, x2, lsl #2", - "sub x21, x1, x2, lsl #2", + "mov x0, x21", + "mov x1, x20", + "mov x2, x22", + "sub x24, x0, x2, lsl #2", + "sub x25, x1, x2, lsl #2", + "mov x20, x24", + "mov x21, x25", + "mov w22, #0x0", + "mov x5, x22", "mov x11, x20", - "mov x10, x21", - "mov w5, #0x0" + "mov x10, x21" ] }, "negative rep movsq": { - "ExpectedInstructionCount": 47, + "ExpectedInstructionCount": 53, "Comment": [ "When direction flag is a compile time constant we can optimize", "loads and stores can turn in to post-increment when known" @@ -630,9 +721,12 @@ "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "strb w20, [x28, #714]", - "mov x0, x5", - "mov x1, x11", - "mov x2, x10", + "mov x20, x10", + "mov x21, x11", + "mov x22, x5", + "mov x0, x22", + "mov x1, x21", + "mov x2, x20", "cbz x0, #+0x88", "sub x3, x1, x2", "tbz x3, #63, #+0x8", @@ -667,18 +761,21 @@ "str x3, [x1], #-8", "sub x0, x0, #0x1 (1)", "cbnz x0, #-0xc", - "mov x0, x11", - "mov x1, x10", - "mov x2, x5", - "sub x20, x0, x2, lsl #3", - "sub x21, x1, x2, lsl #3", + "mov x0, x21", + "mov x1, x20", + "mov x2, x22", + "sub x24, x0, x2, lsl #3", + "sub x25, x1, x2, lsl #3", + "mov x20, x24", + "mov x21, x25", + "mov w22, #0x0", + "mov x5, x22", "mov x11, x20", - "mov x10, x21", - "mov w5, #0x0" + "mov x10, x21" ] }, "positive rep stosb": { - "ExpectedInstructionCount": 30, + "ExpectedInstructionCount": 34, "Comment": [ "When direction flag is a compile time constant we can optimize", "loads and stores can turn in to post-increment when known" @@ -691,13 +788,16 @@ "mov w20, #0x0", "mov w21, #0x1", "strb w21, [x28, #714]", - "uxtb w21, w4", - "mov x0, x5", - "mov x1, x11", + "mov x21, x4", + "uxtb w22, w21", + "mov x21, x11", + "mov x23, x5", + "mov x0, x23", + "mov x1, x21", "cbz x0, #+0x58", "sub x0, x0, #0x20 (32)", "tbnz x0, #63, #+0x3c", - "dup v1.16b, w21", + "dup v1.16b, w22", "sub x0, x0, #0x20 (32)", "tbnz x0, #63, #+0x14", "stp q1, q1, [x1], #32", @@ -713,15 +813,16 @@ "tbz x0, #63, #-0x8", "add x0, x0, #0x20 (32)", "cbz x0, #+0x10", - "strb w21, [x1], #1", + "strb w22, [x1], #1", "sub x0, x0, #0x1 (1)", "cbnz x0, #-0x8", - "add x11, x11, x5", - "mov x5, x20" + "add x24, x21, x23", + "mov x5, x20", + "mov x11, x24" ] }, "positive rep stosw": { - "ExpectedInstructionCount": 30, + "ExpectedInstructionCount": 34, "Comment": [ "When direction flag is a compile time constant we can optimize", "loads and stores can turn in to post-increment when known" @@ -734,13 +835,16 @@ "mov w20, #0x0", "mov w21, #0x1", "strb w21, [x28, #714]", - "uxth w21, w4", - "mov x0, x5", - "mov x1, x11", + "mov x21, x4", + "uxth w22, w21", + "mov x21, x11", + "mov x23, x5", + "mov x0, x23", + "mov x1, x21", "cbz x0, #+0x58", "sub x0, x0, #0x10 (16)", "tbnz x0, #63, #+0x3c", - "dup v1.8h, w21", + "dup v1.8h, w22", "sub x0, x0, #0x10 (16)", "tbnz x0, #63, #+0x14", "stp q1, q1, [x1], #32", @@ -756,15 +860,16 @@ "tbz x0, #63, #-0x8", "add x0, x0, #0x10 (16)", "cbz x0, #+0x10", - "strh w21, [x1], #2", + "strh w22, [x1], #2", "sub x0, x0, #0x1 (1)", "cbnz x0, #-0x8", - "add x11, x11, x5, lsl #1", - "mov x5, x20" + "add x24, x21, x23, lsl #1", + "mov x5, x20", + "mov x11, x24" ] }, "positive rep stosd": { - "ExpectedInstructionCount": 30, + "ExpectedInstructionCount": 34, "Comment": [ "When direction flag is a compile time constant we can optimize", "loads and stores can turn in to post-increment when known" @@ -777,13 +882,16 @@ "mov w20, #0x0", "mov w21, #0x1", "strb w21, [x28, #714]", - "mov w21, w4", - "mov x0, x5", - "mov x1, x11", + "mov x21, x4", + "mov w22, w21", + "mov x21, x11", + "mov x23, x5", + "mov x0, x23", + "mov x1, x21", "cbz x0, #+0x58", "sub x0, x0, #0x8 (8)", "tbnz x0, #63, #+0x3c", - "dup v1.4s, w21", + "dup v1.4s, w22", "sub x0, x0, #0x8 (8)", "tbnz x0, #63, #+0x14", "stp q1, q1, [x1], #32", @@ -799,15 +907,16 @@ "tbz x0, #63, #-0x8", "add x0, x0, #0x8 (8)", "cbz x0, #+0x10", - "str w21, [x1], #4", + "str w22, [x1], #4", "sub x0, x0, #0x1 (1)", "cbnz x0, #-0x8", - "add x11, x11, x5, lsl #2", - "mov x5, x20" + "add x24, x21, x23, lsl #2", + "mov x5, x20", + "mov x11, x24" ] }, "positive rep stosq": { - "ExpectedInstructionCount": 29, + "ExpectedInstructionCount": 33, "Comment": [ "When direction flag is a compile time constant we can optimize", "loads and stores can turn in to post-increment when known" @@ -820,12 +929,15 @@ "mov w20, #0x0", "mov w21, #0x1", "strb w21, [x28, #714]", - "mov x0, x5", - "mov x1, x11", + "mov x21, x4", + "mov x22, x11", + "mov x23, x5", + "mov x0, x23", + "mov x1, x22", "cbz x0, #+0x58", "sub x0, x0, #0x4 (4)", "tbnz x0, #63, #+0x3c", - "dup v1.2d, x4", + "dup v1.2d, x21", "sub x0, x0, #0x4 (4)", "tbnz x0, #63, #+0x14", "stp q1, q1, [x1], #32", @@ -841,15 +953,16 @@ "tbz x0, #63, #-0x8", "add x0, x0, #0x4 (4)", "cbz x0, #+0x10", - "str x4, [x1], #8", + "str x21, [x1], #8", "sub x0, x0, #0x1 (1)", "cbnz x0, #-0x8", - "add x11, x11, x5, lsl #3", - "mov x5, x20" + "add x24, x22, x23, lsl #3", + "mov x5, x20", + "mov x11, x24" ] }, "negative rep stosb": { - "ExpectedInstructionCount": 31, + "ExpectedInstructionCount": 36, "Comment": [ "When direction flag is a compile time constant we can optimize", "loads and stores can turn in to post-increment when known" @@ -861,14 +974,17 @@ "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "strb w20, [x28, #714]", - "uxtb w20, w4", - "mov x0, x5", - "mov x1, x11", + "mov x20, x4", + "uxtb w21, w20", + "mov x20, x11", + "mov x22, x5", + "mov x0, x22", + "mov x1, x20", "cbz x0, #+0x60", "sub x1, x1, #0x1f (31)", "sub x0, x0, #0x20 (32)", "tbnz x0, #63, #+0x3c", - "dup v1.16b, w20", + "dup v1.16b, w21", "sub x0, x0, #0x20 (32)", "tbnz x0, #63, #+0x14", "stp q1, q1, [x1], #-32", @@ -885,15 +1001,17 @@ "add x0, x0, #0x20 (32)", "cbz x0, #+0x14", "add x1, x1, #0x1f (31)", - "strb w20, [x1], #-1", + "strb w21, [x1], #-1", "sub x0, x0, #0x1 (1)", "cbnz x0, #-0x8", - "sub x11, x11, x5", - "mov w5, #0x0" + "sub x23, x20, x22", + "mov w20, #0x0", + "mov x5, x20", + "mov x11, x23" ] }, "negative rep stosw": { - "ExpectedInstructionCount": 31, + "ExpectedInstructionCount": 36, "Comment": [ "When direction flag is a compile time constant we can optimize", "loads and stores can turn in to post-increment when known" @@ -905,14 +1023,17 @@ "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "strb w20, [x28, #714]", - "uxth w20, w4", - "mov x0, x5", - "mov x1, x11", + "mov x20, x4", + "uxth w21, w20", + "mov x20, x11", + "mov x22, x5", + "mov x0, x22", + "mov x1, x20", "cbz x0, #+0x60", "sub x1, x1, #0x1e (30)", "sub x0, x0, #0x10 (16)", "tbnz x0, #63, #+0x3c", - "dup v1.8h, w20", + "dup v1.8h, w21", "sub x0, x0, #0x10 (16)", "tbnz x0, #63, #+0x14", "stp q1, q1, [x1], #-32", @@ -929,15 +1050,17 @@ "add x0, x0, #0x10 (16)", "cbz x0, #+0x14", "add x1, x1, #0x1e (30)", - "strh w20, [x1], #-2", + "strh w21, [x1], #-2", "sub x0, x0, #0x1 (1)", "cbnz x0, #-0x8", - "sub x11, x11, x5, lsl #1", - "mov w5, #0x0" + "sub x23, x20, x22, lsl #1", + "mov w20, #0x0", + "mov x5, x20", + "mov x11, x23" ] }, "negative rep stosd": { - "ExpectedInstructionCount": 31, + "ExpectedInstructionCount": 36, "Comment": [ "When direction flag is a compile time constant we can optimize", "loads and stores can turn in to post-increment when known" @@ -949,14 +1072,17 @@ "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "strb w20, [x28, #714]", - "mov w20, w4", - "mov x0, x5", - "mov x1, x11", + "mov x20, x4", + "mov w21, w20", + "mov x20, x11", + "mov x22, x5", + "mov x0, x22", + "mov x1, x20", "cbz x0, #+0x60", "sub x1, x1, #0x1c (28)", "sub x0, x0, #0x8 (8)", "tbnz x0, #63, #+0x3c", - "dup v1.4s, w20", + "dup v1.4s, w21", "sub x0, x0, #0x8 (8)", "tbnz x0, #63, #+0x14", "stp q1, q1, [x1], #-32", @@ -973,15 +1099,17 @@ "add x0, x0, #0x8 (8)", "cbz x0, #+0x14", "add x1, x1, #0x1c (28)", - "str w20, [x1], #-4", + "str w21, [x1], #-4", "sub x0, x0, #0x1 (1)", "cbnz x0, #-0x8", - "sub x11, x11, x5, lsl #2", - "mov w5, #0x0" + "sub x23, x20, x22, lsl #2", + "mov w20, #0x0", + "mov x5, x20", + "mov x11, x23" ] }, "negative rep stosq": { - "ExpectedInstructionCount": 30, + "ExpectedInstructionCount": 35, "Comment": [ "When direction flag is a compile time constant we can optimize", "loads and stores can turn in to post-increment when known" @@ -993,13 +1121,16 @@ "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "strb w20, [x28, #714]", - "mov x0, x5", - "mov x1, x11", + "mov x20, x4", + "mov x21, x11", + "mov x22, x5", + "mov x0, x22", + "mov x1, x21", "cbz x0, #+0x60", "sub x1, x1, #0x18 (24)", "sub x0, x0, #0x4 (4)", "tbnz x0, #63, #+0x3c", - "dup v1.2d, x4", + "dup v1.2d, x20", "sub x0, x0, #0x4 (4)", "tbnz x0, #63, #+0x14", "stp q1, q1, [x1], #-32", @@ -1016,15 +1147,17 @@ "add x0, x0, #0x4 (4)", "cbz x0, #+0x14", "add x1, x1, #0x18 (24)", - "str x4, [x1], #-8", + "str x20, [x1], #-8", "sub x0, x0, #0x1 (1)", "cbnz x0, #-0x8", - "sub x11, x11, x5, lsl #3", - "mov w5, #0x0" + "sub x23, x21, x22, lsl #3", + "mov w20, #0x0", + "mov x5, x20", + "mov x11, x23" ] }, "Sekiro spill block": { - "ExpectedInstructionCount": 176, + "ExpectedInstructionCount": 322, "Comment": [ "This block of code came from the settings screen when it loaded", "It was originally at RIP: 0x14232cca0 and has been deobfuscated" @@ -1151,182 +1284,328 @@ "pop rbx" ], "ExpectedArm64ASM": [ - "str x5, [x8, #8]", - "str x7, [x8, #-8]!", - "str x9, [x8, #-8]!", - "str x10, [x8, #-8]!", - "str x11, [x8, #-8]!", - "str x16, [x8, #-8]!", - "str x17, [x8, #-8]!", - "str x19, [x8, #-8]!", - "str x29, [x8, #-8]!", - "sub x8, x8, #0x18 (24)", - "ldr w5, [x6, #36]", - "ldr w10, [x6]", - "ldr w9, [x6, #4]", - "ldr w19, [x6, #8]", - "ldr w29, [x6, #12]", - "ldr w16, [x6, #16]", - "ldr w17, [x6, #20]", - "ldr w15, [x6, #24]", - "ldr w7, [x6, #28]", - "ldr w11, [x6, #32]", - "mov w20, #0x13", - "mul w4, w5, w20", - "str w5, [x8, #104]", - "mov w21, #0x1000000", - "add w4, w4, w21", - "mov w21, w4", - "lsr w4, w21, #25", - "add w4, w4, w10", - "mov w21, w4", - "asr w4, w21, #26", - "add w4, w4, w9", - "mov w21, w4", - "asr w4, w21, #25", - "add w4, w4, w19", - "mov w21, w4", - "asr w4, w21, #26", - "add w4, w4, w29", - "mov w21, w4", - "asr w4, w21, #25", - "add w4, w4, w16", - "mov w21, w4", - "asr w4, w21, #26", - "add w4, w4, w17", - "mov w21, w4", - "asr w4, w21, #25", - "add w4, w4, w15", - "mov w21, w4", - "asr w4, w21, #26", - "add w4, w4, w7", - "mov w21, w4", - "asr w4, w21, #25", - "add w4, w4, w11", - "mov w21, w4", - "asr w4, w21, #26", - "add w4, w4, w5", - "mov w21, w4", - "asr w4, w21, #25", - "mul w4, w4, w20", - "add w10, w10, w4", - "mov w4, w10", - "asr w4, w4, #26", - "add w9, w9, w4", - "mov w20, w4", - "lsl w4, w20, #26", - "sub w10, w10, w4", - "mov w5, w9", - "ldr x20, [x8, #96]", + "sub sp, sp, #0x100 (256)", + "mov x20, x5", + "mov x21, x8", + "str x20, [x21, #8]", + "mov x20, x7", + "mov x22, x21", + "str x20, [x22, #-8]!", + "mov x8, x22", + "mov x20, x9", + "mov x21, x22", + "str x20, [x21, #-8]!", + "mov x8, x21", + "mov x20, x10", + "mov x22, x21", + "str x20, [x22, #-8]!", + "mov x8, x22", + "mov x20, x11", + "mov x21, x22", + "str x20, [x21, #-8]!", + "mov x8, x21", + "mov x20, x16", + "mov x22, x21", + "str x20, [x22, #-8]!", + "mov x8, x22", + "mov x20, x17", + "mov x21, x22", + "str x20, [x21, #-8]!", + "mov x8, x21", + "mov x20, x19", + "mov x22, x21", + "str x20, [x22, #-8]!", + "mov x8, x22", + "mov x20, x29", + "mov x21, x22", + "str x20, [x21, #-8]!", + "mov x8, x21", + "sub x20, x21, #0x18 (24)", + "mov x8, x20", + "mov x21, x6", + "ldr w22, [x21, #36]", + "mov x5, x22", + "ldr w23, [x21]", + "mov x10, x23", + "ldr w24, [x21, #4]", + "mov x9, x24", + "ldr w25, [x21, #8]", + "mov x19, x25", + "ldr w30, [x21, #12]", + "mov x29, x30", + "ldr w18, [x21, #16]", + "mov x16, x18", + "str w18, [sp]", + "ldr w18, [x21, #20]", + "mov x17, x18", + "str w18, [sp, #32]", + "ldr w18, [x21, #24]", + "mov x15, x18", + "str w18, [sp, #64]", + "ldr w18, [x21, #28]", + "mov x7, x18", + "str w18, [sp, #96]", + "ldr w18, [x21, #32]", + "mov x11, x18", + "mov w21, #0x13", + "str w18, [sp, #128]", + "mul w18, w22, w21", + "mov x4, x18", + "str w22, [x20, #104]", + "str x20, [sp, #160]", + "mov w20, #0x1000000", + "str w21, [sp, #192]", + "add w21, w18, w20", + "mov x4, x21", + "mov w20, w21", + "lsr w21, w20, #25", + "mov x4, x21", + "add w20, w21, w23", + "mov x4, x20", + "mov w21, w20", + "asr w20, w21, #26", + "mov x4, x20", + "add w21, w20, w24", + "mov x4, x21", + "mov w20, w21", + "asr w21, w20, #25", + "mov x4, x21", + "add w20, w21, w25", + "mov x4, x20", + "mov w21, w20", + "asr w20, w21, #26", + "mov x4, x20", + "add w21, w20, w30", + "mov x4, x21", + "mov w20, w21", + "asr w21, w20, #25", + "mov x4, x21", + "ldr w20, [sp]", + "add w18, w21, w20", + "mov x4, x18", + "mov w21, w18", + "asr w18, w21, #26", + "mov x4, x18", + "ldr w21, [sp, #32]", + "add w20, w18, w21", + "mov x4, x20", + "mov w18, w20", + "asr w20, w18, #25", + "mov x4, x20", + "ldr w18, [sp, #64]", + "add w21, w20, w18", + "mov x4, x21", + "mov w20, w21", + "asr w21, w20, #26", + "mov x4, x21", + "ldr w20, [sp, #96]", + "add w18, w21, w20", + "mov x4, x18", + "mov w21, w18", + "asr w18, w21, #25", + "mov x4, x18", + "ldr w21, [sp, #128]", + "add w20, w18, w21", "mov x4, x20", - "asr w5, w5, #25", - "add w19, w19, w5", - "mov w21, w5", - "lsl w5, w21, #25", - "mov w6, w19", - "sub w9, w9, w5", - "asr w6, w6, #26", - "add w29, w29, w6", - "mov w21, w10", - "str w21, [x20]", - "mov w12, w29", - "mov w21, w6", - "lsl w6, w21, #26", - "asr w12, w12, #25", - "sub w19, w19, w6", - "add w16, w16, w12", - "mov w21, w9", - "str w21, [x20, #4]", - "mov w13, w16", - "mov w21, w12", - "lsl w12, w21, #25", - "asr w13, w13, #26", - "sub w29, w29, w12", - "add w17, w17, w13", - "mov w21, w19", - "str w21, [x20, #8]", - "mov w21, w13", - "lsl w13, w21, #26", - "mov w14, w17", - "asr w14, w14, #25", - "sub w16, w16, w13", - "add w15, w15, w14", - "mov w21, w29", - "str w21, [x20, #12]", - "mov w21, w15", - "str w21, [x8, #112]", + "mov w18, w20", + "asr w20, w18, #26", + "mov x4, x20", + "add w18, w20, w22", + "mov x4, x18", + "mov w20, w18", + "asr w22, w20, #25", + "mov x4, x22", + "ldr w20, [sp, #192]", + "mul w18, w22, w20", + "mov x4, x18", + "add w20, w23, w18", "mov x10, x20", - "mov w21, w15", - "asr w15, w21, #26", - "add w7, w7, w15", - "mov w21, w16", - "str w21, [x20, #16]", - "mov w21, w7", - "str w21, [x8, #120]", - "mov w21, w7", - "asr w7, w21, #25", - "add w11, w11, w7", - "mov w21, w11", - "str w21, [x8]", - "mov w21, w11", - "asr w11, w21, #26", - "ldr w21, [x8, #104]", - "add w21, w21, w11", - "str w21, [x8, #104]", - "mov w21, w14", - "lsl w14, w21, #25", - "ldr w5, [x8, #104]", - "sub w17, w17, w14", - "mov w21, w17", - "str w21, [x20, #20]", - "ldr w4, [x8, #112]", - "mov w21, w15", - "lsl w15, w21, #26", - "sub w4, w4, w15", - "mov w21, w7", - "lsl w7, w21, #25", - "mov w21, w4", - "str w21, [x20, #24]", - "ldr w4, [x8, #120]", - "sub w4, w4, w7", - "mov w21, w11", - "lsl w11, w21, #26", - "mov w21, w4", - "str w21, [x20, #28]", - "ldr w4, [x8]", - "sub w4, w4, w11", - "mov w21, w4", - "str w21, [x20, #32]", - "mov x4, x5", - "and w4, w5, #0xfe000000", - "sub w5, w5, w4", - "mov w21, w5", - "str w21, [x20, #36]", - "mvn w27, w8", - "adds x26, x8, #0x18 (24)", - "mov x8, x26", - "ldr x29, [x26]", - "add x20, x26, #0x8 (8)", + "mov w22, w20", + "mov x4, x22", + "asr w23, w22, #26", + "mov x4, x23", + "add w22, w24, w23", + "mov x9, x22", + "mov w24, w23", + "lsl w23, w24, #26", + "mov x4, x23", + "sub w24, w20, w23", + "mov x10, x24", + "mov w20, w22", + "mov x5, x20", + "ldr x23, [sp, #160]", + "ldr x18, [x23, #96]", + "mov x4, x18", + "asr w21, w20, #25", + "mov x5, x21", + "add w20, w25, w21", + "mov x19, x20", + "mov w25, w21", + "lsl w21, w25, #25", + "mov x5, x21", + "mov w25, w20", + "mov x6, x25", + "sub w23, w22, w21", + "mov x9, x23", + "asr w21, w25, #26", + "mov x6, x21", + "add w22, w30, w21", + "mov x29, x22", + "mov w25, w24", + "str w25, [x18]", + "mov w24, w22", + "mov x12, x24", + "mov w25, w21", + "lsl w21, w25, #26", + "mov x6, x21", + "asr w25, w24, #25", + "mov x12, x25", + "sub w24, w20, w21", + "mov x19, x24", + "ldr w20, [sp]", + "add w21, w20, w25", + "mov x16, x21", + "mov w20, w23", + "str w20, [x18, #4]", + "mov w20, w21", + "mov x13, x20", + "mov w23, w25", + "lsl w25, w23, #25", + "mov x12, x25", + "asr w23, w20, #26", + "mov x13, x23", + "sub w20, w22, w25", + "mov x29, x20", + "ldr w22, [sp, #32]", + "add w25, w22, w23", + "mov x17, x25", + "mov w22, w24", + "str w22, [x18, #8]", + "mov w22, w23", + "lsl w23, w22, #26", + "mov x13, x23", + "mov w22, w25", + "mov x14, x22", + "asr w24, w22, #25", + "mov x14, x24", + "sub w22, w21, w23", + "mov x16, x22", + "ldr w21, [sp, #64]", + "add w23, w21, w24", + "mov x15, x23", + "mov w21, w20", + "str w21, [x18, #12]", + "mov w20, w23", + "ldr x21, [sp, #160]", + "str w20, [x21, #112]", + "mov x10, x18", + "mov w20, w23", + "asr w23, w20, #26", + "mov x15, x23", + "ldr w20, [sp, #96]", + "add w30, w20, w23", + "mov x7, x30", + "mov w20, w22", + "str w20, [x18, #16]", + "mov w20, w30", + "str w20, [x21, #120]", + "mov w20, w30", + "asr w22, w20, #25", + "mov x7, x22", + "ldr w20, [sp, #128]", + "add w30, w20, w22", + "mov x11, x30", + "mov w20, w30", + "str w20, [x21]", + "mov w20, w30", + "asr w30, w20, #26", + "mov x11, x30", + "ldr w20, [x21, #104]", + "str w22, [sp, #224]", + "add w22, w20, w30", + "str w22, [x21, #104]", + "mov w20, w24", + "lsl w22, w20, #25", + "mov x14, x22", + "ldr w20, [x21, #104]", + "mov x5, x20", + "sub w24, w25, w22", + "mov x17, x24", + "mov w22, w24", + "str w22, [x18, #20]", + "ldr w22, [x21, #112]", + "mov x4, x22", + "mov w24, w23", + "lsl w23, w24, #26", + "mov x15, x23", + "sub w24, w22, w23", + "mov x4, x24", + "ldr w22, [sp, #224]", + "mov w23, w22", + "lsl w22, w23, #25", + "mov x7, x22", + "mov w23, w24", + "str w23, [x18, #24]", + "ldr w23, [x21, #120]", + "mov x4, x23", + "sub w24, w23, w22", + "mov x4, x24", + "mov w22, w30", + "lsl w23, w22, #26", + "mov x11, x23", + "mov w22, w24", + "str w22, [x18, #28]", + "ldr w22, [x21]", + "mov x4, x22", + "sub w24, w22, w23", + "mov x4, x24", + "mov w22, w24", + "str w22, [x18, #32]", + "mov x4, x20", + "and w22, w20, #0xfe000000", + "mov x4, x22", + "sub w23, w20, w22", + "mov x5, x23", + "mov w20, w23", + "str w20, [x18, #36]", + "mvn w20, w21", + "mov x27, x20", + "adds x20, x21, #0x18 (24)", + "mov x26, x20", "mov x8, x20", - "ldr x19, [x26, #8]", - "add x21, x20, #0x8 (8)", - "mov x8, x21", - "ldr x17, [x20, #8]", - "add x20, x21, #0x8 (8)", + "ldr x21, [x20]", + "add x22, x20, #0x8 (8)", + "mov x8, x22", + "mov x29, x21", + "ldr x21, [x20, #8]", + "add x20, x22, #0x8 (8)", "mov x8, x20", - "ldr x16, [x21, #8]", - "add x21, x20, #0x8 (8)", - "mov x8, x21", - "ldr x11, [x20, #8]", - "add x20, x21, #0x8 (8)", + "mov x19, x21", + "ldr x21, [x22, #8]", + "add x22, x20, #0x8 (8)", + "mov x8, x22", + "mov x17, x21", + "ldr x21, [x20, #8]", + "add x20, x22, #0x8 (8)", "mov x8, x20", - "ldr x10, [x21, #8]", - "add x21, x20, #0x8 (8)", - "mov x8, x21", - "ldr x9, [x20, #8]", - "add x8, x21, #0x8 (8)", - "ldr x7, [x21, #8]", - "add x8, x8, #0x8 (8)" + "mov x16, x21", + "ldr x21, [x22, #8]", + "add x22, x20, #0x8 (8)", + "mov x8, x22", + "mov x11, x21", + "ldr x21, [x20, #8]", + "add x20, x22, #0x8 (8)", + "mov x8, x20", + "mov x10, x21", + "ldr x21, [x22, #8]", + "add x22, x20, #0x8 (8)", + "mov x8, x22", + "mov x9, x21", + "ldr x21, [x20, #8]", + "add x20, x22, #0x8 (8)", + "mov x8, x20", + "mov x7, x21", + "add sp, sp, #0x100 (256)" ] } } diff --git a/unittests/InstructionCountCI/FEXOpt/MultiInst_AFP.json b/unittests/InstructionCountCI/FEXOpt/MultiInst_AFP.json index 5644759ae7..9b23f4a55f 100644 --- a/unittests/InstructionCountCI/FEXOpt/MultiInst_AFP.json +++ b/unittests/InstructionCountCI/FEXOpt/MultiInst_AFP.json @@ -15,7 +15,7 @@ ], "Instructions": { "adds xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 9, "Comment": [ "Redundant scalar operations should get eliminated with AFP" ], @@ -24,8 +24,15 @@ "addss xmm0, xmm2" ], "ExpectedArm64ASM": [ - "fadd s16, s16, s17", - "fadd s16, s16, s18" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fadd s4, s2, s3", + "mov v16.16b, v4.16b", + "mov v2.16b, v18.16b", + "mov v3.16b, v4.16b", + "fadd s3, s4, s2", + "mov v16.16b, v3.16b" ] } } diff --git a/unittests/InstructionCountCI/FEXOpt/libnss.json b/unittests/InstructionCountCI/FEXOpt/libnss.json index 0fd3a1537a..ccbbab2dcc 100644 --- a/unittests/InstructionCountCI/FEXOpt/libnss.json +++ b/unittests/InstructionCountCI/FEXOpt/libnss.json @@ -16,7 +16,7 @@ "Comment": [], "Instructions": { "libnss3 sha": { - "ExpectedInstructionCount": 2391, + "ExpectedInstructionCount": 2671, "Comment": [ "This block of code comes from libnss3 which causes panic spilling in FEX's RA.", "This code is hit in steamwebhelper calling in to this function.", @@ -193,2397 +193,2677 @@ "movups [rdi+0x110], xmm4" ], "ExpectedArm64ASM": [ - "ldr q18, [x11, #256]", - "ldr q19, [x11, #272]", - "ldr q24, [x11]", - "ldr q23, [x11, #16]", + "sub sp, sp, #0x120 (288)", + "mov x20, x11", + "ldr q2, [x20, #256]", + "mov v18.16b, v2.16b", + "ldr q3, [x20, #272]", + "mov v19.16b, v3.16b", + "ldr q4, [x20]", + "mov v24.16b, v4.16b", + "ldr q5, [x20, #16]", + "mov v23.16b, v5.16b", "ldr x0, [x28, #1760]", - "ldr q2, [x0, #2832]", - "tbl v16.16b, {v18.16b}, v2.16b", + "ldr q6, [x0, #2832]", + "tbl v7.16b, {v2.16b}, v6.16b", + "mov v16.16b, v7.16b", "ldr x0, [x28, #1760]", - "ldr q3, [x0, #432]", - "tbl v18.16b, {v19.16b}, v3.16b", - "ldr q22, [x11, #32]", - "ldr q21, [x11, #48]", - "mov v19.16b, v16.16b", - "ext v19.16b, v18.16b, v16.16b, #8", - "mov v18.d[1], v16.d[1]", - "mov w20, #0x1000", - "movk w20, #0x1, lsl #16", - "ldr q16, [x29, x20, sxtx]", - "mov v20.16b, v18.16b", - "mov v17.16b, v19.16b", - "movi v4.16b, #0x8f", - "and v4.16b, v16.16b, v4.16b", - "tbl v21.16b, {v21.16b}, v4.16b", - "movi v4.16b, #0x8f", - "and v4.16b, v16.16b, v4.16b", - "tbl v22.16b, {v22.16b}, v4.16b", - "movi v4.16b, #0x8f", - "and v4.16b, v16.16b, v4.16b", - "tbl v23.16b, {v23.16b}, v4.16b", - "movi v4.16b, #0x8f", - "and v4.16b, v16.16b, v4.16b", - "tbl v24.16b, {v24.16b}, v4.16b", - "ldr q16, [x29, x20, sxtx]", - "add v16.4s, v16.4s, v24.4s", + "ldr q2, [x0, #432]", + "tbl v8.16b, {v3.16b}, v2.16b", + "mov v18.16b, v8.16b", + "ldr q3, [x20, #32]", + "mov v22.16b, v3.16b", + "ldr q9, [x20, #48]", + "mov v21.16b, v9.16b", + "mov v19.16b, v7.16b", + "ext v10.16b, v8.16b, v7.16b, #8", + "mov v19.16b, v10.16b", + "mov v11.16b, v8.16b", + "mov v11.d[1], v7.d[1]", + "mov v18.16b, v11.16b", + "mov x21, x29", + "mov w22, #0x1000", + "movk w22, #0x1, lsl #16", + "ldr q7, [x21, x22, sxtx]", + "mov v16.16b, v7.16b", + "mov v20.16b, v11.16b", + "mov v17.16b, v10.16b", + "movi v8.16b, #0x8f", + "and v12.16b, v7.16b, v8.16b", + "tbl v8.16b, {v9.16b}, v12.16b", + "mov v21.16b, v8.16b", + "movi v9.16b, #0x8f", + "and v12.16b, v7.16b, v9.16b", + "tbl v9.16b, {v3.16b}, v12.16b", + "mov v22.16b, v9.16b", + "movi v3.16b, #0x8f", + "and v12.16b, v7.16b, v3.16b", + "tbl v3.16b, {v5.16b}, v12.16b", + "mov v23.16b, v3.16b", + "movi v5.16b, #0x8f", + "and v12.16b, v7.16b, v5.16b", + "tbl v5.16b, {v4.16b}, v12.16b", + "mov v24.16b, v5.16b", + "ldr q4, [x21, x22, sxtx]", + "mov v16.16b, v4.16b", + "add v7.4s, v4.4s, v5.4s", + "mov v16.16b, v7.16b", + "mov v0.16b, v5.16b", "unimplemented (Unimplemented)", - "mov w20, v19.s[1]", - "mov w21, v19.s[0]", - "mov w22, v18.s[1]", - "and w23, w20, w21", - "bic w22, w22, w20", - "eor w22, w23, w22", - "ror w23, w20, #6", - "eor w23, w23, w20, ror #11", - "eor w23, w23, w20, ror #25", - "add w22, w22, w23", - "mov w23, v16.s[0]", - "add w22, w22, w23", - "mov w23, v18.s[0]", - "add w22, w22, w23", - "mov w23, v19.s[3]", - "mov w24, v19.s[2]", - "mov w25, v18.s[3]", - "and w30, w24, w25", - "orr w25, w24, w25", - "and w25, w23, w25", - "orr w25, w25, w30", - "add w25, w22, w25", - "ror w30, w23, #2", - "eor w30, w30, w23, ror #13", - "eor w30, w30, w23, ror #22", - "add w25, w25, w30", - "mov w30, v18.s[2]", - "add w22, w22, w30", - "and w20, w22, w20", - "bic w21, w21, w22", - "eor w20, w20, w21", - "ror w21, w22, #6", - "eor w21, w21, w22, ror #11", - "eor w21, w21, w22, ror #25", - "add w20, w20, w21", - "mov w21, v16.s[1]", - "add w20, w20, w21", - "mov w21, v18.s[1]", - "add w20, w20, w21", - "and w21, w23, w24", - "orr w23, w23, w24", - "and w23, w25, w23", - "orr w21, w23, w21", - "add w21, w20, w21", - "ror w23, w25, #2", - "eor w23, w23, w25, ror #13", - "eor w23, w23, w25, ror #22", - "add w21, w21, w23", - "mov w23, v18.s[3]", - "add w20, w20, w23", - "mov v4.16b, v18.16b", - "mov v4.s[3], w21", - "mov v4.s[2], w25", - "mov v4.s[1], w20", - "mov v20.16b, v4.16b", - "mov v20.s[0], w22", + "mov v4.16b, v0.16b", + "mov v24.16b, v4.16b", + "mov w23, v10.s[1]", + "mov w24, v10.s[0]", + "mov w25, v11.s[1]", + "and w30, w23, w24", + "bic w18, w25, w23", + "eor w25, w30, w18", + "ror w30, w23, #6", + "eor w18, w30, w23, ror #11", + "eor w30, w18, w23, ror #25", + "add w18, w25, w30", + "mov w25, v7.s[0]", + "add w30, w18, w25", + "mov w25, v11.s[0]", + "add w18, w30, w25", + "mov w25, v10.s[3]", + "mov w30, v10.s[2]", + "str x20, [sp]", + "mov w20, v11.s[3]", + "str x21, [sp, #32]", + "and w21, w30, w20", + "str x22, [sp, #64]", + "orr w22, w30, w20", + "and w20, w25, w22", + "orr w22, w20, w21", + "add w20, w18, w22", + "ror w21, w25, #2", + "eor w22, w21, w25, ror #13", + "eor w21, w22, w25, ror #22", + "add w22, w20, w21", + "mov w20, v11.s[2]", + "add w21, w18, w20", + "and w20, w21, w23", + "bic w23, w24, w21", + "eor w24, w20, w23", + "ror w20, w21, #6", + "eor w23, w20, w21, ror #11", + "eor w20, w23, w21, ror #25", + "add w23, w24, w20", + "mov w20, v7.s[1]", + "add w24, w23, w20", + "mov w20, v11.s[1]", + "add w23, w24, w20", + "and w20, w25, w30", + "orr w24, w25, w30", + "and w25, w22, w24", + "orr w24, w25, w20", + "add w20, w23, w24", + "ror w24, w22, #2", + "eor w25, w24, w22, ror #13", + "eor w24, w25, w22, ror #22", + "add w25, w20, w24", + "mov w20, v11.s[3]", + "add w24, w23, w20", + "mov v5.16b, v11.16b", + "mov v5.s[3], w25", + "mov v12.16b, v5.16b", + "mov v12.s[2], w22", + "mov v5.16b, v12.16b", + "mov v5.s[1], w24", + "mov v12.16b, v5.16b", + "mov v12.s[0], w21", + "mov v20.16b, v12.16b", "ldr x0, [x28, #1760]", - "ldr q4, [x0, #224]", - "tbl v16.16b, {v16.16b}, v4.16b", - "mov w20, v20.s[1]", - "mov w21, v20.s[0]", - "mov w22, v19.s[1]", + "ldr q5, [x0, #224]", + "tbl v13.16b, {v7.16b}, v5.16b", + "mov v16.16b, v13.16b", + "mov w20, v12.s[1]", + "mov w21, v12.s[0]", + "mov w22, v10.s[1]", "and w23, w20, w21", - "bic w22, w22, w20", - "eor w22, w23, w22", + "bic w24, w22, w20", + "eor w22, w23, w24", "ror w23, w20, #6", - "eor w23, w23, w20, ror #11", - "eor w23, w23, w20, ror #25", - "add w22, w22, w23", - "mov w23, v16.s[0]", - "add w22, w22, w23", - "mov w23, v19.s[0]", - "add w22, w22, w23", - "mov w23, v20.s[3]", - "mov w24, v20.s[2]", - "mov w25, v19.s[3]", - "and w30, w24, w25", - "orr w25, w24, w25", - "and w25, w23, w25", - "orr w25, w25, w30", - "add w25, w22, w25", - "ror w30, w23, #2", - "eor w30, w30, w23, ror #13", - "eor w30, w30, w23, ror #22", - "add w25, w25, w30", - "mov w30, v19.s[2]", - "add w22, w22, w30", - "and w20, w22, w20", - "bic w21, w21, w22", - "eor w20, w20, w21", - "ror w21, w22, #6", - "eor w21, w21, w22, ror #11", - "eor w21, w21, w22, ror #25", - "add w20, w20, w21", - "mov w21, v16.s[1]", - "add w20, w20, w21", - "mov w21, v19.s[1]", - "add w20, w20, w21", - "and w21, w23, w24", - "orr w23, w23, w24", - "and w23, w25, w23", - "orr w21, w23, w21", - "add w21, w20, w21", - "ror w23, w25, #2", - "eor w23, w23, w25, ror #13", - "eor w23, w23, w25, ror #22", - "add w21, w21, w23", - "mov w23, v19.s[3]", - "add w20, w20, w23", - "mov v5.16b, v19.16b", - "mov v5.s[3], w21", - "mov v5.s[2], w25", - "mov v5.s[1], w20", - "mov v17.16b, v5.16b", - "mov v17.s[0], w22", - "mov v16.16b, v21.16b", - "ext v16.16b, v22.16b, v21.16b, #4", - "add v24.4s, v24.4s, v16.4s", - "mov w20, #0x1000", - "movk w20, #0x1, lsl #16", - "ldr q16, [x29, x20, sxtx]", - "mov w20, v21.s[2]", - "mov w21, v21.s[3]", - "mov w22, v24.s[0]", - "ror w23, w20, #17", - "ror w24, w20, #19", - "eor w23, w23, w24", - "lsr w20, w20, #10", - "eor w20, w23, w20", - "add w20, w22, w20", - "mov w22, v24.s[1]", - "ror w23, w21, #17", - "ror w24, w21, #19", - "eor w23, w23, w24", - "lsr w21, w21, #10", - "eor w21, w23, w21", - "add w21, w22, w21", - "mov w22, v24.s[2]", - "ror w23, w20, #17", - "ror w24, w20, #19", - "eor w23, w23, w24", - "lsr w24, w20, #10", - "eor w23, w23, w24", - "add w22, w22, w23", - "mov w23, v24.s[3]", + "eor w24, w23, w20, ror #11", + "eor w23, w24, w20, ror #25", + "add w24, w22, w23", + "mov w22, v13.s[0]", + "add w23, w24, w22", + "mov w22, v10.s[0]", + "add w24, w23, w22", + "mov w22, v12.s[3]", + "mov w23, v12.s[2]", + "mov w25, v10.s[3]", + "and w30, w23, w25", + "orr w18, w23, w25", + "and w25, w22, w18", + "orr w18, w25, w30", + "add w25, w24, w18", + "ror w30, w22, #2", + "eor w18, w30, w22, ror #13", + "eor w30, w18, w22, ror #22", + "add w18, w25, w30", + "mov w25, v10.s[2]", + "add w30, w24, w25", + "and w24, w30, w20", + "bic w20, w21, w30", + "eor w21, w24, w20", + "ror w20, w30, #6", + "eor w24, w20, w30, ror #11", + "eor w20, w24, w30, ror #25", + "add w24, w21, w20", + "mov w20, v13.s[1]", + "add w21, w24, w20", + "mov w20, v10.s[1]", + "add w24, w21, w20", + "and w20, w22, w23", + "orr w21, w22, w23", + "and w22, w18, w21", + "orr w21, w22, w20", + "add w20, w24, w21", + "ror w21, w18, #2", + "eor w22, w21, w18, ror #13", + "eor w21, w22, w18, ror #22", + "add w22, w20, w21", + "mov w20, v10.s[3]", + "add w21, w24, w20", + "mov v7.16b, v10.16b", + "mov v7.s[3], w22", + "mov v13.16b, v7.16b", + "mov v13.s[2], w18", + "mov v7.16b, v13.16b", + "mov v7.s[1], w21", + "mov v13.16b, v7.16b", + "mov v13.s[0], w30", + "mov v17.16b, v13.16b", + "mov v16.16b, v8.16b", + "ext v7.16b, v9.16b, v8.16b, #4", + "mov v16.16b, v7.16b", + "add v14.4s, v4.4s, v7.4s", + "mov v24.16b, v14.16b", + "ldr x20, [sp, #32]", + "ldr x21, [sp, #64]", + "ldr q4, [x20, x21, sxtx]", + "mov v16.16b, v4.16b", + "mov w21, v8.s[2]", + "mov w22, v8.s[3]", + "mov w23, v14.s[0]", "ror w24, w21, #17", "ror w25, w21, #19", - "eor w24, w24, w25", - "lsr w25, w21, #10", - "eor w24, w24, w25", - "add w23, w23, w24", - "mov v5.16b, v24.16b", - "mov v5.s[3], w23", - "mov v5.s[2], w22", - "mov v5.s[1], w21", - "mov v24.16b, v5.16b", - "mov v24.s[0], w20", - "add v16.4s, v16.4s, v23.4s", + "eor w30, w24, w25", + "lsr w24, w21, #10", + "eor w21, w30, w24", + "add w24, w23, w21", + "mov w21, v14.s[1]", + "ror w23, w22, #17", + "ror w25, w22, #19", + "eor w30, w23, w25", + "lsr w23, w22, #10", + "eor w22, w30, w23", + "add w23, w21, w22", + "mov w21, v14.s[2]", + "ror w22, w24, #17", + "ror w25, w24, #19", + "eor w30, w22, w25", + "lsr w22, w24, #10", + "eor w25, w30, w22", + "add w22, w21, w25", + "mov w21, v14.s[3]", + "ror w25, w23, #17", + "ror w30, w23, #19", + "eor w18, w25, w30", + "lsr w25, w23, #10", + "eor w30, w18, w25", + "add w25, w21, w30", + "mov v7.16b, v14.16b", + "mov v7.s[3], w25", + "mov v14.16b, v7.16b", + "mov v14.s[2], w22", + "mov v7.16b, v14.16b", + "mov v7.s[1], w23", + "mov v14.16b, v7.16b", + "mov v14.s[0], w24", + "mov v24.16b, v14.16b", + "add v7.4s, v4.4s, v3.4s", + "mov v16.16b, v7.16b", + "mov v0.16b, v3.16b", "unimplemented (Unimplemented)", - "mov w20, v17.s[1]", - "mov w21, v17.s[0]", - "mov w22, v20.s[1]", - "and w23, w20, w21", - "bic w22, w22, w20", - "eor w22, w23, w22", - "ror w23, w20, #6", - "eor w23, w23, w20, ror #11", - "eor w23, w23, w20, ror #25", - "add w22, w22, w23", - "mov w23, v16.s[0]", - "add w22, w22, w23", - "mov w23, v20.s[0]", - "add w22, w22, w23", - "mov w23, v17.s[3]", - "mov w24, v17.s[2]", - "mov w25, v20.s[3]", - "and w30, w24, w25", - "orr w25, w24, w25", - "and w25, w23, w25", - "orr w25, w25, w30", - "add w25, w22, w25", - "ror w30, w23, #2", - "eor w30, w30, w23, ror #13", - "eor w30, w30, w23, ror #22", - "add w25, w25, w30", - "mov w30, v20.s[2]", - "add w22, w22, w30", - "and w20, w22, w20", - "bic w21, w21, w22", - "eor w20, w20, w21", - "ror w21, w22, #6", - "eor w21, w21, w22, ror #11", - "eor w21, w21, w22, ror #25", - "add w20, w20, w21", - "mov w21, v16.s[1]", - "add w20, w20, w21", - "mov w21, v20.s[1]", - "add w20, w20, w21", - "and w21, w23, w24", - "orr w23, w23, w24", - "and w23, w25, w23", - "orr w21, w23, w21", - "add w21, w20, w21", - "ror w23, w25, #2", - "eor w23, w23, w25, ror #13", - "eor w23, w23, w25, ror #22", - "add w21, w21, w23", - "mov w23, v20.s[3]", - "add w20, w20, w23", - "mov v5.16b, v20.16b", - "mov v5.s[3], w21", - "mov v5.s[2], w25", - "mov v5.s[1], w20", - "mov v20.16b, v5.16b", - "mov v20.s[0], w22", - "tbl v16.16b, {v16.16b}, v4.16b", - "mov w20, v20.s[1]", - "mov w21, v20.s[0]", - "mov w22, v17.s[1]", + "mov v4.16b, v0.16b", + "mov v23.16b, v4.16b", + "mov w21, v13.s[1]", + "mov w22, v13.s[0]", + "mov w23, v12.s[1]", + "and w24, w21, w22", + "bic w25, w23, w21", + "eor w23, w24, w25", + "ror w24, w21, #6", + "eor w25, w24, w21, ror #11", + "eor w24, w25, w21, ror #25", + "add w25, w23, w24", + "mov w23, v7.s[0]", + "add w24, w25, w23", + "mov w23, v12.s[0]", + "add w25, w24, w23", + "mov w23, v13.s[3]", + "mov w24, v13.s[2]", + "mov w30, v12.s[3]", + "and w18, w24, w30", + "orr w20, w24, w30", + "and w30, w23, w20", + "orr w20, w30, w18", + "add w30, w25, w20", + "ror w20, w23, #2", + "eor w18, w20, w23, ror #13", + "eor w20, w18, w23, ror #22", + "add w18, w30, w20", + "mov w20, v12.s[2]", + "add w30, w25, w20", + "and w20, w30, w21", + "bic w21, w22, w30", + "eor w22, w20, w21", + "ror w20, w30, #6", + "eor w21, w20, w30, ror #11", + "eor w20, w21, w30, ror #25", + "add w21, w22, w20", + "mov w20, v7.s[1]", + "add w22, w21, w20", + "mov w20, v12.s[1]", + "add w21, w22, w20", + "and w20, w23, w24", + "orr w22, w23, w24", + "and w23, w18, w22", + "orr w22, w23, w20", + "add w20, w21, w22", + "ror w22, w18, #2", + "eor w23, w22, w18, ror #13", + "eor w22, w23, w18, ror #22", + "add w23, w20, w22", + "mov w20, v12.s[3]", + "add w22, w21, w20", + "mov v3.16b, v12.16b", + "mov v3.s[3], w23", + "mov v12.16b, v3.16b", + "mov v12.s[2], w18", + "mov v3.16b, v12.16b", + "mov v3.s[1], w22", + "mov v12.16b, v3.16b", + "mov v12.s[0], w30", + "mov v20.16b, v12.16b", + "tbl v3.16b, {v7.16b}, v5.16b", + "mov v16.16b, v3.16b", + "mov w20, v12.s[1]", + "mov w21, v12.s[0]", + "mov w22, v13.s[1]", "and w23, w20, w21", - "bic w22, w22, w20", - "eor w22, w23, w22", + "bic w24, w22, w20", + "eor w22, w23, w24", "ror w23, w20, #6", - "eor w23, w23, w20, ror #11", - "eor w23, w23, w20, ror #25", - "add w22, w22, w23", - "mov w23, v16.s[0]", - "add w22, w22, w23", - "mov w23, v17.s[0]", - "add w22, w22, w23", - "mov w23, v20.s[3]", - "mov w24, v20.s[2]", - "mov w25, v17.s[3]", - "and w30, w24, w25", - "orr w25, w24, w25", - "and w25, w23, w25", - "orr w25, w25, w30", - "add w25, w22, w25", - "ror w30, w23, #2", - "eor w30, w30, w23, ror #13", - "eor w30, w30, w23, ror #22", - "add w25, w25, w30", - "mov w30, v17.s[2]", - "add w22, w22, w30", - "and w20, w22, w20", - "bic w21, w21, w22", - "eor w20, w20, w21", - "ror w21, w22, #6", - "eor w21, w21, w22, ror #11", - "eor w21, w21, w22, ror #25", - "add w20, w20, w21", - "mov w21, v16.s[1]", - "add w20, w20, w21", - "mov w21, v17.s[1]", - "add w20, w20, w21", - "and w21, w23, w24", - "orr w23, w23, w24", - "and w23, w25, w23", - "orr w21, w23, w21", - "add w21, w20, w21", - "ror w23, w25, #2", - "eor w23, w23, w25, ror #13", - "eor w23, w23, w25, ror #22", - "add w21, w21, w23", - "mov w23, v17.s[3]", - "add w20, w20, w23", - "mov v5.16b, v17.16b", - "mov v5.s[3], w21", - "mov v5.s[2], w25", - "mov v5.s[1], w20", - "mov v17.16b, v5.16b", - "mov v17.s[0], w22", - "mov v16.16b, v24.16b", - "ext v16.16b, v21.16b, v24.16b, #4", - "add v23.4s, v23.4s, v16.4s", + "eor w24, w23, w20, ror #11", + "eor w23, w24, w20, ror #25", + "add w24, w22, w23", + "mov w22, v3.s[0]", + "add w23, w24, w22", + "mov w22, v13.s[0]", + "add w24, w23, w22", + "mov w22, v12.s[3]", + "mov w23, v12.s[2]", + "mov w25, v13.s[3]", + "and w30, w23, w25", + "orr w18, w23, w25", + "and w25, w22, w18", + "orr w18, w25, w30", + "add w25, w24, w18", + "ror w30, w22, #2", + "eor w18, w30, w22, ror #13", + "eor w30, w18, w22, ror #22", + "add w18, w25, w30", + "mov w25, v13.s[2]", + "add w30, w24, w25", + "and w24, w30, w20", + "bic w20, w21, w30", + "eor w21, w24, w20", + "ror w20, w30, #6", + "eor w24, w20, w30, ror #11", + "eor w20, w24, w30, ror #25", + "add w24, w21, w20", + "mov w20, v3.s[1]", + "add w21, w24, w20", + "mov w20, v13.s[1]", + "add w24, w21, w20", + "and w20, w22, w23", + "orr w21, w22, w23", + "and w22, w18, w21", + "orr w21, w22, w20", + "add w20, w24, w21", + "ror w21, w18, #2", + "eor w22, w21, w18, ror #13", + "eor w21, w22, w18, ror #22", + "add w22, w20, w21", + "mov w20, v13.s[3]", + "add w21, w24, w20", + "mov v3.16b, v13.16b", + "mov v3.s[3], w22", + "mov v7.16b, v3.16b", + "mov v7.s[2], w18", + "mov v3.16b, v7.16b", + "mov v3.s[1], w21", + "mov v7.16b, v3.16b", + "mov v7.s[0], w30", + "mov v17.16b, v7.16b", + "mov v16.16b, v14.16b", + "ext v3.16b, v8.16b, v14.16b, #4", + "mov v16.16b, v3.16b", + "add v13.4s, v4.4s, v3.4s", + "mov v23.16b, v13.16b", "mov w20, #0x1000", "movk w20, #0x1, lsl #16", - "ldr q16, [x29, x20, sxtx]", - "mov w20, v24.s[2]", - "mov w21, v24.s[3]", - "mov w22, v23.s[0]", - "ror w23, w20, #17", - "ror w24, w20, #19", - "eor w23, w23, w24", - "lsr w20, w20, #10", - "eor w20, w23, w20", - "add w20, w22, w20", - "mov w22, v23.s[1]", - "ror w23, w21, #17", - "ror w24, w21, #19", - "eor w23, w23, w24", - "lsr w21, w21, #10", - "eor w21, w23, w21", - "add w21, w22, w21", - "mov w22, v23.s[2]", - "ror w23, w20, #17", - "ror w24, w20, #19", - "eor w23, w23, w24", - "lsr w24, w20, #10", - "eor w23, w23, w24", - "add w22, w22, w23", - "mov w23, v23.s[3]", - "ror w24, w21, #17", - "ror w25, w21, #19", - "eor w24, w24, w25", - "lsr w25, w21, #10", - "eor w24, w24, w25", - "add w23, w23, w24", - "mov v5.16b, v23.16b", - "mov v5.s[3], w23", - "mov v5.s[2], w22", - "mov v5.s[1], w21", - "mov v23.16b, v5.16b", - "mov v23.s[0], w20", - "add v16.4s, v16.4s, v22.4s", + "ldr x21, [sp, #32]", + "ldr q3, [x21, x20, sxtx]", + "mov v16.16b, v3.16b", + "mov w22, v14.s[2]", + "mov w23, v14.s[3]", + "mov w24, v13.s[0]", + "ror w25, w22, #17", + "ror w30, w22, #19", + "eor w18, w25, w30", + "lsr w25, w22, #10", + "eor w22, w18, w25", + "add w25, w24, w22", + "mov w22, v13.s[1]", + "ror w24, w23, #17", + "ror w30, w23, #19", + "eor w18, w24, w30", + "lsr w24, w23, #10", + "eor w23, w18, w24", + "add w24, w22, w23", + "mov w22, v13.s[2]", + "ror w23, w25, #17", + "ror w30, w25, #19", + "eor w18, w23, w30", + "lsr w23, w25, #10", + "eor w30, w18, w23", + "add w23, w22, w30", + "mov w22, v13.s[3]", + "ror w30, w24, #17", + "ror w18, w24, #19", + "str x20, [sp, #96]", + "eor w20, w30, w18", + "lsr w30, w24, #10", + "eor w18, w20, w30", + "add w20, w22, w18", + "mov v4.16b, v13.16b", + "mov v4.s[3], w20", + "mov v13.16b, v4.16b", + "mov v13.s[2], w23", + "mov v4.16b, v13.16b", + "mov v4.s[1], w24", + "mov v13.16b, v4.16b", + "mov v13.s[0], w25", + "mov v23.16b, v13.16b", + "add v4.4s, v3.4s, v9.4s", + "mov v16.16b, v4.16b", + "mov v0.16b, v9.16b", "unimplemented (Unimplemented)", - "mov w20, v17.s[1]", - "mov w21, v17.s[0]", - "mov w22, v20.s[1]", - "and w23, w20, w21", - "bic w22, w22, w20", - "eor w22, w23, w22", - "ror w23, w20, #6", - "eor w23, w23, w20, ror #11", - "eor w23, w23, w20, ror #25", - "add w22, w22, w23", - "mov w23, v16.s[0]", - "add w22, w22, w23", - "mov w23, v20.s[0]", - "add w22, w22, w23", - "mov w23, v17.s[3]", - "mov w24, v17.s[2]", - "mov w25, v20.s[3]", - "and w30, w24, w25", - "orr w25, w24, w25", - "and w25, w23, w25", - "orr w25, w25, w30", - "add w25, w22, w25", - "ror w30, w23, #2", - "eor w30, w30, w23, ror #13", - "eor w30, w30, w23, ror #22", - "add w25, w25, w30", - "mov w30, v20.s[2]", - "add w22, w22, w30", - "and w20, w22, w20", - "bic w21, w21, w22", - "eor w20, w20, w21", - "ror w21, w22, #6", - "eor w21, w21, w22, ror #11", - "eor w21, w21, w22, ror #25", - "add w20, w20, w21", - "mov w21, v16.s[1]", - "add w20, w20, w21", - "mov w21, v20.s[1]", - "add w20, w20, w21", - "and w21, w23, w24", - "orr w23, w23, w24", - "and w23, w25, w23", - "orr w21, w23, w21", - "add w21, w20, w21", - "ror w23, w25, #2", - "eor w23, w23, w25, ror #13", - "eor w23, w23, w25, ror #22", - "add w21, w21, w23", - "mov w23, v20.s[3]", - "add w20, w20, w23", - "mov v5.16b, v20.16b", - "mov v5.s[3], w21", - "mov v5.s[2], w25", - "mov v5.s[1], w20", - "mov v20.16b, v5.16b", - "mov v20.s[0], w22", - "tbl v16.16b, {v16.16b}, v4.16b", - "mov w20, v20.s[1]", - "mov w21, v20.s[0]", - "mov w22, v17.s[1]", + "mov v3.16b, v0.16b", + "mov v22.16b, v3.16b", + "mov w20, v7.s[1]", + "mov w22, v7.s[0]", + "mov w23, v12.s[1]", + "and w24, w20, w22", + "bic w25, w23, w20", + "eor w23, w24, w25", + "ror w24, w20, #6", + "eor w25, w24, w20, ror #11", + "eor w24, w25, w20, ror #25", + "add w25, w23, w24", + "mov w23, v4.s[0]", + "add w24, w25, w23", + "mov w23, v12.s[0]", + "add w25, w24, w23", + "mov w23, v7.s[3]", + "mov w24, v7.s[2]", + "mov w30, v12.s[3]", + "and w18, w24, w30", + "orr w21, w24, w30", + "and w30, w23, w21", + "orr w21, w30, w18", + "add w30, w25, w21", + "ror w21, w23, #2", + "eor w18, w21, w23, ror #13", + "eor w21, w18, w23, ror #22", + "add w18, w30, w21", + "mov w21, v12.s[2]", + "add w30, w25, w21", + "and w21, w30, w20", + "bic w20, w22, w30", + "eor w22, w21, w20", + "ror w20, w30, #6", + "eor w21, w20, w30, ror #11", + "eor w20, w21, w30, ror #25", + "add w21, w22, w20", + "mov w20, v4.s[1]", + "add w22, w21, w20", + "mov w20, v12.s[1]", + "add w21, w22, w20", + "and w20, w23, w24", + "orr w22, w23, w24", + "and w23, w18, w22", + "orr w22, w23, w20", + "add w20, w21, w22", + "ror w22, w18, #2", + "eor w23, w22, w18, ror #13", + "eor w22, w23, w18, ror #22", + "add w23, w20, w22", + "mov w20, v12.s[3]", + "add w22, w21, w20", + "mov v9.16b, v12.16b", + "mov v9.s[3], w23", + "mov v12.16b, v9.16b", + "mov v12.s[2], w18", + "mov v9.16b, v12.16b", + "mov v9.s[1], w22", + "mov v12.16b, v9.16b", + "mov v12.s[0], w30", + "mov v20.16b, v12.16b", + "tbl v9.16b, {v4.16b}, v5.16b", + "mov v16.16b, v9.16b", + "mov w20, v12.s[1]", + "mov w21, v12.s[0]", + "mov w22, v7.s[1]", "and w23, w20, w21", - "bic w22, w22, w20", - "eor w22, w23, w22", + "bic w24, w22, w20", + "eor w22, w23, w24", "ror w23, w20, #6", - "eor w23, w23, w20, ror #11", - "eor w23, w23, w20, ror #25", - "add w22, w22, w23", - "mov w23, v16.s[0]", - "add w22, w22, w23", - "mov w23, v17.s[0]", - "add w22, w22, w23", - "mov w23, v20.s[3]", - "mov w24, v20.s[2]", - "mov w25, v17.s[3]", - "and w30, w24, w25", - "orr w25, w24, w25", - "and w25, w23, w25", - "orr w25, w25, w30", - "add w25, w22, w25", - "ror w30, w23, #2", - "eor w30, w30, w23, ror #13", - "eor w30, w30, w23, ror #22", - "add w25, w25, w30", - "mov w30, v17.s[2]", - "add w22, w22, w30", - "and w20, w22, w20", - "bic w21, w21, w22", - "eor w20, w20, w21", - "ror w21, w22, #6", - "eor w21, w21, w22, ror #11", - "eor w21, w21, w22, ror #25", - "add w20, w20, w21", - "mov w21, v16.s[1]", - "add w20, w20, w21", - "mov w21, v17.s[1]", - "add w20, w20, w21", - "and w21, w23, w24", - "orr w23, w23, w24", - "and w23, w25, w23", - "orr w21, w23, w21", - "add w21, w20, w21", - "ror w23, w25, #2", - "eor w23, w23, w25, ror #13", - "eor w23, w23, w25, ror #22", - "add w21, w21, w23", - "mov w23, v17.s[3]", - "add w20, w20, w23", - "mov v5.16b, v17.16b", - "mov v5.s[3], w21", - "mov v5.s[2], w25", - "mov v5.s[1], w20", - "mov v17.16b, v5.16b", - "mov v17.s[0], w22", - "mov v16.16b, v23.16b", - "ext v16.16b, v24.16b, v23.16b, #4", - "add v22.4s, v22.4s, v16.4s", - "mov w20, #0x1000", - "movk w20, #0x1, lsl #16", - "ldr q16, [x29, x20, sxtx]", - "mov w20, v23.s[2]", - "mov w21, v23.s[3]", - "mov w22, v22.s[0]", - "ror w23, w20, #17", - "ror w24, w20, #19", - "eor w23, w23, w24", - "lsr w20, w20, #10", - "eor w20, w23, w20", - "add w20, w22, w20", - "mov w22, v22.s[1]", - "ror w23, w21, #17", - "ror w24, w21, #19", - "eor w23, w23, w24", - "lsr w21, w21, #10", - "eor w21, w23, w21", - "add w21, w22, w21", - "mov w22, v22.s[2]", - "ror w23, w20, #17", - "ror w24, w20, #19", - "eor w23, w23, w24", - "lsr w24, w20, #10", - "eor w23, w23, w24", - "add w22, w22, w23", - "mov w23, v22.s[3]", + "eor w24, w23, w20, ror #11", + "eor w23, w24, w20, ror #25", + "add w24, w22, w23", + "mov w22, v9.s[0]", + "add w23, w24, w22", + "mov w22, v7.s[0]", + "add w24, w23, w22", + "mov w22, v12.s[3]", + "mov w23, v12.s[2]", + "mov w25, v7.s[3]", + "and w30, w23, w25", + "orr w18, w23, w25", + "and w25, w22, w18", + "orr w18, w25, w30", + "add w25, w24, w18", + "ror w30, w22, #2", + "eor w18, w30, w22, ror #13", + "eor w30, w18, w22, ror #22", + "add w18, w25, w30", + "mov w25, v7.s[2]", + "add w30, w24, w25", + "and w24, w30, w20", + "bic w20, w21, w30", + "eor w21, w24, w20", + "ror w20, w30, #6", + "eor w24, w20, w30, ror #11", + "eor w20, w24, w30, ror #25", + "add w24, w21, w20", + "mov w20, v9.s[1]", + "add w21, w24, w20", + "mov w20, v7.s[1]", + "add w24, w21, w20", + "and w20, w22, w23", + "orr w21, w22, w23", + "and w22, w18, w21", + "orr w21, w22, w20", + "add w20, w24, w21", + "ror w21, w18, #2", + "eor w22, w21, w18, ror #13", + "eor w21, w22, w18, ror #22", + "add w22, w20, w21", + "mov w20, v7.s[3]", + "add w21, w24, w20", + "mov v4.16b, v7.16b", + "mov v4.s[3], w22", + "mov v7.16b, v4.16b", + "mov v7.s[2], w18", + "mov v4.16b, v7.16b", + "mov v4.s[1], w21", + "mov v7.16b, v4.16b", + "mov v7.s[0], w30", + "mov v17.16b, v7.16b", + "mov v16.16b, v13.16b", + "ext v4.16b, v14.16b, v13.16b, #4", + "mov v16.16b, v4.16b", + "add v9.4s, v3.4s, v4.4s", + "mov v22.16b, v9.16b", + "ldr x20, [sp, #32]", + "ldr x21, [sp, #96]", + "ldr q3, [x20, x21, sxtx]", + "mov v16.16b, v3.16b", + "mov w21, v13.s[2]", + "mov w22, v13.s[3]", + "mov w23, v9.s[0]", "ror w24, w21, #17", "ror w25, w21, #19", - "eor w24, w24, w25", - "lsr w25, w21, #10", - "eor w24, w24, w25", - "add w23, w23, w24", - "mov v5.16b, v22.16b", - "mov v5.s[3], w23", - "mov v5.s[2], w22", - "mov v5.s[1], w21", - "mov v22.16b, v5.16b", - "mov v22.s[0], w20", - "add v16.4s, v16.4s, v21.4s", + "eor w30, w24, w25", + "lsr w24, w21, #10", + "eor w21, w30, w24", + "add w24, w23, w21", + "mov w21, v9.s[1]", + "ror w23, w22, #17", + "ror w25, w22, #19", + "eor w30, w23, w25", + "lsr w23, w22, #10", + "eor w22, w30, w23", + "add w23, w21, w22", + "mov w21, v9.s[2]", + "ror w22, w24, #17", + "ror w25, w24, #19", + "eor w30, w22, w25", + "lsr w22, w24, #10", + "eor w25, w30, w22", + "add w22, w21, w25", + "mov w21, v9.s[3]", + "ror w25, w23, #17", + "ror w30, w23, #19", + "eor w18, w25, w30", + "lsr w25, w23, #10", + "eor w30, w18, w25", + "add w25, w21, w30", + "mov v4.16b, v9.16b", + "mov v4.s[3], w25", + "mov v9.16b, v4.16b", + "mov v9.s[2], w22", + "mov v4.16b, v9.16b", + "mov v4.s[1], w23", + "mov v9.16b, v4.16b", + "mov v9.s[0], w24", + "mov v22.16b, v9.16b", + "add v4.4s, v3.4s, v8.4s", + "mov v16.16b, v4.16b", + "mov v0.16b, v8.16b", "unimplemented (Unimplemented)", - "mov w20, v17.s[1]", - "mov w21, v17.s[0]", - "mov w22, v20.s[1]", + "mov v3.16b, v0.16b", + "mov v21.16b, v3.16b", + "mov w21, v7.s[1]", + "mov w22, v7.s[0]", + "mov w23, v12.s[1]", + "and w24, w21, w22", + "bic w25, w23, w21", + "eor w23, w24, w25", + "ror w24, w21, #6", + "eor w25, w24, w21, ror #11", + "eor w24, w25, w21, ror #25", + "add w25, w23, w24", + "mov w23, v4.s[0]", + "add w24, w25, w23", + "mov w23, v12.s[0]", + "add w25, w24, w23", + "mov w23, v7.s[3]", + "mov w24, v7.s[2]", + "mov w30, v12.s[3]", + "and w18, w24, w30", + "orr w20, w24, w30", + "and w30, w23, w20", + "orr w20, w30, w18", + "add w30, w25, w20", + "ror w20, w23, #2", + "eor w18, w20, w23, ror #13", + "eor w20, w18, w23, ror #22", + "add w18, w30, w20", + "mov w20, v12.s[2]", + "add w30, w25, w20", + "and w20, w30, w21", + "bic w21, w22, w30", + "eor w22, w20, w21", + "ror w20, w30, #6", + "eor w21, w20, w30, ror #11", + "eor w20, w21, w30, ror #25", + "add w21, w22, w20", + "mov w20, v4.s[1]", + "add w22, w21, w20", + "mov w20, v12.s[1]", + "add w21, w22, w20", + "and w20, w23, w24", + "orr w22, w23, w24", + "and w23, w18, w22", + "orr w22, w23, w20", + "add w20, w21, w22", + "ror w22, w18, #2", + "eor w23, w22, w18, ror #13", + "eor w22, w23, w18, ror #22", + "add w23, w20, w22", + "mov w20, v12.s[3]", + "add w22, w21, w20", + "mov v8.16b, v12.16b", + "mov v8.s[3], w23", + "mov v12.16b, v8.16b", + "mov v12.s[2], w18", + "mov v8.16b, v12.16b", + "mov v8.s[1], w22", + "mov v12.16b, v8.16b", + "mov v12.s[0], w30", + "mov v20.16b, v12.16b", + "tbl v8.16b, {v4.16b}, v5.16b", + "mov v16.16b, v8.16b", + "mov w20, v12.s[1]", + "mov w21, v12.s[0]", + "mov w22, v7.s[1]", "and w23, w20, w21", - "bic w22, w22, w20", - "eor w22, w23, w22", + "bic w24, w22, w20", + "eor w22, w23, w24", "ror w23, w20, #6", - "eor w23, w23, w20, ror #11", - "eor w23, w23, w20, ror #25", - "add w22, w22, w23", - "mov w23, v16.s[0]", - "add w22, w22, w23", - "mov w23, v20.s[0]", - "add w22, w22, w23", - "mov w23, v17.s[3]", - "mov w24, v17.s[2]", - "mov w25, v20.s[3]", - "and w30, w24, w25", - "orr w25, w24, w25", - "and w25, w23, w25", - "orr w25, w25, w30", - "add w25, w22, w25", - "ror w30, w23, #2", - "eor w30, w30, w23, ror #13", - "eor w30, w30, w23, ror #22", - "add w25, w25, w30", - "mov w30, v20.s[2]", - "add w22, w22, w30", - "and w20, w22, w20", - "bic w21, w21, w22", - "eor w20, w20, w21", - "ror w21, w22, #6", - "eor w21, w21, w22, ror #11", - "eor w21, w21, w22, ror #25", - "add w20, w20, w21", - "mov w21, v16.s[1]", - "add w20, w20, w21", - "mov w21, v20.s[1]", - "add w20, w20, w21", - "and w21, w23, w24", - "orr w23, w23, w24", - "and w23, w25, w23", - "orr w21, w23, w21", - "add w21, w20, w21", - "ror w23, w25, #2", - "eor w23, w23, w25, ror #13", - "eor w23, w23, w25, ror #22", - "add w21, w21, w23", - "mov w23, v20.s[3]", - "add w20, w20, w23", - "mov v5.16b, v20.16b", - "mov v5.s[3], w21", - "mov v5.s[2], w25", - "mov v5.s[1], w20", - "mov v20.16b, v5.16b", - "mov v20.s[0], w22", - "tbl v16.16b, {v16.16b}, v4.16b", - "mov w20, v20.s[1]", - "mov w21, v20.s[0]", - "mov w22, v17.s[1]", - "and w23, w20, w21", - "bic w22, w22, w20", - "eor w22, w23, w22", - "ror w23, w20, #6", - "eor w23, w23, w20, ror #11", - "eor w23, w23, w20, ror #25", - "add w22, w22, w23", - "mov w23, v16.s[0]", - "add w22, w22, w23", - "mov w23, v17.s[0]", - "add w22, w22, w23", - "mov w23, v20.s[3]", - "mov w24, v20.s[2]", - "mov w25, v17.s[3]", - "and w30, w24, w25", - "orr w25, w24, w25", - "and w25, w23, w25", - "orr w25, w25, w30", - "add w25, w22, w25", - "ror w30, w23, #2", - "eor w30, w30, w23, ror #13", - "eor w30, w30, w23, ror #22", - "add w25, w25, w30", - "mov w30, v17.s[2]", - "add w22, w22, w30", - "and w20, w22, w20", - "bic w21, w21, w22", - "eor w20, w20, w21", - "ror w21, w22, #6", - "eor w21, w21, w22, ror #11", - "eor w21, w21, w22, ror #25", - "add w20, w20, w21", - "mov w21, v16.s[1]", - "add w20, w20, w21", - "mov w21, v17.s[1]", - "add w20, w20, w21", - "and w21, w23, w24", - "orr w23, w23, w24", - "and w23, w25, w23", - "orr w21, w23, w21", - "add w21, w20, w21", - "ror w23, w25, #2", - "eor w23, w23, w25, ror #13", - "eor w23, w23, w25, ror #22", - "add w21, w21, w23", - "mov w23, v17.s[3]", - "add w20, w20, w23", - "mov v5.16b, v17.16b", - "mov v5.s[3], w21", - "mov v5.s[2], w25", - "mov v5.s[1], w20", - "mov v17.16b, v5.16b", - "mov v17.s[0], w22", - "mov v16.16b, v22.16b", - "ext v16.16b, v23.16b, v22.16b, #4", - "add v21.4s, v21.4s, v16.4s", + "eor w24, w23, w20, ror #11", + "eor w23, w24, w20, ror #25", + "add w24, w22, w23", + "mov w22, v8.s[0]", + "add w23, w24, w22", + "mov w22, v7.s[0]", + "add w24, w23, w22", + "mov w22, v12.s[3]", + "mov w23, v12.s[2]", + "mov w25, v7.s[3]", + "and w30, w23, w25", + "orr w18, w23, w25", + "and w25, w22, w18", + "orr w18, w25, w30", + "add w25, w24, w18", + "ror w30, w22, #2", + "eor w18, w30, w22, ror #13", + "eor w30, w18, w22, ror #22", + "add w18, w25, w30", + "mov w25, v7.s[2]", + "add w30, w24, w25", + "and w24, w30, w20", + "bic w20, w21, w30", + "eor w21, w24, w20", + "ror w20, w30, #6", + "eor w24, w20, w30, ror #11", + "eor w20, w24, w30, ror #25", + "add w24, w21, w20", + "mov w20, v8.s[1]", + "add w21, w24, w20", + "mov w20, v7.s[1]", + "add w24, w21, w20", + "and w20, w22, w23", + "orr w21, w22, w23", + "and w22, w18, w21", + "orr w21, w22, w20", + "add w20, w24, w21", + "ror w21, w18, #2", + "eor w22, w21, w18, ror #13", + "eor w21, w22, w18, ror #22", + "add w22, w20, w21", + "mov w20, v7.s[3]", + "add w21, w24, w20", + "mov v4.16b, v7.16b", + "mov v4.s[3], w22", + "mov v7.16b, v4.16b", + "mov v7.s[2], w18", + "mov v4.16b, v7.16b", + "mov v4.s[1], w21", + "mov v7.16b, v4.16b", + "mov v7.s[0], w30", + "mov v17.16b, v7.16b", + "mov v16.16b, v9.16b", + "ext v4.16b, v13.16b, v9.16b, #4", + "mov v16.16b, v4.16b", + "add v8.4s, v3.4s, v4.4s", + "mov v21.16b, v8.16b", "mov w20, #0x1000", "movk w20, #0x1, lsl #16", - "ldr q16, [x29, x20, sxtx]", - "mov w20, v22.s[2]", - "mov w21, v22.s[3]", - "mov w22, v21.s[0]", - "ror w23, w20, #17", - "ror w24, w20, #19", - "eor w23, w23, w24", - "lsr w20, w20, #10", - "eor w20, w23, w20", - "add w20, w22, w20", - "mov w22, v21.s[1]", - "ror w23, w21, #17", - "ror w24, w21, #19", - "eor w23, w23, w24", - "lsr w21, w21, #10", - "eor w21, w23, w21", - "add w21, w22, w21", - "mov w22, v21.s[2]", - "ror w23, w20, #17", - "ror w24, w20, #19", - "eor w23, w23, w24", - "lsr w24, w20, #10", - "eor w23, w23, w24", - "add w22, w22, w23", - "mov w23, v21.s[3]", - "ror w24, w21, #17", - "ror w25, w21, #19", - "eor w24, w24, w25", - "lsr w25, w21, #10", - "eor w24, w24, w25", - "add w23, w23, w24", - "mov v5.16b, v21.16b", - "mov v5.s[3], w23", - "mov v5.s[2], w22", - "mov v5.s[1], w21", - "mov v21.16b, v5.16b", - "mov v21.s[0], w20", - "add v16.4s, v16.4s, v24.4s", + "ldr x21, [sp, #32]", + "ldr q3, [x21, x20, sxtx]", + "mov v16.16b, v3.16b", + "mov w22, v9.s[2]", + "mov w23, v9.s[3]", + "mov w24, v8.s[0]", + "ror w25, w22, #17", + "ror w30, w22, #19", + "eor w18, w25, w30", + "lsr w25, w22, #10", + "eor w22, w18, w25", + "add w25, w24, w22", + "mov w22, v8.s[1]", + "ror w24, w23, #17", + "ror w30, w23, #19", + "eor w18, w24, w30", + "lsr w24, w23, #10", + "eor w23, w18, w24", + "add w24, w22, w23", + "mov w22, v8.s[2]", + "ror w23, w25, #17", + "ror w30, w25, #19", + "eor w18, w23, w30", + "lsr w23, w25, #10", + "eor w30, w18, w23", + "add w23, w22, w30", + "mov w22, v8.s[3]", + "ror w30, w24, #17", + "ror w18, w24, #19", + "str x20, [sp, #128]", + "eor w20, w30, w18", + "lsr w30, w24, #10", + "eor w18, w20, w30", + "add w20, w22, w18", + "mov v4.16b, v8.16b", + "mov v4.s[3], w20", + "mov v8.16b, v4.16b", + "mov v8.s[2], w23", + "mov v4.16b, v8.16b", + "mov v4.s[1], w24", + "mov v8.16b, v4.16b", + "mov v8.s[0], w25", + "mov v21.16b, v8.16b", + "add v4.4s, v3.4s, v14.4s", + "mov v16.16b, v4.16b", + "mov v0.16b, v14.16b", "unimplemented (Unimplemented)", - "mov w20, v17.s[1]", - "mov w21, v17.s[0]", - "mov w22, v20.s[1]", + "mov v3.16b, v0.16b", + "mov v24.16b, v3.16b", + "mov w20, v7.s[1]", + "mov w22, v7.s[0]", + "mov w23, v12.s[1]", + "and w24, w20, w22", + "bic w25, w23, w20", + "eor w23, w24, w25", + "ror w24, w20, #6", + "eor w25, w24, w20, ror #11", + "eor w24, w25, w20, ror #25", + "add w25, w23, w24", + "mov w23, v4.s[0]", + "add w24, w25, w23", + "mov w23, v12.s[0]", + "add w25, w24, w23", + "mov w23, v7.s[3]", + "mov w24, v7.s[2]", + "mov w30, v12.s[3]", + "and w18, w24, w30", + "orr w21, w24, w30", + "and w30, w23, w21", + "orr w21, w30, w18", + "add w30, w25, w21", + "ror w21, w23, #2", + "eor w18, w21, w23, ror #13", + "eor w21, w18, w23, ror #22", + "add w18, w30, w21", + "mov w21, v12.s[2]", + "add w30, w25, w21", + "and w21, w30, w20", + "bic w20, w22, w30", + "eor w22, w21, w20", + "ror w20, w30, #6", + "eor w21, w20, w30, ror #11", + "eor w20, w21, w30, ror #25", + "add w21, w22, w20", + "mov w20, v4.s[1]", + "add w22, w21, w20", + "mov w20, v12.s[1]", + "add w21, w22, w20", + "and w20, w23, w24", + "orr w22, w23, w24", + "and w23, w18, w22", + "orr w22, w23, w20", + "add w20, w21, w22", + "ror w22, w18, #2", + "eor w23, w22, w18, ror #13", + "eor w22, w23, w18, ror #22", + "add w23, w20, w22", + "mov w20, v12.s[3]", + "add w22, w21, w20", + "mov v14.16b, v12.16b", + "mov v14.s[3], w23", + "mov v12.16b, v14.16b", + "mov v12.s[2], w18", + "mov v14.16b, v12.16b", + "mov v14.s[1], w22", + "mov v12.16b, v14.16b", + "mov v12.s[0], w30", + "mov v20.16b, v12.16b", + "tbl v14.16b, {v4.16b}, v5.16b", + "mov v16.16b, v14.16b", + "mov w20, v12.s[1]", + "mov w21, v12.s[0]", + "mov w22, v7.s[1]", "and w23, w20, w21", - "bic w22, w22, w20", - "eor w22, w23, w22", + "bic w24, w22, w20", + "eor w22, w23, w24", "ror w23, w20, #6", - "eor w23, w23, w20, ror #11", - "eor w23, w23, w20, ror #25", - "add w22, w22, w23", - "mov w23, v16.s[0]", - "add w22, w22, w23", - "mov w23, v20.s[0]", - "add w22, w22, w23", - "mov w23, v17.s[3]", - "mov w24, v17.s[2]", - "mov w25, v20.s[3]", - "and w30, w24, w25", - "orr w25, w24, w25", - "and w25, w23, w25", - "orr w25, w25, w30", - "add w25, w22, w25", - "ror w30, w23, #2", - "eor w30, w30, w23, ror #13", - "eor w30, w30, w23, ror #22", - "add w25, w25, w30", - "mov w30, v20.s[2]", - "add w22, w22, w30", - "and w20, w22, w20", - "bic w21, w21, w22", - "eor w20, w20, w21", - "ror w21, w22, #6", - "eor w21, w21, w22, ror #11", - "eor w21, w21, w22, ror #25", - "add w20, w20, w21", - "mov w21, v16.s[1]", - "add w20, w20, w21", - "mov w21, v20.s[1]", - "add w20, w20, w21", - "and w21, w23, w24", - "orr w23, w23, w24", - "and w23, w25, w23", - "orr w21, w23, w21", - "add w21, w20, w21", - "ror w23, w25, #2", - "eor w23, w23, w25, ror #13", - "eor w23, w23, w25, ror #22", - "add w21, w21, w23", - "mov w23, v20.s[3]", - "add w20, w20, w23", - "mov v5.16b, v20.16b", - "mov v5.s[3], w21", - "mov v5.s[2], w25", - "mov v5.s[1], w20", - "mov v20.16b, v5.16b", - "mov v20.s[0], w22", - "tbl v16.16b, {v16.16b}, v4.16b", - "mov w20, v20.s[1]", - "mov w21, v20.s[0]", - "mov w22, v17.s[1]", - "and w23, w20, w21", - "bic w22, w22, w20", - "eor w22, w23, w22", - "ror w23, w20, #6", - "eor w23, w23, w20, ror #11", - "eor w23, w23, w20, ror #25", - "add w22, w22, w23", - "mov w23, v16.s[0]", - "add w22, w22, w23", - "mov w23, v17.s[0]", - "add w22, w22, w23", - "mov w23, v20.s[3]", - "mov w24, v20.s[2]", - "mov w25, v17.s[3]", - "and w30, w24, w25", - "orr w25, w24, w25", - "and w25, w23, w25", - "orr w25, w25, w30", - "add w25, w22, w25", - "ror w30, w23, #2", - "eor w30, w30, w23, ror #13", - "eor w30, w30, w23, ror #22", - "add w25, w25, w30", - "mov w30, v17.s[2]", - "add w22, w22, w30", - "and w20, w22, w20", - "bic w21, w21, w22", - "eor w20, w20, w21", - "ror w21, w22, #6", - "eor w21, w21, w22, ror #11", - "eor w21, w21, w22, ror #25", - "add w20, w20, w21", - "mov w21, v16.s[1]", - "add w20, w20, w21", - "mov w21, v17.s[1]", - "add w20, w20, w21", - "and w21, w23, w24", - "orr w23, w23, w24", - "and w23, w25, w23", - "orr w21, w23, w21", - "add w21, w20, w21", - "ror w23, w25, #2", - "eor w23, w23, w25, ror #13", - "eor w23, w23, w25, ror #22", - "add w21, w21, w23", - "mov w23, v17.s[3]", - "add w20, w20, w23", - "mov v5.16b, v17.16b", - "mov v5.s[3], w21", - "mov v5.s[2], w25", - "mov v5.s[1], w20", - "mov v17.16b, v5.16b", - "mov v17.s[0], w22", - "mov v16.16b, v21.16b", - "ext v16.16b, v22.16b, v21.16b, #4", - "add v24.4s, v24.4s, v16.4s", - "mov w20, #0x1000", - "movk w20, #0x1, lsl #16", - "ldr q16, [x29, x20, sxtx]", - "mov w20, v21.s[2]", - "mov w21, v21.s[3]", - "mov w22, v24.s[0]", - "ror w23, w20, #17", - "ror w24, w20, #19", - "eor w23, w23, w24", - "lsr w20, w20, #10", - "eor w20, w23, w20", - "add w20, w22, w20", - "mov w22, v24.s[1]", - "ror w23, w21, #17", - "ror w24, w21, #19", - "eor w23, w23, w24", - "lsr w21, w21, #10", - "eor w21, w23, w21", - "add w21, w22, w21", - "mov w22, v24.s[2]", - "ror w23, w20, #17", - "ror w24, w20, #19", - "eor w23, w23, w24", - "lsr w24, w20, #10", - "eor w23, w23, w24", - "add w22, w22, w23", - "mov w23, v24.s[3]", + "eor w24, w23, w20, ror #11", + "eor w23, w24, w20, ror #25", + "add w24, w22, w23", + "mov w22, v14.s[0]", + "add w23, w24, w22", + "mov w22, v7.s[0]", + "add w24, w23, w22", + "mov w22, v12.s[3]", + "mov w23, v12.s[2]", + "mov w25, v7.s[3]", + "and w30, w23, w25", + "orr w18, w23, w25", + "and w25, w22, w18", + "orr w18, w25, w30", + "add w25, w24, w18", + "ror w30, w22, #2", + "eor w18, w30, w22, ror #13", + "eor w30, w18, w22, ror #22", + "add w18, w25, w30", + "mov w25, v7.s[2]", + "add w30, w24, w25", + "and w24, w30, w20", + "bic w20, w21, w30", + "eor w21, w24, w20", + "ror w20, w30, #6", + "eor w24, w20, w30, ror #11", + "eor w20, w24, w30, ror #25", + "add w24, w21, w20", + "mov w20, v14.s[1]", + "add w21, w24, w20", + "mov w20, v7.s[1]", + "add w24, w21, w20", + "and w20, w22, w23", + "orr w21, w22, w23", + "and w22, w18, w21", + "orr w21, w22, w20", + "add w20, w24, w21", + "ror w21, w18, #2", + "eor w22, w21, w18, ror #13", + "eor w21, w22, w18, ror #22", + "add w22, w20, w21", + "mov w20, v7.s[3]", + "add w21, w24, w20", + "mov v4.16b, v7.16b", + "mov v4.s[3], w22", + "mov v7.16b, v4.16b", + "mov v7.s[2], w18", + "mov v4.16b, v7.16b", + "mov v4.s[1], w21", + "mov v7.16b, v4.16b", + "mov v7.s[0], w30", + "mov v17.16b, v7.16b", + "mov v16.16b, v8.16b", + "ext v4.16b, v9.16b, v8.16b, #4", + "mov v16.16b, v4.16b", + "add v14.4s, v3.4s, v4.4s", + "mov v24.16b, v14.16b", + "ldr x20, [sp, #32]", + "ldr x21, [sp, #128]", + "ldr q3, [x20, x21, sxtx]", + "mov v16.16b, v3.16b", + "mov w21, v8.s[2]", + "mov w22, v8.s[3]", + "mov w23, v14.s[0]", "ror w24, w21, #17", "ror w25, w21, #19", - "eor w24, w24, w25", - "lsr w25, w21, #10", - "eor w24, w24, w25", - "add w23, w23, w24", - "mov v5.16b, v24.16b", - "mov v5.s[3], w23", - "mov v5.s[2], w22", - "mov v5.s[1], w21", - "mov v24.16b, v5.16b", - "mov v24.s[0], w20", - "add v16.4s, v16.4s, v23.4s", + "eor w30, w24, w25", + "lsr w24, w21, #10", + "eor w21, w30, w24", + "add w24, w23, w21", + "mov w21, v14.s[1]", + "ror w23, w22, #17", + "ror w25, w22, #19", + "eor w30, w23, w25", + "lsr w23, w22, #10", + "eor w22, w30, w23", + "add w23, w21, w22", + "mov w21, v14.s[2]", + "ror w22, w24, #17", + "ror w25, w24, #19", + "eor w30, w22, w25", + "lsr w22, w24, #10", + "eor w25, w30, w22", + "add w22, w21, w25", + "mov w21, v14.s[3]", + "ror w25, w23, #17", + "ror w30, w23, #19", + "eor w18, w25, w30", + "lsr w25, w23, #10", + "eor w30, w18, w25", + "add w25, w21, w30", + "mov v4.16b, v14.16b", + "mov v4.s[3], w25", + "mov v14.16b, v4.16b", + "mov v14.s[2], w22", + "mov v4.16b, v14.16b", + "mov v4.s[1], w23", + "mov v14.16b, v4.16b", + "mov v14.s[0], w24", + "mov v24.16b, v14.16b", + "add v4.4s, v3.4s, v13.4s", + "mov v16.16b, v4.16b", + "mov v0.16b, v13.16b", "unimplemented (Unimplemented)", - "mov w20, v17.s[1]", - "mov w21, v17.s[0]", - "mov w22, v20.s[1]", - "and w23, w20, w21", - "bic w22, w22, w20", - "eor w22, w23, w22", - "ror w23, w20, #6", - "eor w23, w23, w20, ror #11", - "eor w23, w23, w20, ror #25", - "add w22, w22, w23", - "mov w23, v16.s[0]", - "add w22, w22, w23", - "mov w23, v20.s[0]", - "add w22, w22, w23", - "mov w23, v17.s[3]", - "mov w24, v17.s[2]", - "mov w25, v20.s[3]", - "and w30, w24, w25", - "orr w25, w24, w25", - "and w25, w23, w25", - "orr w25, w25, w30", - "add w25, w22, w25", - "ror w30, w23, #2", - "eor w30, w30, w23, ror #13", - "eor w30, w30, w23, ror #22", - "add w25, w25, w30", - "mov w30, v20.s[2]", - "add w22, w22, w30", - "and w20, w22, w20", - "bic w21, w21, w22", - "eor w20, w20, w21", - "ror w21, w22, #6", - "eor w21, w21, w22, ror #11", - "eor w21, w21, w22, ror #25", - "add w20, w20, w21", - "mov w21, v16.s[1]", - "add w20, w20, w21", - "mov w21, v20.s[1]", - "add w20, w20, w21", - "and w21, w23, w24", - "orr w23, w23, w24", - "and w23, w25, w23", - "orr w21, w23, w21", - "add w21, w20, w21", - "ror w23, w25, #2", - "eor w23, w23, w25, ror #13", - "eor w23, w23, w25, ror #22", - "add w21, w21, w23", - "mov w23, v20.s[3]", - "add w20, w20, w23", - "mov v5.16b, v20.16b", - "mov v5.s[3], w21", - "mov v5.s[2], w25", - "mov v5.s[1], w20", - "mov v20.16b, v5.16b", - "mov v20.s[0], w22", - "tbl v16.16b, {v16.16b}, v4.16b", - "mov w20, v20.s[1]", - "mov w21, v20.s[0]", - "mov w22, v17.s[1]", + "mov v3.16b, v0.16b", + "mov v23.16b, v3.16b", + "mov w21, v7.s[1]", + "mov w22, v7.s[0]", + "mov w23, v12.s[1]", + "and w24, w21, w22", + "bic w25, w23, w21", + "eor w23, w24, w25", + "ror w24, w21, #6", + "eor w25, w24, w21, ror #11", + "eor w24, w25, w21, ror #25", + "add w25, w23, w24", + "mov w23, v4.s[0]", + "add w24, w25, w23", + "mov w23, v12.s[0]", + "add w25, w24, w23", + "mov w23, v7.s[3]", + "mov w24, v7.s[2]", + "mov w30, v12.s[3]", + "and w18, w24, w30", + "orr w20, w24, w30", + "and w30, w23, w20", + "orr w20, w30, w18", + "add w30, w25, w20", + "ror w20, w23, #2", + "eor w18, w20, w23, ror #13", + "eor w20, w18, w23, ror #22", + "add w18, w30, w20", + "mov w20, v12.s[2]", + "add w30, w25, w20", + "and w20, w30, w21", + "bic w21, w22, w30", + "eor w22, w20, w21", + "ror w20, w30, #6", + "eor w21, w20, w30, ror #11", + "eor w20, w21, w30, ror #25", + "add w21, w22, w20", + "mov w20, v4.s[1]", + "add w22, w21, w20", + "mov w20, v12.s[1]", + "add w21, w22, w20", + "and w20, w23, w24", + "orr w22, w23, w24", + "and w23, w18, w22", + "orr w22, w23, w20", + "add w20, w21, w22", + "ror w22, w18, #2", + "eor w23, w22, w18, ror #13", + "eor w22, w23, w18, ror #22", + "add w23, w20, w22", + "mov w20, v12.s[3]", + "add w22, w21, w20", + "mov v13.16b, v12.16b", + "mov v13.s[3], w23", + "mov v12.16b, v13.16b", + "mov v12.s[2], w18", + "mov v13.16b, v12.16b", + "mov v13.s[1], w22", + "mov v12.16b, v13.16b", + "mov v12.s[0], w30", + "mov v20.16b, v12.16b", + "tbl v13.16b, {v4.16b}, v5.16b", + "mov v16.16b, v13.16b", + "mov w20, v12.s[1]", + "mov w21, v12.s[0]", + "mov w22, v7.s[1]", "and w23, w20, w21", - "bic w22, w22, w20", - "eor w22, w23, w22", + "bic w24, w22, w20", + "eor w22, w23, w24", "ror w23, w20, #6", - "eor w23, w23, w20, ror #11", - "eor w23, w23, w20, ror #25", - "add w22, w22, w23", - "mov w23, v16.s[0]", - "add w22, w22, w23", - "mov w23, v17.s[0]", - "add w22, w22, w23", - "mov w23, v20.s[3]", - "mov w24, v20.s[2]", - "mov w25, v17.s[3]", - "and w30, w24, w25", - "orr w25, w24, w25", - "and w25, w23, w25", - "orr w25, w25, w30", - "add w25, w22, w25", - "ror w30, w23, #2", - "eor w30, w30, w23, ror #13", - "eor w30, w30, w23, ror #22", - "add w25, w25, w30", - "mov w30, v17.s[2]", - "add w22, w22, w30", - "and w20, w22, w20", - "bic w21, w21, w22", - "eor w20, w20, w21", - "ror w21, w22, #6", - "eor w21, w21, w22, ror #11", - "eor w21, w21, w22, ror #25", - "add w20, w20, w21", - "mov w21, v16.s[1]", - "add w20, w20, w21", - "mov w21, v17.s[1]", - "add w20, w20, w21", - "and w21, w23, w24", - "orr w23, w23, w24", - "and w23, w25, w23", - "orr w21, w23, w21", - "add w21, w20, w21", - "ror w23, w25, #2", - "eor w23, w23, w25, ror #13", - "eor w23, w23, w25, ror #22", - "add w21, w21, w23", - "mov w23, v17.s[3]", - "add w20, w20, w23", - "mov v5.16b, v17.16b", - "mov v5.s[3], w21", - "mov v5.s[2], w25", - "mov v5.s[1], w20", - "mov v17.16b, v5.16b", - "mov v17.s[0], w22", - "mov v16.16b, v24.16b", - "ext v16.16b, v21.16b, v24.16b, #4", - "add v23.4s, v23.4s, v16.4s", + "eor w24, w23, w20, ror #11", + "eor w23, w24, w20, ror #25", + "add w24, w22, w23", + "mov w22, v13.s[0]", + "add w23, w24, w22", + "mov w22, v7.s[0]", + "add w24, w23, w22", + "mov w22, v12.s[3]", + "mov w23, v12.s[2]", + "mov w25, v7.s[3]", + "and w30, w23, w25", + "orr w18, w23, w25", + "and w25, w22, w18", + "orr w18, w25, w30", + "add w25, w24, w18", + "ror w30, w22, #2", + "eor w18, w30, w22, ror #13", + "eor w30, w18, w22, ror #22", + "add w18, w25, w30", + "mov w25, v7.s[2]", + "add w30, w24, w25", + "and w24, w30, w20", + "bic w20, w21, w30", + "eor w21, w24, w20", + "ror w20, w30, #6", + "eor w24, w20, w30, ror #11", + "eor w20, w24, w30, ror #25", + "add w24, w21, w20", + "mov w20, v13.s[1]", + "add w21, w24, w20", + "mov w20, v7.s[1]", + "add w24, w21, w20", + "and w20, w22, w23", + "orr w21, w22, w23", + "and w22, w18, w21", + "orr w21, w22, w20", + "add w20, w24, w21", + "ror w21, w18, #2", + "eor w22, w21, w18, ror #13", + "eor w21, w22, w18, ror #22", + "add w22, w20, w21", + "mov w20, v7.s[3]", + "add w21, w24, w20", + "mov v4.16b, v7.16b", + "mov v4.s[3], w22", + "mov v7.16b, v4.16b", + "mov v7.s[2], w18", + "mov v4.16b, v7.16b", + "mov v4.s[1], w21", + "mov v7.16b, v4.16b", + "mov v7.s[0], w30", + "mov v17.16b, v7.16b", + "mov v16.16b, v14.16b", + "ext v4.16b, v8.16b, v14.16b, #4", + "mov v16.16b, v4.16b", + "add v13.4s, v3.4s, v4.4s", + "mov v23.16b, v13.16b", "mov w20, #0x1000", "movk w20, #0x1, lsl #16", - "ldr q16, [x29, x20, sxtx]", - "mov w20, v24.s[2]", - "mov w21, v24.s[3]", - "mov w22, v23.s[0]", - "ror w23, w20, #17", - "ror w24, w20, #19", - "eor w23, w23, w24", - "lsr w20, w20, #10", - "eor w20, w23, w20", - "add w20, w22, w20", - "mov w22, v23.s[1]", - "ror w23, w21, #17", - "ror w24, w21, #19", - "eor w23, w23, w24", - "lsr w21, w21, #10", - "eor w21, w23, w21", - "add w21, w22, w21", - "mov w22, v23.s[2]", - "ror w23, w20, #17", - "ror w24, w20, #19", - "eor w23, w23, w24", - "lsr w24, w20, #10", - "eor w23, w23, w24", - "add w22, w22, w23", - "mov w23, v23.s[3]", - "ror w24, w21, #17", - "ror w25, w21, #19", - "eor w24, w24, w25", - "lsr w25, w21, #10", - "eor w24, w24, w25", - "add w23, w23, w24", - "mov v5.16b, v23.16b", - "mov v5.s[3], w23", - "mov v5.s[2], w22", - "mov v5.s[1], w21", - "mov v23.16b, v5.16b", - "mov v23.s[0], w20", - "add v16.4s, v16.4s, v22.4s", + "ldr x21, [sp, #32]", + "ldr q3, [x21, x20, sxtx]", + "mov v16.16b, v3.16b", + "mov w22, v14.s[2]", + "mov w23, v14.s[3]", + "mov w24, v13.s[0]", + "ror w25, w22, #17", + "ror w30, w22, #19", + "eor w18, w25, w30", + "lsr w25, w22, #10", + "eor w22, w18, w25", + "add w25, w24, w22", + "mov w22, v13.s[1]", + "ror w24, w23, #17", + "ror w30, w23, #19", + "eor w18, w24, w30", + "lsr w24, w23, #10", + "eor w23, w18, w24", + "add w24, w22, w23", + "mov w22, v13.s[2]", + "ror w23, w25, #17", + "ror w30, w25, #19", + "eor w18, w23, w30", + "lsr w23, w25, #10", + "eor w30, w18, w23", + "add w23, w22, w30", + "mov w22, v13.s[3]", + "ror w30, w24, #17", + "ror w18, w24, #19", + "str x20, [sp, #160]", + "eor w20, w30, w18", + "lsr w30, w24, #10", + "eor w18, w20, w30", + "add w20, w22, w18", + "mov v4.16b, v13.16b", + "mov v4.s[3], w20", + "mov v13.16b, v4.16b", + "mov v13.s[2], w23", + "mov v4.16b, v13.16b", + "mov v4.s[1], w24", + "mov v13.16b, v4.16b", + "mov v13.s[0], w25", + "mov v23.16b, v13.16b", + "add v4.4s, v3.4s, v9.4s", + "mov v16.16b, v4.16b", + "mov v0.16b, v9.16b", "unimplemented (Unimplemented)", - "mov w20, v17.s[1]", - "mov w21, v17.s[0]", - "mov w22, v20.s[1]", + "mov v3.16b, v0.16b", + "mov v22.16b, v3.16b", + "mov w20, v7.s[1]", + "mov w22, v7.s[0]", + "mov w23, v12.s[1]", + "and w24, w20, w22", + "bic w25, w23, w20", + "eor w23, w24, w25", + "ror w24, w20, #6", + "eor w25, w24, w20, ror #11", + "eor w24, w25, w20, ror #25", + "add w25, w23, w24", + "mov w23, v4.s[0]", + "add w24, w25, w23", + "mov w23, v12.s[0]", + "add w25, w24, w23", + "mov w23, v7.s[3]", + "mov w24, v7.s[2]", + "mov w30, v12.s[3]", + "and w18, w24, w30", + "orr w21, w24, w30", + "and w30, w23, w21", + "orr w21, w30, w18", + "add w30, w25, w21", + "ror w21, w23, #2", + "eor w18, w21, w23, ror #13", + "eor w21, w18, w23, ror #22", + "add w18, w30, w21", + "mov w21, v12.s[2]", + "add w30, w25, w21", + "and w21, w30, w20", + "bic w20, w22, w30", + "eor w22, w21, w20", + "ror w20, w30, #6", + "eor w21, w20, w30, ror #11", + "eor w20, w21, w30, ror #25", + "add w21, w22, w20", + "mov w20, v4.s[1]", + "add w22, w21, w20", + "mov w20, v12.s[1]", + "add w21, w22, w20", + "and w20, w23, w24", + "orr w22, w23, w24", + "and w23, w18, w22", + "orr w22, w23, w20", + "add w20, w21, w22", + "ror w22, w18, #2", + "eor w23, w22, w18, ror #13", + "eor w22, w23, w18, ror #22", + "add w23, w20, w22", + "mov w20, v12.s[3]", + "add w22, w21, w20", + "mov v9.16b, v12.16b", + "mov v9.s[3], w23", + "mov v12.16b, v9.16b", + "mov v12.s[2], w18", + "mov v9.16b, v12.16b", + "mov v9.s[1], w22", + "mov v12.16b, v9.16b", + "mov v12.s[0], w30", + "mov v20.16b, v12.16b", + "tbl v9.16b, {v4.16b}, v5.16b", + "mov v16.16b, v9.16b", + "mov w20, v12.s[1]", + "mov w21, v12.s[0]", + "mov w22, v7.s[1]", "and w23, w20, w21", - "bic w22, w22, w20", - "eor w22, w23, w22", + "bic w24, w22, w20", + "eor w22, w23, w24", "ror w23, w20, #6", - "eor w23, w23, w20, ror #11", - "eor w23, w23, w20, ror #25", - "add w22, w22, w23", - "mov w23, v16.s[0]", - "add w22, w22, w23", - "mov w23, v20.s[0]", - "add w22, w22, w23", - "mov w23, v17.s[3]", - "mov w24, v17.s[2]", - "mov w25, v20.s[3]", - "and w30, w24, w25", - "orr w25, w24, w25", - "and w25, w23, w25", - "orr w25, w25, w30", - "add w25, w22, w25", - "ror w30, w23, #2", - "eor w30, w30, w23, ror #13", - "eor w30, w30, w23, ror #22", - "add w25, w25, w30", - "mov w30, v20.s[2]", - "add w22, w22, w30", - "and w20, w22, w20", - "bic w21, w21, w22", - "eor w20, w20, w21", - "ror w21, w22, #6", - "eor w21, w21, w22, ror #11", - "eor w21, w21, w22, ror #25", - "add w20, w20, w21", - "mov w21, v16.s[1]", - "add w20, w20, w21", - "mov w21, v20.s[1]", - "add w20, w20, w21", - "and w21, w23, w24", - "orr w23, w23, w24", - "and w23, w25, w23", - "orr w21, w23, w21", - "add w21, w20, w21", - "ror w23, w25, #2", - "eor w23, w23, w25, ror #13", - "eor w23, w23, w25, ror #22", - "add w21, w21, w23", - "mov w23, v20.s[3]", - "add w20, w20, w23", - "mov v5.16b, v20.16b", - "mov v5.s[3], w21", - "mov v5.s[2], w25", - "mov v5.s[1], w20", - "mov v20.16b, v5.16b", - "mov v20.s[0], w22", - "tbl v16.16b, {v16.16b}, v4.16b", - "mov w20, v20.s[1]", - "mov w21, v20.s[0]", - "mov w22, v17.s[1]", - "and w23, w20, w21", - "bic w22, w22, w20", - "eor w22, w23, w22", - "ror w23, w20, #6", - "eor w23, w23, w20, ror #11", - "eor w23, w23, w20, ror #25", - "add w22, w22, w23", - "mov w23, v16.s[0]", - "add w22, w22, w23", - "mov w23, v17.s[0]", - "add w22, w22, w23", - "mov w23, v20.s[3]", - "mov w24, v20.s[2]", - "mov w25, v17.s[3]", - "and w30, w24, w25", - "orr w25, w24, w25", - "and w25, w23, w25", - "orr w25, w25, w30", - "add w25, w22, w25", - "ror w30, w23, #2", - "eor w30, w30, w23, ror #13", - "eor w30, w30, w23, ror #22", - "add w25, w25, w30", - "mov w30, v17.s[2]", - "add w22, w22, w30", - "and w20, w22, w20", - "bic w21, w21, w22", - "eor w20, w20, w21", - "ror w21, w22, #6", - "eor w21, w21, w22, ror #11", - "eor w21, w21, w22, ror #25", - "add w20, w20, w21", - "mov w21, v16.s[1]", - "add w20, w20, w21", - "mov w21, v17.s[1]", - "add w20, w20, w21", - "and w21, w23, w24", - "orr w23, w23, w24", - "and w23, w25, w23", - "orr w21, w23, w21", - "add w21, w20, w21", - "ror w23, w25, #2", - "eor w23, w23, w25, ror #13", - "eor w23, w23, w25, ror #22", - "add w21, w21, w23", - "mov w23, v17.s[3]", - "add w20, w20, w23", - "mov v5.16b, v17.16b", - "mov v5.s[3], w21", - "mov v5.s[2], w25", - "mov v5.s[1], w20", - "mov v17.16b, v5.16b", - "mov v17.s[0], w22", - "mov v16.16b, v23.16b", - "ext v16.16b, v24.16b, v23.16b, #4", - "add v22.4s, v22.4s, v16.4s", - "mov w20, #0x1000", - "movk w20, #0x1, lsl #16", - "ldr q16, [x29, x20, sxtx]", - "mov w20, v23.s[2]", - "mov w21, v23.s[3]", - "mov w22, v22.s[0]", - "ror w23, w20, #17", - "ror w24, w20, #19", - "eor w23, w23, w24", - "lsr w20, w20, #10", - "eor w20, w23, w20", - "add w20, w22, w20", - "mov w22, v22.s[1]", - "ror w23, w21, #17", - "ror w24, w21, #19", - "eor w23, w23, w24", - "lsr w21, w21, #10", - "eor w21, w23, w21", - "add w21, w22, w21", - "mov w22, v22.s[2]", - "ror w23, w20, #17", - "ror w24, w20, #19", - "eor w23, w23, w24", - "lsr w24, w20, #10", - "eor w23, w23, w24", - "add w22, w22, w23", - "mov w23, v22.s[3]", + "eor w24, w23, w20, ror #11", + "eor w23, w24, w20, ror #25", + "add w24, w22, w23", + "mov w22, v9.s[0]", + "add w23, w24, w22", + "mov w22, v7.s[0]", + "add w24, w23, w22", + "mov w22, v12.s[3]", + "mov w23, v12.s[2]", + "mov w25, v7.s[3]", + "and w30, w23, w25", + "orr w18, w23, w25", + "and w25, w22, w18", + "orr w18, w25, w30", + "add w25, w24, w18", + "ror w30, w22, #2", + "eor w18, w30, w22, ror #13", + "eor w30, w18, w22, ror #22", + "add w18, w25, w30", + "mov w25, v7.s[2]", + "add w30, w24, w25", + "and w24, w30, w20", + "bic w20, w21, w30", + "eor w21, w24, w20", + "ror w20, w30, #6", + "eor w24, w20, w30, ror #11", + "eor w20, w24, w30, ror #25", + "add w24, w21, w20", + "mov w20, v9.s[1]", + "add w21, w24, w20", + "mov w20, v7.s[1]", + "add w24, w21, w20", + "and w20, w22, w23", + "orr w21, w22, w23", + "and w22, w18, w21", + "orr w21, w22, w20", + "add w20, w24, w21", + "ror w21, w18, #2", + "eor w22, w21, w18, ror #13", + "eor w21, w22, w18, ror #22", + "add w22, w20, w21", + "mov w20, v7.s[3]", + "add w21, w24, w20", + "mov v4.16b, v7.16b", + "mov v4.s[3], w22", + "mov v7.16b, v4.16b", + "mov v7.s[2], w18", + "mov v4.16b, v7.16b", + "mov v4.s[1], w21", + "mov v7.16b, v4.16b", + "mov v7.s[0], w30", + "mov v17.16b, v7.16b", + "mov v16.16b, v13.16b", + "ext v4.16b, v14.16b, v13.16b, #4", + "mov v16.16b, v4.16b", + "add v9.4s, v3.4s, v4.4s", + "mov v22.16b, v9.16b", + "ldr x20, [sp, #32]", + "ldr x21, [sp, #160]", + "ldr q3, [x20, x21, sxtx]", + "mov v16.16b, v3.16b", + "mov w21, v13.s[2]", + "mov w22, v13.s[3]", + "mov w23, v9.s[0]", "ror w24, w21, #17", "ror w25, w21, #19", - "eor w24, w24, w25", - "lsr w25, w21, #10", - "eor w24, w24, w25", - "add w23, w23, w24", - "mov v5.16b, v22.16b", - "mov v5.s[3], w23", - "mov v5.s[2], w22", - "mov v5.s[1], w21", - "mov v22.16b, v5.16b", - "mov v22.s[0], w20", - "add v16.4s, v16.4s, v21.4s", + "eor w30, w24, w25", + "lsr w24, w21, #10", + "eor w21, w30, w24", + "add w24, w23, w21", + "mov w21, v9.s[1]", + "ror w23, w22, #17", + "ror w25, w22, #19", + "eor w30, w23, w25", + "lsr w23, w22, #10", + "eor w22, w30, w23", + "add w23, w21, w22", + "mov w21, v9.s[2]", + "ror w22, w24, #17", + "ror w25, w24, #19", + "eor w30, w22, w25", + "lsr w22, w24, #10", + "eor w25, w30, w22", + "add w22, w21, w25", + "mov w21, v9.s[3]", + "ror w25, w23, #17", + "ror w30, w23, #19", + "eor w18, w25, w30", + "lsr w25, w23, #10", + "eor w30, w18, w25", + "add w25, w21, w30", + "mov v4.16b, v9.16b", + "mov v4.s[3], w25", + "mov v9.16b, v4.16b", + "mov v9.s[2], w22", + "mov v4.16b, v9.16b", + "mov v4.s[1], w23", + "mov v9.16b, v4.16b", + "mov v9.s[0], w24", + "mov v22.16b, v9.16b", + "add v4.4s, v3.4s, v8.4s", + "mov v16.16b, v4.16b", + "mov v0.16b, v8.16b", "unimplemented (Unimplemented)", - "mov w20, v17.s[1]", - "mov w21, v17.s[0]", - "mov w22, v20.s[1]", - "and w23, w20, w21", - "bic w22, w22, w20", - "eor w22, w23, w22", - "ror w23, w20, #6", - "eor w23, w23, w20, ror #11", - "eor w23, w23, w20, ror #25", - "add w22, w22, w23", - "mov w23, v16.s[0]", - "add w22, w22, w23", - "mov w23, v20.s[0]", - "add w22, w22, w23", - "mov w23, v17.s[3]", - "mov w24, v17.s[2]", - "mov w25, v20.s[3]", - "and w30, w24, w25", - "orr w25, w24, w25", - "and w25, w23, w25", - "orr w25, w25, w30", - "add w25, w22, w25", - "ror w30, w23, #2", - "eor w30, w30, w23, ror #13", - "eor w30, w30, w23, ror #22", - "add w25, w25, w30", - "mov w30, v20.s[2]", - "add w22, w22, w30", - "and w20, w22, w20", - "bic w21, w21, w22", - "eor w20, w20, w21", - "ror w21, w22, #6", - "eor w21, w21, w22, ror #11", - "eor w21, w21, w22, ror #25", - "add w20, w20, w21", - "mov w21, v16.s[1]", - "add w20, w20, w21", - "mov w21, v20.s[1]", - "add w20, w20, w21", - "and w21, w23, w24", - "orr w23, w23, w24", - "and w23, w25, w23", - "orr w21, w23, w21", - "add w21, w20, w21", - "ror w23, w25, #2", - "eor w23, w23, w25, ror #13", - "eor w23, w23, w25, ror #22", - "add w21, w21, w23", - "mov w23, v20.s[3]", - "add w20, w20, w23", - "mov v5.16b, v20.16b", - "mov v5.s[3], w21", - "mov v5.s[2], w25", - "mov v5.s[1], w20", - "mov v20.16b, v5.16b", - "mov v20.s[0], w22", - "tbl v16.16b, {v16.16b}, v4.16b", - "mov w20, v20.s[1]", - "mov w21, v20.s[0]", - "mov w22, v17.s[1]", + "mov v3.16b, v0.16b", + "mov v21.16b, v3.16b", + "mov w21, v7.s[1]", + "mov w22, v7.s[0]", + "mov w23, v12.s[1]", + "and w24, w21, w22", + "bic w25, w23, w21", + "eor w23, w24, w25", + "ror w24, w21, #6", + "eor w25, w24, w21, ror #11", + "eor w24, w25, w21, ror #25", + "add w25, w23, w24", + "mov w23, v4.s[0]", + "add w24, w25, w23", + "mov w23, v12.s[0]", + "add w25, w24, w23", + "mov w23, v7.s[3]", + "mov w24, v7.s[2]", + "mov w30, v12.s[3]", + "and w18, w24, w30", + "orr w20, w24, w30", + "and w30, w23, w20", + "orr w20, w30, w18", + "add w30, w25, w20", + "ror w20, w23, #2", + "eor w18, w20, w23, ror #13", + "eor w20, w18, w23, ror #22", + "add w18, w30, w20", + "mov w20, v12.s[2]", + "add w30, w25, w20", + "and w20, w30, w21", + "bic w21, w22, w30", + "eor w22, w20, w21", + "ror w20, w30, #6", + "eor w21, w20, w30, ror #11", + "eor w20, w21, w30, ror #25", + "add w21, w22, w20", + "mov w20, v4.s[1]", + "add w22, w21, w20", + "mov w20, v12.s[1]", + "add w21, w22, w20", + "and w20, w23, w24", + "orr w22, w23, w24", + "and w23, w18, w22", + "orr w22, w23, w20", + "add w20, w21, w22", + "ror w22, w18, #2", + "eor w23, w22, w18, ror #13", + "eor w22, w23, w18, ror #22", + "add w23, w20, w22", + "mov w20, v12.s[3]", + "add w22, w21, w20", + "mov v8.16b, v12.16b", + "mov v8.s[3], w23", + "mov v12.16b, v8.16b", + "mov v12.s[2], w18", + "mov v8.16b, v12.16b", + "mov v8.s[1], w22", + "mov v12.16b, v8.16b", + "mov v12.s[0], w30", + "mov v20.16b, v12.16b", + "tbl v8.16b, {v4.16b}, v5.16b", + "mov v16.16b, v8.16b", + "mov w20, v12.s[1]", + "mov w21, v12.s[0]", + "mov w22, v7.s[1]", "and w23, w20, w21", - "bic w22, w22, w20", - "eor w22, w23, w22", + "bic w24, w22, w20", + "eor w22, w23, w24", "ror w23, w20, #6", - "eor w23, w23, w20, ror #11", - "eor w23, w23, w20, ror #25", - "add w22, w22, w23", - "mov w23, v16.s[0]", - "add w22, w22, w23", - "mov w23, v17.s[0]", - "add w22, w22, w23", - "mov w23, v20.s[3]", - "mov w24, v20.s[2]", - "mov w25, v17.s[3]", - "and w30, w24, w25", - "orr w25, w24, w25", - "and w25, w23, w25", - "orr w25, w25, w30", - "add w25, w22, w25", - "ror w30, w23, #2", - "eor w30, w30, w23, ror #13", - "eor w30, w30, w23, ror #22", - "add w25, w25, w30", - "mov w30, v17.s[2]", - "add w22, w22, w30", - "and w20, w22, w20", - "bic w21, w21, w22", - "eor w20, w20, w21", - "ror w21, w22, #6", - "eor w21, w21, w22, ror #11", - "eor w21, w21, w22, ror #25", - "add w20, w20, w21", - "mov w21, v16.s[1]", - "add w20, w20, w21", - "mov w21, v17.s[1]", - "add w20, w20, w21", - "and w21, w23, w24", - "orr w23, w23, w24", - "and w23, w25, w23", - "orr w21, w23, w21", - "add w21, w20, w21", - "ror w23, w25, #2", - "eor w23, w23, w25, ror #13", - "eor w23, w23, w25, ror #22", - "add w21, w21, w23", - "mov w23, v17.s[3]", - "add w20, w20, w23", - "mov v5.16b, v17.16b", - "mov v5.s[3], w21", - "mov v5.s[2], w25", - "mov v5.s[1], w20", - "mov v17.16b, v5.16b", - "mov v17.s[0], w22", - "mov v16.16b, v22.16b", - "ext v16.16b, v23.16b, v22.16b, #4", - "add v21.4s, v21.4s, v16.4s", + "eor w24, w23, w20, ror #11", + "eor w23, w24, w20, ror #25", + "add w24, w22, w23", + "mov w22, v8.s[0]", + "add w23, w24, w22", + "mov w22, v7.s[0]", + "add w24, w23, w22", + "mov w22, v12.s[3]", + "mov w23, v12.s[2]", + "mov w25, v7.s[3]", + "and w30, w23, w25", + "orr w18, w23, w25", + "and w25, w22, w18", + "orr w18, w25, w30", + "add w25, w24, w18", + "ror w30, w22, #2", + "eor w18, w30, w22, ror #13", + "eor w30, w18, w22, ror #22", + "add w18, w25, w30", + "mov w25, v7.s[2]", + "add w30, w24, w25", + "and w24, w30, w20", + "bic w20, w21, w30", + "eor w21, w24, w20", + "ror w20, w30, #6", + "eor w24, w20, w30, ror #11", + "eor w20, w24, w30, ror #25", + "add w24, w21, w20", + "mov w20, v8.s[1]", + "add w21, w24, w20", + "mov w20, v7.s[1]", + "add w24, w21, w20", + "and w20, w22, w23", + "orr w21, w22, w23", + "and w22, w18, w21", + "orr w21, w22, w20", + "add w20, w24, w21", + "ror w21, w18, #2", + "eor w22, w21, w18, ror #13", + "eor w21, w22, w18, ror #22", + "add w22, w20, w21", + "mov w20, v7.s[3]", + "add w21, w24, w20", + "mov v4.16b, v7.16b", + "mov v4.s[3], w22", + "mov v7.16b, v4.16b", + "mov v7.s[2], w18", + "mov v4.16b, v7.16b", + "mov v4.s[1], w21", + "mov v7.16b, v4.16b", + "mov v7.s[0], w30", + "mov v17.16b, v7.16b", + "mov v16.16b, v9.16b", + "ext v4.16b, v13.16b, v9.16b, #4", + "mov v16.16b, v4.16b", + "add v8.4s, v3.4s, v4.4s", + "mov v21.16b, v8.16b", "mov w20, #0x1000", "movk w20, #0x1, lsl #16", - "ldr q16, [x29, x20, sxtx]", - "mov w20, v22.s[2]", - "mov w21, v22.s[3]", - "mov w22, v21.s[0]", - "ror w23, w20, #17", - "ror w24, w20, #19", - "eor w23, w23, w24", - "lsr w20, w20, #10", - "eor w20, w23, w20", - "add w20, w22, w20", - "mov w22, v21.s[1]", - "ror w23, w21, #17", - "ror w24, w21, #19", - "eor w23, w23, w24", - "lsr w21, w21, #10", - "eor w21, w23, w21", - "add w21, w22, w21", - "mov w22, v21.s[2]", - "ror w23, w20, #17", - "ror w24, w20, #19", - "eor w23, w23, w24", - "lsr w24, w20, #10", - "eor w23, w23, w24", - "add w22, w22, w23", - "mov w23, v21.s[3]", - "ror w24, w21, #17", - "ror w25, w21, #19", - "eor w24, w24, w25", - "lsr w25, w21, #10", - "eor w24, w24, w25", - "add w23, w23, w24", - "mov v5.16b, v21.16b", - "mov v5.s[3], w23", - "mov v5.s[2], w22", - "mov v5.s[1], w21", - "mov v21.16b, v5.16b", - "mov v21.s[0], w20", - "add v16.4s, v16.4s, v24.4s", + "ldr x21, [sp, #32]", + "ldr q3, [x21, x20, sxtx]", + "mov v16.16b, v3.16b", + "mov w22, v9.s[2]", + "mov w23, v9.s[3]", + "mov w24, v8.s[0]", + "ror w25, w22, #17", + "ror w30, w22, #19", + "eor w18, w25, w30", + "lsr w25, w22, #10", + "eor w22, w18, w25", + "add w25, w24, w22", + "mov w22, v8.s[1]", + "ror w24, w23, #17", + "ror w30, w23, #19", + "eor w18, w24, w30", + "lsr w24, w23, #10", + "eor w23, w18, w24", + "add w24, w22, w23", + "mov w22, v8.s[2]", + "ror w23, w25, #17", + "ror w30, w25, #19", + "eor w18, w23, w30", + "lsr w23, w25, #10", + "eor w30, w18, w23", + "add w23, w22, w30", + "mov w22, v8.s[3]", + "ror w30, w24, #17", + "ror w18, w24, #19", + "str x20, [sp, #192]", + "eor w20, w30, w18", + "lsr w30, w24, #10", + "eor w18, w20, w30", + "add w20, w22, w18", + "mov v4.16b, v8.16b", + "mov v4.s[3], w20", + "mov v8.16b, v4.16b", + "mov v8.s[2], w23", + "mov v4.16b, v8.16b", + "mov v4.s[1], w24", + "mov v8.16b, v4.16b", + "mov v8.s[0], w25", + "mov v21.16b, v8.16b", + "add v4.4s, v3.4s, v14.4s", + "mov v16.16b, v4.16b", + "mov v0.16b, v14.16b", "unimplemented (Unimplemented)", - "mov w20, v17.s[1]", - "mov w21, v17.s[0]", - "mov w22, v20.s[1]", + "mov v3.16b, v0.16b", + "mov v24.16b, v3.16b", + "mov w20, v7.s[1]", + "mov w22, v7.s[0]", + "mov w23, v12.s[1]", + "and w24, w20, w22", + "bic w25, w23, w20", + "eor w23, w24, w25", + "ror w24, w20, #6", + "eor w25, w24, w20, ror #11", + "eor w24, w25, w20, ror #25", + "add w25, w23, w24", + "mov w23, v4.s[0]", + "add w24, w25, w23", + "mov w23, v12.s[0]", + "add w25, w24, w23", + "mov w23, v7.s[3]", + "mov w24, v7.s[2]", + "mov w30, v12.s[3]", + "and w18, w24, w30", + "orr w21, w24, w30", + "and w30, w23, w21", + "orr w21, w30, w18", + "add w30, w25, w21", + "ror w21, w23, #2", + "eor w18, w21, w23, ror #13", + "eor w21, w18, w23, ror #22", + "add w18, w30, w21", + "mov w21, v12.s[2]", + "add w30, w25, w21", + "and w21, w30, w20", + "bic w20, w22, w30", + "eor w22, w21, w20", + "ror w20, w30, #6", + "eor w21, w20, w30, ror #11", + "eor w20, w21, w30, ror #25", + "add w21, w22, w20", + "mov w20, v4.s[1]", + "add w22, w21, w20", + "mov w20, v12.s[1]", + "add w21, w22, w20", + "and w20, w23, w24", + "orr w22, w23, w24", + "and w23, w18, w22", + "orr w22, w23, w20", + "add w20, w21, w22", + "ror w22, w18, #2", + "eor w23, w22, w18, ror #13", + "eor w22, w23, w18, ror #22", + "add w23, w20, w22", + "mov w20, v12.s[3]", + "add w22, w21, w20", + "mov v14.16b, v12.16b", + "mov v14.s[3], w23", + "mov v12.16b, v14.16b", + "mov v12.s[2], w18", + "mov v14.16b, v12.16b", + "mov v14.s[1], w22", + "mov v12.16b, v14.16b", + "mov v12.s[0], w30", + "mov v20.16b, v12.16b", + "tbl v14.16b, {v4.16b}, v5.16b", + "mov v16.16b, v14.16b", + "mov w20, v12.s[1]", + "mov w21, v12.s[0]", + "mov w22, v7.s[1]", "and w23, w20, w21", - "bic w22, w22, w20", - "eor w22, w23, w22", + "bic w24, w22, w20", + "eor w22, w23, w24", "ror w23, w20, #6", - "eor w23, w23, w20, ror #11", - "eor w23, w23, w20, ror #25", - "add w22, w22, w23", - "mov w23, v16.s[0]", - "add w22, w22, w23", - "mov w23, v20.s[0]", - "add w22, w22, w23", - "mov w23, v17.s[3]", - "mov w24, v17.s[2]", - "mov w25, v20.s[3]", - "and w30, w24, w25", - "orr w25, w24, w25", - "and w25, w23, w25", - "orr w25, w25, w30", - "add w25, w22, w25", - "ror w30, w23, #2", - "eor w30, w30, w23, ror #13", - "eor w30, w30, w23, ror #22", - "add w25, w25, w30", - "mov w30, v20.s[2]", - "add w22, w22, w30", - "and w20, w22, w20", - "bic w21, w21, w22", - "eor w20, w20, w21", - "ror w21, w22, #6", - "eor w21, w21, w22, ror #11", - "eor w21, w21, w22, ror #25", - "add w20, w20, w21", - "mov w21, v16.s[1]", - "add w20, w20, w21", - "mov w21, v20.s[1]", - "add w20, w20, w21", - "and w21, w23, w24", - "orr w23, w23, w24", - "and w23, w25, w23", - "orr w21, w23, w21", - "add w21, w20, w21", - "ror w23, w25, #2", - "eor w23, w23, w25, ror #13", - "eor w23, w23, w25, ror #22", - "add w21, w21, w23", - "mov w23, v20.s[3]", - "add w20, w20, w23", - "mov v5.16b, v20.16b", - "mov v5.s[3], w21", - "mov v5.s[2], w25", - "mov v5.s[1], w20", - "mov v20.16b, v5.16b", - "mov v20.s[0], w22", - "tbl v16.16b, {v16.16b}, v4.16b", - "mov w20, v20.s[1]", - "mov w21, v20.s[0]", - "mov w22, v17.s[1]", - "and w23, w20, w21", - "bic w22, w22, w20", - "eor w22, w23, w22", - "ror w23, w20, #6", - "eor w23, w23, w20, ror #11", - "eor w23, w23, w20, ror #25", - "add w22, w22, w23", - "mov w23, v16.s[0]", - "add w22, w22, w23", - "mov w23, v17.s[0]", - "add w22, w22, w23", - "mov w23, v20.s[3]", - "mov w24, v20.s[2]", - "mov w25, v17.s[3]", - "and w30, w24, w25", - "orr w25, w24, w25", - "and w25, w23, w25", - "orr w25, w25, w30", - "add w25, w22, w25", - "ror w30, w23, #2", - "eor w30, w30, w23, ror #13", - "eor w30, w30, w23, ror #22", - "add w25, w25, w30", - "mov w30, v17.s[2]", - "add w22, w22, w30", - "and w20, w22, w20", - "bic w21, w21, w22", - "eor w20, w20, w21", - "ror w21, w22, #6", - "eor w21, w21, w22, ror #11", - "eor w21, w21, w22, ror #25", - "add w20, w20, w21", - "mov w21, v16.s[1]", - "add w20, w20, w21", - "mov w21, v17.s[1]", - "add w20, w20, w21", - "and w21, w23, w24", - "orr w23, w23, w24", - "and w23, w25, w23", - "orr w21, w23, w21", - "add w21, w20, w21", - "ror w23, w25, #2", - "eor w23, w23, w25, ror #13", - "eor w23, w23, w25, ror #22", - "add w21, w21, w23", - "mov w23, v17.s[3]", - "add w20, w20, w23", - "mov v5.16b, v17.16b", - "mov v5.s[3], w21", - "mov v5.s[2], w25", - "mov v5.s[1], w20", - "mov v17.16b, v5.16b", - "mov v17.s[0], w22", - "mov v16.16b, v21.16b", - "ext v16.16b, v22.16b, v21.16b, #4", - "add v24.4s, v24.4s, v16.4s", - "mov w20, #0x1000", - "movk w20, #0x1, lsl #16", - "ldr q16, [x29, x20, sxtx]", - "mov w20, v21.s[2]", - "mov w21, v21.s[3]", - "mov w22, v24.s[0]", - "ror w23, w20, #17", - "ror w24, w20, #19", - "eor w23, w23, w24", - "lsr w20, w20, #10", - "eor w20, w23, w20", - "add w20, w22, w20", - "mov w22, v24.s[1]", - "ror w23, w21, #17", - "ror w24, w21, #19", - "eor w23, w23, w24", - "lsr w21, w21, #10", - "eor w21, w23, w21", - "add w21, w22, w21", - "mov w22, v24.s[2]", - "ror w23, w20, #17", - "ror w24, w20, #19", - "eor w23, w23, w24", - "lsr w24, w20, #10", - "eor w23, w23, w24", - "add w22, w22, w23", - "mov w23, v24.s[3]", + "eor w24, w23, w20, ror #11", + "eor w23, w24, w20, ror #25", + "add w24, w22, w23", + "mov w22, v14.s[0]", + "add w23, w24, w22", + "mov w22, v7.s[0]", + "add w24, w23, w22", + "mov w22, v12.s[3]", + "mov w23, v12.s[2]", + "mov w25, v7.s[3]", + "and w30, w23, w25", + "orr w18, w23, w25", + "and w25, w22, w18", + "orr w18, w25, w30", + "add w25, w24, w18", + "ror w30, w22, #2", + "eor w18, w30, w22, ror #13", + "eor w30, w18, w22, ror #22", + "add w18, w25, w30", + "mov w25, v7.s[2]", + "add w30, w24, w25", + "and w24, w30, w20", + "bic w20, w21, w30", + "eor w21, w24, w20", + "ror w20, w30, #6", + "eor w24, w20, w30, ror #11", + "eor w20, w24, w30, ror #25", + "add w24, w21, w20", + "mov w20, v14.s[1]", + "add w21, w24, w20", + "mov w20, v7.s[1]", + "add w24, w21, w20", + "and w20, w22, w23", + "orr w21, w22, w23", + "and w22, w18, w21", + "orr w21, w22, w20", + "add w20, w24, w21", + "ror w21, w18, #2", + "eor w22, w21, w18, ror #13", + "eor w21, w22, w18, ror #22", + "add w22, w20, w21", + "mov w20, v7.s[3]", + "add w21, w24, w20", + "mov v4.16b, v7.16b", + "mov v4.s[3], w22", + "mov v7.16b, v4.16b", + "mov v7.s[2], w18", + "mov v4.16b, v7.16b", + "mov v4.s[1], w21", + "mov v7.16b, v4.16b", + "mov v7.s[0], w30", + "mov v17.16b, v7.16b", + "mov v16.16b, v8.16b", + "ext v4.16b, v9.16b, v8.16b, #4", + "mov v16.16b, v4.16b", + "add v14.4s, v3.4s, v4.4s", + "mov v24.16b, v14.16b", + "ldr x20, [sp, #32]", + "ldr x21, [sp, #192]", + "ldr q3, [x20, x21, sxtx]", + "mov v16.16b, v3.16b", + "mov w21, v8.s[2]", + "mov w22, v8.s[3]", + "mov w23, v14.s[0]", "ror w24, w21, #17", "ror w25, w21, #19", - "eor w24, w24, w25", - "lsr w25, w21, #10", - "eor w24, w24, w25", - "add w23, w23, w24", - "mov v5.16b, v24.16b", - "mov v5.s[3], w23", - "mov v5.s[2], w22", - "mov v5.s[1], w21", - "mov v24.16b, v5.16b", - "mov v24.s[0], w20", - "add v16.4s, v16.4s, v23.4s", + "eor w30, w24, w25", + "lsr w24, w21, #10", + "eor w21, w30, w24", + "add w24, w23, w21", + "mov w21, v14.s[1]", + "ror w23, w22, #17", + "ror w25, w22, #19", + "eor w30, w23, w25", + "lsr w23, w22, #10", + "eor w22, w30, w23", + "add w23, w21, w22", + "mov w21, v14.s[2]", + "ror w22, w24, #17", + "ror w25, w24, #19", + "eor w30, w22, w25", + "lsr w22, w24, #10", + "eor w25, w30, w22", + "add w22, w21, w25", + "mov w21, v14.s[3]", + "ror w25, w23, #17", + "ror w30, w23, #19", + "eor w18, w25, w30", + "lsr w25, w23, #10", + "eor w30, w18, w25", + "add w25, w21, w30", + "mov v4.16b, v14.16b", + "mov v4.s[3], w25", + "mov v14.16b, v4.16b", + "mov v14.s[2], w22", + "mov v4.16b, v14.16b", + "mov v4.s[1], w23", + "mov v14.16b, v4.16b", + "mov v14.s[0], w24", + "mov v24.16b, v14.16b", + "add v4.4s, v3.4s, v13.4s", + "mov v16.16b, v4.16b", + "mov v0.16b, v13.16b", "unimplemented (Unimplemented)", - "mov w20, v17.s[1]", - "mov w21, v17.s[0]", - "mov w22, v20.s[1]", - "and w23, w20, w21", - "bic w22, w22, w20", - "eor w22, w23, w22", - "ror w23, w20, #6", - "eor w23, w23, w20, ror #11", - "eor w23, w23, w20, ror #25", - "add w22, w22, w23", - "mov w23, v16.s[0]", - "add w22, w22, w23", - "mov w23, v20.s[0]", - "add w22, w22, w23", - "mov w23, v17.s[3]", - "mov w24, v17.s[2]", - "mov w25, v20.s[3]", - "and w30, w24, w25", - "orr w25, w24, w25", - "and w25, w23, w25", - "orr w25, w25, w30", - "add w25, w22, w25", - "ror w30, w23, #2", - "eor w30, w30, w23, ror #13", - "eor w30, w30, w23, ror #22", - "add w25, w25, w30", - "mov w30, v20.s[2]", - "add w22, w22, w30", - "and w20, w22, w20", - "bic w21, w21, w22", - "eor w20, w20, w21", - "ror w21, w22, #6", - "eor w21, w21, w22, ror #11", - "eor w21, w21, w22, ror #25", - "add w20, w20, w21", - "mov w21, v16.s[1]", - "add w20, w20, w21", - "mov w21, v20.s[1]", - "add w20, w20, w21", - "and w21, w23, w24", - "orr w23, w23, w24", - "and w23, w25, w23", - "orr w21, w23, w21", - "add w21, w20, w21", - "ror w23, w25, #2", - "eor w23, w23, w25, ror #13", - "eor w23, w23, w25, ror #22", - "add w21, w21, w23", - "mov w23, v20.s[3]", - "add w20, w20, w23", - "mov v5.16b, v20.16b", - "mov v5.s[3], w21", - "mov v5.s[2], w25", - "mov v5.s[1], w20", - "mov v20.16b, v5.16b", - "mov v20.s[0], w22", - "tbl v16.16b, {v16.16b}, v4.16b", - "mov w20, v20.s[1]", - "mov w21, v20.s[0]", - "mov w22, v17.s[1]", + "mov v3.16b, v0.16b", + "mov v23.16b, v3.16b", + "mov w21, v7.s[1]", + "mov w22, v7.s[0]", + "mov w23, v12.s[1]", + "and w24, w21, w22", + "bic w25, w23, w21", + "eor w23, w24, w25", + "ror w24, w21, #6", + "eor w25, w24, w21, ror #11", + "eor w24, w25, w21, ror #25", + "add w25, w23, w24", + "mov w23, v4.s[0]", + "add w24, w25, w23", + "mov w23, v12.s[0]", + "add w25, w24, w23", + "mov w23, v7.s[3]", + "mov w24, v7.s[2]", + "mov w30, v12.s[3]", + "and w18, w24, w30", + "orr w20, w24, w30", + "and w30, w23, w20", + "orr w20, w30, w18", + "add w30, w25, w20", + "ror w20, w23, #2", + "eor w18, w20, w23, ror #13", + "eor w20, w18, w23, ror #22", + "add w18, w30, w20", + "mov w20, v12.s[2]", + "add w30, w25, w20", + "and w20, w30, w21", + "bic w21, w22, w30", + "eor w22, w20, w21", + "ror w20, w30, #6", + "eor w21, w20, w30, ror #11", + "eor w20, w21, w30, ror #25", + "add w21, w22, w20", + "mov w20, v4.s[1]", + "add w22, w21, w20", + "mov w20, v12.s[1]", + "add w21, w22, w20", + "and w20, w23, w24", + "orr w22, w23, w24", + "and w23, w18, w22", + "orr w22, w23, w20", + "add w20, w21, w22", + "ror w22, w18, #2", + "eor w23, w22, w18, ror #13", + "eor w22, w23, w18, ror #22", + "add w23, w20, w22", + "mov w20, v12.s[3]", + "add w22, w21, w20", + "mov v13.16b, v12.16b", + "mov v13.s[3], w23", + "mov v12.16b, v13.16b", + "mov v12.s[2], w18", + "mov v13.16b, v12.16b", + "mov v13.s[1], w22", + "mov v12.16b, v13.16b", + "mov v12.s[0], w30", + "mov v20.16b, v12.16b", + "tbl v13.16b, {v4.16b}, v5.16b", + "mov v16.16b, v13.16b", + "mov w20, v12.s[1]", + "mov w21, v12.s[0]", + "mov w22, v7.s[1]", "and w23, w20, w21", - "bic w22, w22, w20", - "eor w22, w23, w22", + "bic w24, w22, w20", + "eor w22, w23, w24", "ror w23, w20, #6", - "eor w23, w23, w20, ror #11", - "eor w23, w23, w20, ror #25", - "add w22, w22, w23", - "mov w23, v16.s[0]", - "add w22, w22, w23", - "mov w23, v17.s[0]", - "add w22, w22, w23", - "mov w23, v20.s[3]", - "mov w24, v20.s[2]", - "mov w25, v17.s[3]", - "and w30, w24, w25", - "orr w25, w24, w25", - "and w25, w23, w25", - "orr w25, w25, w30", - "add w25, w22, w25", - "ror w30, w23, #2", - "eor w30, w30, w23, ror #13", - "eor w30, w30, w23, ror #22", - "add w25, w25, w30", - "mov w30, v17.s[2]", - "add w22, w22, w30", - "and w20, w22, w20", - "bic w21, w21, w22", - "eor w20, w20, w21", - "ror w21, w22, #6", - "eor w21, w21, w22, ror #11", - "eor w21, w21, w22, ror #25", - "add w20, w20, w21", - "mov w21, v16.s[1]", - "add w20, w20, w21", - "mov w21, v17.s[1]", - "add w20, w20, w21", - "and w21, w23, w24", - "orr w23, w23, w24", - "and w23, w25, w23", - "orr w21, w23, w21", - "add w21, w20, w21", - "ror w23, w25, #2", - "eor w23, w23, w25, ror #13", - "eor w23, w23, w25, ror #22", - "add w21, w21, w23", - "mov w23, v17.s[3]", - "add w20, w20, w23", - "mov v5.16b, v17.16b", - "mov v5.s[3], w21", - "mov v5.s[2], w25", - "mov v5.s[1], w20", - "mov v17.16b, v5.16b", - "mov v17.s[0], w22", - "mov v16.16b, v24.16b", - "ext v16.16b, v21.16b, v24.16b, #4", - "add v23.4s, v23.4s, v16.4s", + "eor w24, w23, w20, ror #11", + "eor w23, w24, w20, ror #25", + "add w24, w22, w23", + "mov w22, v13.s[0]", + "add w23, w24, w22", + "mov w22, v7.s[0]", + "add w24, w23, w22", + "mov w22, v12.s[3]", + "mov w23, v12.s[2]", + "mov w25, v7.s[3]", + "and w30, w23, w25", + "orr w18, w23, w25", + "and w25, w22, w18", + "orr w18, w25, w30", + "add w25, w24, w18", + "ror w30, w22, #2", + "eor w18, w30, w22, ror #13", + "eor w30, w18, w22, ror #22", + "add w18, w25, w30", + "mov w25, v7.s[2]", + "add w30, w24, w25", + "and w24, w30, w20", + "bic w20, w21, w30", + "eor w21, w24, w20", + "ror w20, w30, #6", + "eor w24, w20, w30, ror #11", + "eor w20, w24, w30, ror #25", + "add w24, w21, w20", + "mov w20, v13.s[1]", + "add w21, w24, w20", + "mov w20, v7.s[1]", + "add w24, w21, w20", + "and w20, w22, w23", + "orr w21, w22, w23", + "and w22, w18, w21", + "orr w21, w22, w20", + "add w20, w24, w21", + "ror w21, w18, #2", + "eor w22, w21, w18, ror #13", + "eor w21, w22, w18, ror #22", + "add w22, w20, w21", + "mov w20, v7.s[3]", + "add w21, w24, w20", + "mov v4.16b, v7.16b", + "mov v4.s[3], w22", + "mov v7.16b, v4.16b", + "mov v7.s[2], w18", + "mov v4.16b, v7.16b", + "mov v4.s[1], w21", + "mov v7.16b, v4.16b", + "mov v7.s[0], w30", + "mov v17.16b, v7.16b", + "mov v16.16b, v14.16b", + "ext v4.16b, v8.16b, v14.16b, #4", + "mov v16.16b, v4.16b", + "add v13.4s, v3.4s, v4.4s", + "mov v23.16b, v13.16b", "mov w20, #0x1000", "movk w20, #0x1, lsl #16", - "ldr q16, [x29, x20, sxtx]", - "mov w20, v24.s[2]", - "mov w21, v24.s[3]", - "mov w22, v23.s[0]", - "ror w23, w20, #17", - "ror w24, w20, #19", - "eor w23, w23, w24", - "lsr w20, w20, #10", - "eor w20, w23, w20", - "add w20, w22, w20", - "mov w22, v23.s[1]", - "ror w23, w21, #17", - "ror w24, w21, #19", - "eor w23, w23, w24", - "lsr w21, w21, #10", - "eor w21, w23, w21", - "add w21, w22, w21", - "mov w22, v23.s[2]", - "ror w23, w20, #17", - "ror w24, w20, #19", - "eor w23, w23, w24", - "lsr w24, w20, #10", - "eor w23, w23, w24", - "add w22, w22, w23", - "mov w23, v23.s[3]", - "ror w24, w21, #17", - "ror w25, w21, #19", - "eor w24, w24, w25", - "lsr w25, w21, #10", - "eor w24, w24, w25", - "add w23, w23, w24", - "mov v5.16b, v23.16b", - "mov v5.s[3], w23", - "mov v5.s[2], w22", - "mov v5.s[1], w21", - "mov v23.16b, v5.16b", - "mov v23.s[0], w20", - "add v16.4s, v16.4s, v22.4s", + "ldr x21, [sp, #32]", + "ldr q3, [x21, x20, sxtx]", + "mov v16.16b, v3.16b", + "mov w22, v14.s[2]", + "mov w23, v14.s[3]", + "mov w24, v13.s[0]", + "ror w25, w22, #17", + "ror w30, w22, #19", + "eor w18, w25, w30", + "lsr w25, w22, #10", + "eor w22, w18, w25", + "add w25, w24, w22", + "mov w22, v13.s[1]", + "ror w24, w23, #17", + "ror w30, w23, #19", + "eor w18, w24, w30", + "lsr w24, w23, #10", + "eor w23, w18, w24", + "add w24, w22, w23", + "mov w22, v13.s[2]", + "ror w23, w25, #17", + "ror w30, w25, #19", + "eor w18, w23, w30", + "lsr w23, w25, #10", + "eor w30, w18, w23", + "add w23, w22, w30", + "mov w22, v13.s[3]", + "ror w30, w24, #17", + "ror w18, w24, #19", + "str x20, [sp, #224]", + "eor w20, w30, w18", + "lsr w30, w24, #10", + "eor w18, w20, w30", + "add w20, w22, w18", + "mov v4.16b, v13.16b", + "mov v4.s[3], w20", + "mov v13.16b, v4.16b", + "mov v13.s[2], w23", + "mov v4.16b, v13.16b", + "mov v4.s[1], w24", + "mov v13.16b, v4.16b", + "mov v13.s[0], w25", + "mov v23.16b, v13.16b", + "add v4.4s, v3.4s, v9.4s", + "mov v16.16b, v4.16b", + "mov v0.16b, v9.16b", "unimplemented (Unimplemented)", - "mov w20, v17.s[1]", - "mov w21, v17.s[0]", - "mov w22, v20.s[1]", + "mov v3.16b, v0.16b", + "mov v22.16b, v3.16b", + "mov w20, v7.s[1]", + "mov w22, v7.s[0]", + "mov w23, v12.s[1]", + "and w24, w20, w22", + "bic w25, w23, w20", + "eor w23, w24, w25", + "ror w24, w20, #6", + "eor w25, w24, w20, ror #11", + "eor w24, w25, w20, ror #25", + "add w25, w23, w24", + "mov w23, v4.s[0]", + "add w24, w25, w23", + "mov w23, v12.s[0]", + "add w25, w24, w23", + "mov w23, v7.s[3]", + "mov w24, v7.s[2]", + "mov w30, v12.s[3]", + "and w18, w24, w30", + "orr w21, w24, w30", + "and w30, w23, w21", + "orr w21, w30, w18", + "add w30, w25, w21", + "ror w21, w23, #2", + "eor w18, w21, w23, ror #13", + "eor w21, w18, w23, ror #22", + "add w18, w30, w21", + "mov w21, v12.s[2]", + "add w30, w25, w21", + "and w21, w30, w20", + "bic w20, w22, w30", + "eor w22, w21, w20", + "ror w20, w30, #6", + "eor w21, w20, w30, ror #11", + "eor w20, w21, w30, ror #25", + "add w21, w22, w20", + "mov w20, v4.s[1]", + "add w22, w21, w20", + "mov w20, v12.s[1]", + "add w21, w22, w20", + "and w20, w23, w24", + "orr w22, w23, w24", + "and w23, w18, w22", + "orr w22, w23, w20", + "add w20, w21, w22", + "ror w22, w18, #2", + "eor w23, w22, w18, ror #13", + "eor w22, w23, w18, ror #22", + "add w23, w20, w22", + "mov w20, v12.s[3]", + "add w22, w21, w20", + "mov v9.16b, v12.16b", + "mov v9.s[3], w23", + "mov v12.16b, v9.16b", + "mov v12.s[2], w18", + "mov v9.16b, v12.16b", + "mov v9.s[1], w22", + "mov v12.16b, v9.16b", + "mov v12.s[0], w30", + "mov v20.16b, v12.16b", + "tbl v9.16b, {v4.16b}, v5.16b", + "mov v16.16b, v9.16b", + "mov w20, v12.s[1]", + "mov w21, v12.s[0]", + "mov w22, v7.s[1]", "and w23, w20, w21", - "bic w22, w22, w20", - "eor w22, w23, w22", + "bic w24, w22, w20", + "eor w22, w23, w24", "ror w23, w20, #6", - "eor w23, w23, w20, ror #11", - "eor w23, w23, w20, ror #25", - "add w22, w22, w23", - "mov w23, v16.s[0]", - "add w22, w22, w23", - "mov w23, v20.s[0]", - "add w22, w22, w23", - "mov w23, v17.s[3]", - "mov w24, v17.s[2]", - "mov w25, v20.s[3]", - "and w30, w24, w25", - "orr w25, w24, w25", - "and w25, w23, w25", - "orr w25, w25, w30", - "add w25, w22, w25", - "ror w30, w23, #2", - "eor w30, w30, w23, ror #13", - "eor w30, w30, w23, ror #22", - "add w25, w25, w30", - "mov w30, v20.s[2]", - "add w22, w22, w30", - "and w20, w22, w20", - "bic w21, w21, w22", - "eor w20, w20, w21", - "ror w21, w22, #6", - "eor w21, w21, w22, ror #11", - "eor w21, w21, w22, ror #25", - "add w20, w20, w21", - "mov w21, v16.s[1]", - "add w20, w20, w21", - "mov w21, v20.s[1]", - "add w20, w20, w21", - "and w21, w23, w24", - "orr w23, w23, w24", - "and w23, w25, w23", - "orr w21, w23, w21", - "add w21, w20, w21", - "ror w23, w25, #2", - "eor w23, w23, w25, ror #13", - "eor w23, w23, w25, ror #22", - "add w21, w21, w23", - "mov w23, v20.s[3]", - "add w20, w20, w23", - "mov v5.16b, v20.16b", - "mov v5.s[3], w21", - "mov v5.s[2], w25", - "mov v5.s[1], w20", - "mov v20.16b, v5.16b", - "mov v20.s[0], w22", - "tbl v16.16b, {v16.16b}, v4.16b", - "mov w20, v20.s[1]", - "mov w21, v20.s[0]", - "mov w22, v17.s[1]", - "and w23, w20, w21", - "bic w22, w22, w20", - "eor w22, w23, w22", - "ror w23, w20, #6", - "eor w23, w23, w20, ror #11", - "eor w23, w23, w20, ror #25", - "add w22, w22, w23", - "mov w23, v16.s[0]", - "add w22, w22, w23", - "mov w23, v17.s[0]", - "add w22, w22, w23", - "mov w23, v20.s[3]", - "mov w24, v20.s[2]", - "mov w25, v17.s[3]", - "and w30, w24, w25", - "orr w25, w24, w25", - "and w25, w23, w25", - "orr w25, w25, w30", - "add w25, w22, w25", - "ror w30, w23, #2", - "eor w30, w30, w23, ror #13", - "eor w30, w30, w23, ror #22", - "add w25, w25, w30", - "mov w30, v17.s[2]", - "add w22, w22, w30", - "and w20, w22, w20", - "bic w21, w21, w22", - "eor w20, w20, w21", - "ror w21, w22, #6", - "eor w21, w21, w22, ror #11", - "eor w21, w21, w22, ror #25", - "add w20, w20, w21", - "mov w21, v16.s[1]", - "add w20, w20, w21", - "mov w21, v17.s[1]", - "add w20, w20, w21", - "and w21, w23, w24", - "orr w23, w23, w24", - "and w23, w25, w23", - "orr w21, w23, w21", - "add w21, w20, w21", - "ror w23, w25, #2", - "eor w23, w23, w25, ror #13", - "eor w23, w23, w25, ror #22", - "add w21, w21, w23", - "mov w23, v17.s[3]", - "add w20, w20, w23", - "mov v5.16b, v17.16b", - "mov v5.s[3], w21", - "mov v5.s[2], w25", - "mov v5.s[1], w20", - "mov v17.16b, v5.16b", - "mov v17.s[0], w22", - "mov v16.16b, v23.16b", - "ext v16.16b, v24.16b, v23.16b, #4", - "add v22.4s, v22.4s, v16.4s", - "mov w20, #0x1000", - "movk w20, #0x1, lsl #16", - "ldr q16, [x29, x20, sxtx]", - "mov w20, v23.s[2]", - "mov w21, v23.s[3]", - "mov w22, v22.s[0]", - "ror w23, w20, #17", - "ror w24, w20, #19", - "eor w23, w23, w24", - "lsr w20, w20, #10", - "eor w20, w23, w20", - "add w20, w22, w20", - "mov w22, v22.s[1]", - "ror w23, w21, #17", - "ror w24, w21, #19", - "eor w23, w23, w24", - "lsr w21, w21, #10", - "eor w21, w23, w21", - "add w21, w22, w21", - "mov w22, v22.s[2]", - "ror w23, w20, #17", - "ror w24, w20, #19", - "eor w23, w23, w24", - "lsr w24, w20, #10", - "eor w23, w23, w24", - "add w22, w22, w23", - "mov w23, v22.s[3]", + "eor w24, w23, w20, ror #11", + "eor w23, w24, w20, ror #25", + "add w24, w22, w23", + "mov w22, v9.s[0]", + "add w23, w24, w22", + "mov w22, v7.s[0]", + "add w24, w23, w22", + "mov w22, v12.s[3]", + "mov w23, v12.s[2]", + "mov w25, v7.s[3]", + "and w30, w23, w25", + "orr w18, w23, w25", + "and w25, w22, w18", + "orr w18, w25, w30", + "add w25, w24, w18", + "ror w30, w22, #2", + "eor w18, w30, w22, ror #13", + "eor w30, w18, w22, ror #22", + "add w18, w25, w30", + "mov w25, v7.s[2]", + "add w30, w24, w25", + "and w24, w30, w20", + "bic w20, w21, w30", + "eor w21, w24, w20", + "ror w20, w30, #6", + "eor w24, w20, w30, ror #11", + "eor w20, w24, w30, ror #25", + "add w24, w21, w20", + "mov w20, v9.s[1]", + "add w21, w24, w20", + "mov w20, v7.s[1]", + "add w24, w21, w20", + "and w20, w22, w23", + "orr w21, w22, w23", + "and w22, w18, w21", + "orr w21, w22, w20", + "add w20, w24, w21", + "ror w21, w18, #2", + "eor w22, w21, w18, ror #13", + "eor w21, w22, w18, ror #22", + "add w22, w20, w21", + "mov w20, v7.s[3]", + "add w21, w24, w20", + "mov v4.16b, v7.16b", + "mov v4.s[3], w22", + "mov v7.16b, v4.16b", + "mov v7.s[2], w18", + "mov v4.16b, v7.16b", + "mov v4.s[1], w21", + "mov v7.16b, v4.16b", + "mov v7.s[0], w30", + "mov v17.16b, v7.16b", + "mov v16.16b, v13.16b", + "ext v4.16b, v14.16b, v13.16b, #4", + "mov v16.16b, v4.16b", + "add v9.4s, v3.4s, v4.4s", + "mov v22.16b, v9.16b", + "ldr x20, [sp, #32]", + "ldr x21, [sp, #224]", + "ldr q3, [x20, x21, sxtx]", + "mov v16.16b, v3.16b", + "mov w21, v13.s[2]", + "mov w22, v13.s[3]", + "mov w23, v9.s[0]", "ror w24, w21, #17", "ror w25, w21, #19", - "eor w24, w24, w25", - "lsr w25, w21, #10", - "eor w24, w24, w25", - "add w23, w23, w24", - "mov v5.16b, v22.16b", - "mov v5.s[3], w23", - "mov v5.s[2], w22", - "mov v5.s[1], w21", - "mov v22.16b, v5.16b", - "mov v22.s[0], w20", - "add v16.4s, v16.4s, v21.4s", + "eor w30, w24, w25", + "lsr w24, w21, #10", + "eor w21, w30, w24", + "add w24, w23, w21", + "mov w21, v9.s[1]", + "ror w23, w22, #17", + "ror w25, w22, #19", + "eor w30, w23, w25", + "lsr w23, w22, #10", + "eor w22, w30, w23", + "add w23, w21, w22", + "mov w21, v9.s[2]", + "ror w22, w24, #17", + "ror w25, w24, #19", + "eor w30, w22, w25", + "lsr w22, w24, #10", + "eor w25, w30, w22", + "add w22, w21, w25", + "mov w21, v9.s[3]", + "ror w25, w23, #17", + "ror w30, w23, #19", + "eor w18, w25, w30", + "lsr w25, w23, #10", + "eor w30, w18, w25", + "add w25, w21, w30", + "mov v4.16b, v9.16b", + "mov v4.s[3], w25", + "mov v9.16b, v4.16b", + "mov v9.s[2], w22", + "mov v4.16b, v9.16b", + "mov v4.s[1], w23", + "mov v9.16b, v4.16b", + "mov v9.s[0], w24", + "mov v22.16b, v9.16b", + "add v4.4s, v3.4s, v8.4s", + "mov v16.16b, v4.16b", + "mov v0.16b, v8.16b", "unimplemented (Unimplemented)", - "mov w20, v17.s[1]", - "mov w21, v17.s[0]", - "mov w22, v20.s[1]", - "and w23, w20, w21", - "bic w22, w22, w20", - "eor w22, w23, w22", - "ror w23, w20, #6", - "eor w23, w23, w20, ror #11", - "eor w23, w23, w20, ror #25", - "add w22, w22, w23", - "mov w23, v16.s[0]", - "add w22, w22, w23", - "mov w23, v20.s[0]", - "add w22, w22, w23", - "mov w23, v17.s[3]", - "mov w24, v17.s[2]", - "mov w25, v20.s[3]", - "and w30, w24, w25", - "orr w25, w24, w25", - "and w25, w23, w25", - "orr w25, w25, w30", - "add w25, w22, w25", - "ror w30, w23, #2", - "eor w30, w30, w23, ror #13", - "eor w30, w30, w23, ror #22", - "add w25, w25, w30", - "mov w30, v20.s[2]", - "add w22, w22, w30", - "and w20, w22, w20", - "bic w21, w21, w22", - "eor w20, w20, w21", - "ror w21, w22, #6", - "eor w21, w21, w22, ror #11", - "eor w21, w21, w22, ror #25", - "add w20, w20, w21", - "mov w21, v16.s[1]", - "add w20, w20, w21", - "mov w21, v20.s[1]", - "add w20, w20, w21", - "and w21, w23, w24", - "orr w23, w23, w24", - "and w23, w25, w23", - "orr w21, w23, w21", - "add w21, w20, w21", - "ror w23, w25, #2", - "eor w23, w23, w25, ror #13", - "eor w23, w23, w25, ror #22", - "add w21, w21, w23", - "mov w23, v20.s[3]", - "add w20, w20, w23", - "mov v5.16b, v20.16b", - "mov v5.s[3], w21", - "mov v5.s[2], w25", - "mov v5.s[1], w20", - "mov v20.16b, v5.16b", - "mov v20.s[0], w22", - "tbl v16.16b, {v16.16b}, v4.16b", - "mov w20, v20.s[1]", - "mov w21, v20.s[0]", - "mov w22, v17.s[1]", + "mov v3.16b, v0.16b", + "mov v21.16b, v3.16b", + "mov w21, v7.s[1]", + "mov w22, v7.s[0]", + "mov w23, v12.s[1]", + "and w24, w21, w22", + "bic w25, w23, w21", + "eor w23, w24, w25", + "ror w24, w21, #6", + "eor w25, w24, w21, ror #11", + "eor w24, w25, w21, ror #25", + "add w25, w23, w24", + "mov w23, v4.s[0]", + "add w24, w25, w23", + "mov w23, v12.s[0]", + "add w25, w24, w23", + "mov w23, v7.s[3]", + "mov w24, v7.s[2]", + "mov w30, v12.s[3]", + "and w18, w24, w30", + "orr w20, w24, w30", + "and w30, w23, w20", + "orr w20, w30, w18", + "add w30, w25, w20", + "ror w20, w23, #2", + "eor w18, w20, w23, ror #13", + "eor w20, w18, w23, ror #22", + "add w18, w30, w20", + "mov w20, v12.s[2]", + "add w30, w25, w20", + "and w20, w30, w21", + "bic w21, w22, w30", + "eor w22, w20, w21", + "ror w20, w30, #6", + "eor w21, w20, w30, ror #11", + "eor w20, w21, w30, ror #25", + "add w21, w22, w20", + "mov w20, v4.s[1]", + "add w22, w21, w20", + "mov w20, v12.s[1]", + "add w21, w22, w20", + "and w20, w23, w24", + "orr w22, w23, w24", + "and w23, w18, w22", + "orr w22, w23, w20", + "add w20, w21, w22", + "ror w22, w18, #2", + "eor w23, w22, w18, ror #13", + "eor w22, w23, w18, ror #22", + "add w23, w20, w22", + "mov w20, v12.s[3]", + "add w22, w21, w20", + "mov v8.16b, v12.16b", + "mov v8.s[3], w23", + "mov v12.16b, v8.16b", + "mov v12.s[2], w18", + "mov v8.16b, v12.16b", + "mov v8.s[1], w22", + "mov v12.16b, v8.16b", + "mov v12.s[0], w30", + "mov v20.16b, v12.16b", + "tbl v8.16b, {v4.16b}, v5.16b", + "mov v16.16b, v8.16b", + "mov w20, v12.s[1]", + "mov w21, v12.s[0]", + "mov w22, v7.s[1]", "and w23, w20, w21", - "bic w22, w22, w20", - "eor w22, w23, w22", + "bic w24, w22, w20", + "eor w22, w23, w24", "ror w23, w20, #6", - "eor w23, w23, w20, ror #11", - "eor w23, w23, w20, ror #25", - "add w22, w22, w23", - "mov w23, v16.s[0]", - "add w22, w22, w23", - "mov w23, v17.s[0]", - "add w22, w22, w23", - "mov w23, v20.s[3]", - "mov w24, v20.s[2]", - "mov w25, v17.s[3]", - "and w30, w24, w25", - "orr w25, w24, w25", - "and w25, w23, w25", - "orr w25, w25, w30", - "add w25, w22, w25", - "ror w30, w23, #2", - "eor w30, w30, w23, ror #13", - "eor w30, w30, w23, ror #22", - "add w25, w25, w30", - "mov w30, v17.s[2]", - "add w22, w22, w30", - "and w20, w22, w20", - "bic w21, w21, w22", - "eor w20, w20, w21", - "ror w21, w22, #6", - "eor w21, w21, w22, ror #11", - "eor w21, w21, w22, ror #25", - "add w20, w20, w21", - "mov w21, v16.s[1]", - "add w20, w20, w21", - "mov w21, v17.s[1]", - "add w20, w20, w21", - "and w21, w23, w24", - "orr w23, w23, w24", - "and w23, w25, w23", - "orr w21, w23, w21", - "add w21, w20, w21", - "ror w23, w25, #2", - "eor w23, w23, w25, ror #13", - "eor w23, w23, w25, ror #22", - "add w21, w21, w23", - "mov w23, v17.s[3]", - "add w20, w20, w23", - "mov v5.16b, v17.16b", - "mov v5.s[3], w21", - "mov v5.s[2], w25", - "mov v5.s[1], w20", - "mov v17.16b, v5.16b", - "mov v17.s[0], w22", - "mov v16.16b, v22.16b", - "ext v16.16b, v23.16b, v22.16b, #4", + "eor w24, w23, w20, ror #11", + "eor w23, w24, w20, ror #25", + "add w24, w22, w23", + "mov w22, v8.s[0]", + "add w23, w24, w22", + "mov w22, v7.s[0]", + "add w24, w23, w22", + "mov w22, v12.s[3]", + "mov w23, v12.s[2]", + "mov w25, v7.s[3]", + "and w30, w23, w25", + "orr w18, w23, w25", + "and w25, w22, w18", + "orr w18, w25, w30", + "add w25, w24, w18", + "ror w30, w22, #2", + "eor w18, w30, w22, ror #13", + "eor w30, w18, w22, ror #22", + "add w18, w25, w30", + "mov w25, v7.s[2]", + "add w30, w24, w25", + "and w24, w30, w20", + "bic w20, w21, w30", + "eor w21, w24, w20", + "ror w20, w30, #6", + "eor w24, w20, w30, ror #11", + "eor w20, w24, w30, ror #25", + "add w24, w21, w20", + "mov w20, v8.s[1]", + "add w21, w24, w20", + "mov w20, v7.s[1]", + "add w24, w21, w20", + "and w20, w22, w23", + "orr w21, w22, w23", + "and w22, w18, w21", + "orr w21, w22, w20", + "add w20, w24, w21", + "ror w21, w18, #2", + "eor w22, w21, w18, ror #13", + "eor w21, w22, w18, ror #22", + "add w22, w20, w21", + "mov w20, v7.s[3]", + "add w21, w24, w20", + "mov v4.16b, v7.16b", + "mov v4.s[3], w22", + "mov v7.16b, v4.16b", + "mov v7.s[2], w18", + "mov v4.16b, v7.16b", + "mov v4.s[1], w21", + "mov v7.16b, v4.16b", + "mov v7.s[0], w30", + "mov v17.16b, v7.16b", + "mov v16.16b, v9.16b", + "ext v4.16b, v13.16b, v9.16b, #4", + "mov v16.16b, v4.16b", "mov w20, #0x1000", "movk w20, #0x1, lsl #16", - "ldr q5, [x29, x20, sxtx]", - "add v23.4s, v23.4s, v5.4s", - "add v21.4s, v21.4s, v16.4s", - "ldr q16, [x29, x20, sxtx]", - "mov w21, v22.s[2]", - "mov w22, v22.s[3]", - "mov w23, v21.s[0]", - "ror w24, w21, #17", - "ror w25, w21, #19", - "eor w24, w24, w25", - "lsr w21, w21, #10", - "eor w21, w24, w21", - "add w21, w23, w21", - "mov w23, v21.s[1]", - "ror w24, w22, #17", - "ror w25, w22, #19", - "eor w24, w24, w25", - "lsr w22, w22, #10", - "eor w22, w24, w22", - "add w22, w23, w22", - "mov w23, v21.s[2]", - "ror w24, w21, #17", - "ror w25, w21, #19", - "eor w24, w24, w25", - "lsr w25, w21, #10", - "eor w24, w24, w25", - "add w23, w23, w24", - "mov w24, v21.s[3]", + "ldr x21, [sp, #32]", + "ldr q8, [x21, x20, sxtx]", + "add v15.4s, v13.4s, v8.4s", + "mov v23.16b, v15.16b", + "add v8.4s, v3.4s, v4.4s", + "mov v21.16b, v8.16b", + "ldr q3, [x21, x20, sxtx]", + "mov v16.16b, v3.16b", + "mov w22, v9.s[2]", + "mov w23, v9.s[3]", + "mov w24, v8.s[0]", "ror w25, w22, #17", "ror w30, w22, #19", - "eor w25, w25, w30", - "lsr w30, w22, #10", - "eor w25, w25, w30", - "add w24, w24, w25", - "mov v5.16b, v21.16b", - "mov v5.s[3], w24", - "mov v5.s[2], w23", - "mov v5.s[1], w22", - "mov v21.16b, v5.16b", - "mov v21.s[0], w21", - "ldr q5, [x29, x20, sxtx]", - "add v22.4s, v22.4s, v5.4s", - "ldr q5, [x29, x20, sxtx]", - "add v21.4s, v21.4s, v5.4s", - "add v16.4s, v16.4s, v24.4s", - "mov w20, v17.s[1]", - "mov w21, v17.s[0]", - "mov w22, v20.s[1]", + "eor w18, w25, w30", + "lsr w25, w22, #10", + "eor w22, w18, w25", + "add w25, w24, w22", + "mov w22, v8.s[1]", + "ror w24, w23, #17", + "ror w30, w23, #19", + "eor w18, w24, w30", + "lsr w24, w23, #10", + "eor w23, w18, w24", + "add w24, w22, w23", + "mov w22, v8.s[2]", + "ror w23, w25, #17", + "ror w30, w25, #19", + "eor w18, w23, w30", + "lsr w23, w25, #10", + "eor w30, w18, w23", + "add w23, w22, w30", + "mov w22, v8.s[3]", + "ror w30, w24, #17", + "ror w18, w24, #19", + "str x20, [sp, #256]", + "eor w20, w30, w18", + "lsr w30, w24, #10", + "eor w18, w20, w30", + "add w20, w22, w18", + "mov v4.16b, v8.16b", + "mov v4.s[3], w20", + "mov v8.16b, v4.16b", + "mov v8.s[2], w23", + "mov v4.16b, v8.16b", + "mov v4.s[1], w24", + "mov v8.16b, v4.16b", + "mov v8.s[0], w25", + "mov v21.16b, v8.16b", + "ldr x20, [sp, #256]", + "ldr q4, [x21, x20, sxtx]", + "add v13.4s, v9.4s, v4.4s", + "mov v22.16b, v13.16b", + "ldr q4, [x21, x20, sxtx]", + "add v9.4s, v8.4s, v4.4s", + "mov v21.16b, v9.16b", + "add v4.4s, v3.4s, v14.4s", + "mov v16.16b, v4.16b", + "mov w20, v7.s[1]", + "mov w21, v7.s[0]", + "mov w22, v12.s[1]", "and w23, w20, w21", - "bic w22, w22, w20", - "eor w22, w23, w22", + "bic w24, w22, w20", + "eor w22, w23, w24", "ror w23, w20, #6", - "eor w23, w23, w20, ror #11", - "eor w23, w23, w20, ror #25", - "add w22, w22, w23", - "mov w23, v16.s[0]", - "add w22, w22, w23", - "mov w23, v20.s[0]", - "add w22, w22, w23", - "mov w23, v17.s[3]", - "mov w24, v17.s[2]", - "mov w25, v20.s[3]", - "and w30, w24, w25", - "orr w25, w24, w25", - "and w25, w23, w25", - "orr w25, w25, w30", - "add w25, w22, w25", - "ror w30, w23, #2", - "eor w30, w30, w23, ror #13", - "eor w30, w30, w23, ror #22", - "add w25, w25, w30", - "mov w30, v20.s[2]", - "add w22, w22, w30", - "and w20, w22, w20", - "bic w21, w21, w22", - "eor w20, w20, w21", - "ror w21, w22, #6", - "eor w21, w21, w22, ror #11", - "eor w21, w21, w22, ror #25", - "add w20, w20, w21", - "mov w21, v16.s[1]", - "add w20, w20, w21", - "mov w21, v20.s[1]", - "add w20, w20, w21", - "and w21, w23, w24", - "orr w23, w23, w24", - "and w23, w25, w23", - "orr w21, w23, w21", - "add w21, w20, w21", - "ror w23, w25, #2", - "eor w23, w23, w25, ror #13", - "eor w23, w23, w25, ror #22", - "add w21, w21, w23", - "mov w23, v20.s[3]", - "add w20, w20, w23", - "mov v5.16b, v20.16b", - "mov v5.s[3], w21", - "mov v5.s[2], w25", - "mov v5.s[1], w20", - "mov v20.16b, v5.16b", - "mov v20.s[0], w22", - "tbl v16.16b, {v16.16b}, v4.16b", - "mov w20, v20.s[1]", - "mov w21, v20.s[0]", - "mov w22, v17.s[1]", + "eor w24, w23, w20, ror #11", + "eor w23, w24, w20, ror #25", + "add w24, w22, w23", + "mov w22, v4.s[0]", + "add w23, w24, w22", + "mov w22, v12.s[0]", + "add w24, w23, w22", + "mov w22, v7.s[3]", + "mov w23, v7.s[2]", + "mov w25, v12.s[3]", + "and w30, w23, w25", + "orr w18, w23, w25", + "and w25, w22, w18", + "orr w18, w25, w30", + "add w25, w24, w18", + "ror w30, w22, #2", + "eor w18, w30, w22, ror #13", + "eor w30, w18, w22, ror #22", + "add w18, w25, w30", + "mov w25, v12.s[2]", + "add w30, w24, w25", + "and w24, w30, w20", + "bic w20, w21, w30", + "eor w21, w24, w20", + "ror w20, w30, #6", + "eor w24, w20, w30, ror #11", + "eor w20, w24, w30, ror #25", + "add w24, w21, w20", + "mov w20, v4.s[1]", + "add w21, w24, w20", + "mov w20, v12.s[1]", + "add w24, w21, w20", + "and w20, w22, w23", + "orr w21, w22, w23", + "and w22, w18, w21", + "orr w21, w22, w20", + "add w20, w24, w21", + "ror w21, w18, #2", + "eor w22, w21, w18, ror #13", + "eor w21, w22, w18, ror #22", + "add w22, w20, w21", + "mov w20, v12.s[3]", + "add w21, w24, w20", + "mov v3.16b, v12.16b", + "mov v3.s[3], w22", + "mov v8.16b, v3.16b", + "mov v8.s[2], w18", + "mov v3.16b, v8.16b", + "mov v3.s[1], w21", + "mov v8.16b, v3.16b", + "mov v8.s[0], w30", + "mov v20.16b, v8.16b", + "tbl v3.16b, {v4.16b}, v5.16b", + "mov v16.16b, v3.16b", + "mov w20, v8.s[1]", + "mov w21, v8.s[0]", + "mov w22, v7.s[1]", "and w23, w20, w21", - "bic w22, w22, w20", - "eor w22, w23, w22", + "bic w24, w22, w20", + "eor w22, w23, w24", "ror w23, w20, #6", - "eor w23, w23, w20, ror #11", - "eor w23, w23, w20, ror #25", - "add w22, w22, w23", - "mov w23, v16.s[0]", - "add w22, w22, w23", - "mov w23, v17.s[0]", - "add w22, w22, w23", - "mov w23, v20.s[3]", - "mov w24, v20.s[2]", - "mov w25, v17.s[3]", - "and w30, w24, w25", - "orr w25, w24, w25", - "and w25, w23, w25", - "orr w25, w25, w30", - "add w25, w22, w25", - "ror w30, w23, #2", - "eor w30, w30, w23, ror #13", - "eor w30, w30, w23, ror #22", - "add w25, w25, w30", - "mov w30, v17.s[2]", - "add w22, w22, w30", - "and w20, w22, w20", - "bic w21, w21, w22", - "eor w20, w20, w21", - "ror w21, w22, #6", - "eor w21, w21, w22, ror #11", - "eor w21, w21, w22, ror #25", - "add w20, w20, w21", - "mov w21, v16.s[1]", - "add w20, w20, w21", - "mov w21, v17.s[1]", - "add w20, w20, w21", - "and w21, w23, w24", - "orr w23, w23, w24", - "and w23, w25, w23", - "orr w21, w23, w21", - "add w21, w20, w21", - "ror w23, w25, #2", - "eor w23, w23, w25, ror #13", - "eor w23, w23, w25, ror #22", - "add w21, w21, w23", - "mov w23, v17.s[3]", - "add w20, w20, w23", - "mov v5.16b, v17.16b", - "mov v5.s[3], w21", - "mov v5.s[2], w25", - "mov v5.s[1], w20", - "mov v17.16b, v5.16b", - "mov v17.s[0], w22", - "mov v16.16b, v23.16b", - "mov w20, v17.s[1]", - "mov w21, v17.s[0]", - "mov w22, v20.s[1]", + "eor w24, w23, w20, ror #11", + "eor w23, w24, w20, ror #25", + "add w24, w22, w23", + "mov w22, v3.s[0]", + "add w23, w24, w22", + "mov w22, v7.s[0]", + "add w24, w23, w22", + "mov w22, v8.s[3]", + "mov w23, v8.s[2]", + "mov w25, v7.s[3]", + "and w30, w23, w25", + "orr w18, w23, w25", + "and w25, w22, w18", + "orr w18, w25, w30", + "add w25, w24, w18", + "ror w30, w22, #2", + "eor w18, w30, w22, ror #13", + "eor w30, w18, w22, ror #22", + "add w18, w25, w30", + "mov w25, v7.s[2]", + "add w30, w24, w25", + "and w24, w30, w20", + "bic w20, w21, w30", + "eor w21, w24, w20", + "ror w20, w30, #6", + "eor w24, w20, w30, ror #11", + "eor w20, w24, w30, ror #25", + "add w24, w21, w20", + "mov w20, v3.s[1]", + "add w21, w24, w20", + "mov w20, v7.s[1]", + "add w24, w21, w20", + "and w20, w22, w23", + "orr w21, w22, w23", + "and w22, w18, w21", + "orr w21, w22, w20", + "add w20, w24, w21", + "ror w21, w18, #2", + "eor w22, w21, w18, ror #13", + "eor w21, w22, w18, ror #22", + "add w22, w20, w21", + "mov w20, v7.s[3]", + "add w21, w24, w20", + "mov v3.16b, v7.16b", + "mov v3.s[3], w22", + "mov v4.16b, v3.16b", + "mov v4.s[2], w18", + "mov v3.16b, v4.16b", + "mov v3.s[1], w21", + "mov v4.16b, v3.16b", + "mov v4.s[0], w30", + "mov v17.16b, v4.16b", + "mov v16.16b, v15.16b", + "mov w20, v4.s[1]", + "mov w21, v4.s[0]", + "mov w22, v8.s[1]", "and w23, w20, w21", - "bic w22, w22, w20", - "eor w22, w23, w22", + "bic w24, w22, w20", + "eor w22, w23, w24", "ror w23, w20, #6", - "eor w23, w23, w20, ror #11", - "eor w23, w23, w20, ror #25", - "add w22, w22, w23", - "mov w23, v23.s[0]", - "add w22, w22, w23", - "mov w23, v20.s[0]", - "add w22, w22, w23", - "mov w23, v17.s[3]", - "mov w24, v17.s[2]", - "mov w25, v20.s[3]", - "and w30, w24, w25", - "orr w25, w24, w25", - "and w25, w23, w25", - "orr w25, w25, w30", - "add w25, w22, w25", - "ror w30, w23, #2", - "eor w30, w30, w23, ror #13", - "eor w30, w30, w23, ror #22", - "add w25, w25, w30", - "mov w30, v20.s[2]", - "add w22, w22, w30", - "and w20, w22, w20", - "bic w21, w21, w22", - "eor w20, w20, w21", - "ror w21, w22, #6", - "eor w21, w21, w22, ror #11", - "eor w21, w21, w22, ror #25", - "add w20, w20, w21", - "mov w21, v23.s[1]", - "add w20, w20, w21", - "mov w21, v20.s[1]", - "add w20, w20, w21", - "and w21, w23, w24", - "orr w23, w23, w24", - "and w23, w25, w23", - "orr w21, w23, w21", - "add w21, w20, w21", - "ror w23, w25, #2", - "eor w23, w23, w25, ror #13", - "eor w23, w23, w25, ror #22", - "add w21, w21, w23", - "mov w23, v20.s[3]", - "add w20, w20, w23", - "mov v5.16b, v20.16b", - "mov v5.s[3], w21", - "mov v5.s[2], w25", - "mov v5.s[1], w20", - "mov v20.16b, v5.16b", - "mov v20.s[0], w22", - "tbl v16.16b, {v23.16b}, v4.16b", - "mov w20, v20.s[1]", - "mov w21, v20.s[0]", - "mov w22, v17.s[1]", + "eor w24, w23, w20, ror #11", + "eor w23, w24, w20, ror #25", + "add w24, w22, w23", + "mov w22, v15.s[0]", + "add w23, w24, w22", + "mov w22, v8.s[0]", + "add w24, w23, w22", + "mov w22, v4.s[3]", + "mov w23, v4.s[2]", + "mov w25, v8.s[3]", + "and w30, w23, w25", + "orr w18, w23, w25", + "and w25, w22, w18", + "orr w18, w25, w30", + "add w25, w24, w18", + "ror w30, w22, #2", + "eor w18, w30, w22, ror #13", + "eor w30, w18, w22, ror #22", + "add w18, w25, w30", + "mov w25, v8.s[2]", + "add w30, w24, w25", + "and w24, w30, w20", + "bic w20, w21, w30", + "eor w21, w24, w20", + "ror w20, w30, #6", + "eor w24, w20, w30, ror #11", + "eor w20, w24, w30, ror #25", + "add w24, w21, w20", + "mov w20, v15.s[1]", + "add w21, w24, w20", + "mov w20, v8.s[1]", + "add w24, w21, w20", + "and w20, w22, w23", + "orr w21, w22, w23", + "and w22, w18, w21", + "orr w21, w22, w20", + "add w20, w24, w21", + "ror w21, w18, #2", + "eor w22, w21, w18, ror #13", + "eor w21, w22, w18, ror #22", + "add w22, w20, w21", + "mov w20, v8.s[3]", + "add w21, w24, w20", + "mov v3.16b, v8.16b", + "mov v3.s[3], w22", + "mov v7.16b, v3.16b", + "mov v7.s[2], w18", + "mov v3.16b, v7.16b", + "mov v3.s[1], w21", + "mov v7.16b, v3.16b", + "mov v7.s[0], w30", + "mov v20.16b, v7.16b", + "tbl v3.16b, {v15.16b}, v5.16b", + "mov v16.16b, v3.16b", + "mov w20, v7.s[1]", + "mov w21, v7.s[0]", + "mov w22, v4.s[1]", "and w23, w20, w21", - "bic w22, w22, w20", - "eor w22, w23, w22", + "bic w24, w22, w20", + "eor w22, w23, w24", "ror w23, w20, #6", - "eor w23, w23, w20, ror #11", - "eor w23, w23, w20, ror #25", - "add w22, w22, w23", - "mov w23, v16.s[0]", - "add w22, w22, w23", - "mov w23, v17.s[0]", - "add w22, w22, w23", - "mov w23, v20.s[3]", - "mov w24, v20.s[2]", - "mov w25, v17.s[3]", - "and w30, w24, w25", - "orr w25, w24, w25", - "and w25, w23, w25", - "orr w25, w25, w30", - "add w25, w22, w25", - "ror w30, w23, #2", - "eor w30, w30, w23, ror #13", - "eor w30, w30, w23, ror #22", - "add w25, w25, w30", - "mov w30, v17.s[2]", - "add w22, w22, w30", - "and w20, w22, w20", - "bic w21, w21, w22", - "eor w20, w20, w21", - "ror w21, w22, #6", - "eor w21, w21, w22, ror #11", - "eor w21, w21, w22, ror #25", - "add w20, w20, w21", - "mov w21, v16.s[1]", - "add w20, w20, w21", - "mov w21, v17.s[1]", - "add w20, w20, w21", - "and w21, w23, w24", - "orr w23, w23, w24", - "and w23, w25, w23", - "orr w21, w23, w21", - "add w21, w20, w21", - "ror w23, w25, #2", - "eor w23, w23, w25, ror #13", - "eor w23, w23, w25, ror #22", - "add w21, w21, w23", - "mov w23, v17.s[3]", - "add w20, w20, w23", - "mov v5.16b, v17.16b", - "mov v5.s[3], w21", - "mov v5.s[2], w25", - "mov v5.s[1], w20", - "mov v17.16b, v5.16b", - "mov v17.s[0], w22", - "mov v16.16b, v22.16b", - "mov w20, v17.s[1]", - "mov w21, v17.s[0]", - "mov w22, v20.s[1]", + "eor w24, w23, w20, ror #11", + "eor w23, w24, w20, ror #25", + "add w24, w22, w23", + "mov w22, v3.s[0]", + "add w23, w24, w22", + "mov w22, v4.s[0]", + "add w24, w23, w22", + "mov w22, v7.s[3]", + "mov w23, v7.s[2]", + "mov w25, v4.s[3]", + "and w30, w23, w25", + "orr w18, w23, w25", + "and w25, w22, w18", + "orr w18, w25, w30", + "add w25, w24, w18", + "ror w30, w22, #2", + "eor w18, w30, w22, ror #13", + "eor w30, w18, w22, ror #22", + "add w18, w25, w30", + "mov w25, v4.s[2]", + "add w30, w24, w25", + "and w24, w30, w20", + "bic w20, w21, w30", + "eor w21, w24, w20", + "ror w20, w30, #6", + "eor w24, w20, w30, ror #11", + "eor w20, w24, w30, ror #25", + "add w24, w21, w20", + "mov w20, v3.s[1]", + "add w21, w24, w20", + "mov w20, v4.s[1]", + "add w24, w21, w20", + "and w20, w22, w23", + "orr w21, w22, w23", + "and w22, w18, w21", + "orr w21, w22, w20", + "add w20, w24, w21", + "ror w21, w18, #2", + "eor w22, w21, w18, ror #13", + "eor w21, w22, w18, ror #22", + "add w22, w20, w21", + "mov w20, v4.s[3]", + "add w21, w24, w20", + "mov v3.16b, v4.16b", + "mov v3.s[3], w22", + "mov v4.16b, v3.16b", + "mov v4.s[2], w18", + "mov v3.16b, v4.16b", + "mov v3.s[1], w21", + "mov v4.16b, v3.16b", + "mov v4.s[0], w30", + "mov v17.16b, v4.16b", + "mov v16.16b, v13.16b", + "mov w20, v4.s[1]", + "mov w21, v4.s[0]", + "mov w22, v7.s[1]", "and w23, w20, w21", - "bic w22, w22, w20", - "eor w22, w23, w22", + "bic w24, w22, w20", + "eor w22, w23, w24", "ror w23, w20, #6", - "eor w23, w23, w20, ror #11", - "eor w23, w23, w20, ror #25", - "add w22, w22, w23", - "mov w23, v22.s[0]", - "add w22, w22, w23", - "mov w23, v20.s[0]", - "add w22, w22, w23", - "mov w23, v17.s[3]", - "mov w24, v17.s[2]", - "mov w25, v20.s[3]", - "and w30, w24, w25", - "orr w25, w24, w25", - "and w25, w23, w25", - "orr w25, w25, w30", - "add w25, w22, w25", - "ror w30, w23, #2", - "eor w30, w30, w23, ror #13", - "eor w30, w30, w23, ror #22", - "add w25, w25, w30", - "mov w30, v20.s[2]", - "add w22, w22, w30", - "and w20, w22, w20", - "bic w21, w21, w22", - "eor w20, w20, w21", - "ror w21, w22, #6", - "eor w21, w21, w22, ror #11", - "eor w21, w21, w22, ror #25", - "add w20, w20, w21", - "mov w21, v22.s[1]", - "add w20, w20, w21", - "mov w21, v20.s[1]", - "add w20, w20, w21", - "and w21, w23, w24", - "orr w23, w23, w24", - "and w23, w25, w23", - "orr w21, w23, w21", - "add w21, w20, w21", - "ror w23, w25, #2", - "eor w23, w23, w25, ror #13", - "eor w23, w23, w25, ror #22", - "add w21, w21, w23", - "mov w23, v20.s[3]", - "add w20, w20, w23", - "mov v5.16b, v20.16b", - "mov v5.s[3], w21", - "mov v5.s[2], w25", - "mov v5.s[1], w20", - "mov v20.16b, v5.16b", - "mov v20.s[0], w22", - "tbl v16.16b, {v22.16b}, v4.16b", - "mov w20, v20.s[1]", - "mov w21, v20.s[0]", - "mov w22, v17.s[1]", + "eor w24, w23, w20, ror #11", + "eor w23, w24, w20, ror #25", + "add w24, w22, w23", + "mov w22, v13.s[0]", + "add w23, w24, w22", + "mov w22, v7.s[0]", + "add w24, w23, w22", + "mov w22, v4.s[3]", + "mov w23, v4.s[2]", + "mov w25, v7.s[3]", + "and w30, w23, w25", + "orr w18, w23, w25", + "and w25, w22, w18", + "orr w18, w25, w30", + "add w25, w24, w18", + "ror w30, w22, #2", + "eor w18, w30, w22, ror #13", + "eor w30, w18, w22, ror #22", + "add w18, w25, w30", + "mov w25, v7.s[2]", + "add w30, w24, w25", + "and w24, w30, w20", + "bic w20, w21, w30", + "eor w21, w24, w20", + "ror w20, w30, #6", + "eor w24, w20, w30, ror #11", + "eor w20, w24, w30, ror #25", + "add w24, w21, w20", + "mov w20, v13.s[1]", + "add w21, w24, w20", + "mov w20, v7.s[1]", + "add w24, w21, w20", + "and w20, w22, w23", + "orr w21, w22, w23", + "and w22, w18, w21", + "orr w21, w22, w20", + "add w20, w24, w21", + "ror w21, w18, #2", + "eor w22, w21, w18, ror #13", + "eor w21, w22, w18, ror #22", + "add w22, w20, w21", + "mov w20, v7.s[3]", + "add w21, w24, w20", + "mov v3.16b, v7.16b", + "mov v3.s[3], w22", + "mov v7.16b, v3.16b", + "mov v7.s[2], w18", + "mov v3.16b, v7.16b", + "mov v3.s[1], w21", + "mov v7.16b, v3.16b", + "mov v7.s[0], w30", + "mov v20.16b, v7.16b", + "tbl v3.16b, {v13.16b}, v5.16b", + "mov v16.16b, v3.16b", + "mov w20, v7.s[1]", + "mov w21, v7.s[0]", + "mov w22, v4.s[1]", "and w23, w20, w21", - "bic w22, w22, w20", - "eor w22, w23, w22", + "bic w24, w22, w20", + "eor w22, w23, w24", "ror w23, w20, #6", - "eor w23, w23, w20, ror #11", - "eor w23, w23, w20, ror #25", - "add w22, w22, w23", - "mov w23, v16.s[0]", - "add w22, w22, w23", - "mov w23, v17.s[0]", - "add w22, w22, w23", - "mov w23, v20.s[3]", - "mov w24, v20.s[2]", - "mov w25, v17.s[3]", - "and w30, w24, w25", - "orr w25, w24, w25", - "and w25, w23, w25", - "orr w25, w25, w30", - "add w25, w22, w25", - "ror w30, w23, #2", - "eor w30, w30, w23, ror #13", - "eor w30, w30, w23, ror #22", - "add w25, w25, w30", - "mov w30, v17.s[2]", - "add w22, w22, w30", - "and w20, w22, w20", - "bic w21, w21, w22", - "eor w20, w20, w21", - "ror w21, w22, #6", - "eor w21, w21, w22, ror #11", - "eor w21, w21, w22, ror #25", - "add w20, w20, w21", - "mov w21, v16.s[1]", - "add w20, w20, w21", - "mov w21, v17.s[1]", - "add w20, w20, w21", - "and w21, w23, w24", - "orr w23, w23, w24", - "and w23, w25, w23", - "orr w21, w23, w21", - "add w21, w20, w21", - "ror w23, w25, #2", - "eor w23, w23, w25, ror #13", - "eor w23, w23, w25, ror #22", - "add w21, w21, w23", - "mov w23, v17.s[3]", - "add w20, w20, w23", - "mov v5.16b, v17.16b", - "mov v5.s[3], w21", - "mov v5.s[2], w25", - "mov v5.s[1], w20", - "mov v17.16b, v5.16b", - "mov v17.s[0], w22", - "mov v16.16b, v21.16b", - "mov w20, v17.s[1]", - "mov w21, v17.s[0]", - "mov w22, v20.s[1]", + "eor w24, w23, w20, ror #11", + "eor w23, w24, w20, ror #25", + "add w24, w22, w23", + "mov w22, v3.s[0]", + "add w23, w24, w22", + "mov w22, v4.s[0]", + "add w24, w23, w22", + "mov w22, v7.s[3]", + "mov w23, v7.s[2]", + "mov w25, v4.s[3]", + "and w30, w23, w25", + "orr w18, w23, w25", + "and w25, w22, w18", + "orr w18, w25, w30", + "add w25, w24, w18", + "ror w30, w22, #2", + "eor w18, w30, w22, ror #13", + "eor w30, w18, w22, ror #22", + "add w18, w25, w30", + "mov w25, v4.s[2]", + "add w30, w24, w25", + "and w24, w30, w20", + "bic w20, w21, w30", + "eor w21, w24, w20", + "ror w20, w30, #6", + "eor w24, w20, w30, ror #11", + "eor w20, w24, w30, ror #25", + "add w24, w21, w20", + "mov w20, v3.s[1]", + "add w21, w24, w20", + "mov w20, v4.s[1]", + "add w24, w21, w20", + "and w20, w22, w23", + "orr w21, w22, w23", + "and w22, w18, w21", + "orr w21, w22, w20", + "add w20, w24, w21", + "ror w21, w18, #2", + "eor w22, w21, w18, ror #13", + "eor w21, w22, w18, ror #22", + "add w22, w20, w21", + "mov w20, v4.s[3]", + "add w21, w24, w20", + "mov v3.16b, v4.16b", + "mov v3.s[3], w22", + "mov v4.16b, v3.16b", + "mov v4.s[2], w18", + "mov v3.16b, v4.16b", + "mov v3.s[1], w21", + "mov v4.16b, v3.16b", + "mov v4.s[0], w30", + "mov v17.16b, v4.16b", + "mov v16.16b, v9.16b", + "mov w20, v4.s[1]", + "mov w21, v4.s[0]", + "mov w22, v7.s[1]", "and w23, w20, w21", - "bic w22, w22, w20", - "eor w22, w23, w22", + "bic w24, w22, w20", + "eor w22, w23, w24", "ror w23, w20, #6", - "eor w23, w23, w20, ror #11", - "eor w23, w23, w20, ror #25", - "add w22, w22, w23", - "mov w23, v21.s[0]", - "add w22, w22, w23", - "mov w23, v20.s[0]", - "add w22, w22, w23", - "mov w23, v17.s[3]", - "mov w24, v17.s[2]", - "mov w25, v20.s[3]", - "and w30, w24, w25", - "orr w25, w24, w25", - "and w25, w23, w25", - "orr w25, w25, w30", - "add w25, w22, w25", - "ror w30, w23, #2", - "eor w30, w30, w23, ror #13", - "eor w30, w30, w23, ror #22", - "add w25, w25, w30", - "mov w30, v20.s[2]", - "add w22, w22, w30", - "and w20, w22, w20", - "bic w21, w21, w22", - "eor w20, w20, w21", - "ror w21, w22, #6", - "eor w21, w21, w22, ror #11", - "eor w21, w21, w22, ror #25", - "add w20, w20, w21", - "mov w21, v21.s[1]", - "add w20, w20, w21", - "mov w21, v20.s[1]", - "add w20, w20, w21", - "and w21, w23, w24", - "orr w23, w23, w24", - "and w23, w25, w23", - "orr w21, w23, w21", - "add w21, w20, w21", - "ror w23, w25, #2", - "eor w23, w23, w25, ror #13", - "eor w23, w23, w25, ror #22", - "add w21, w21, w23", - "mov w23, v20.s[3]", - "add w20, w20, w23", - "mov v5.16b, v20.16b", - "mov v5.s[3], w21", - "mov v5.s[2], w25", - "mov v5.s[1], w20", - "mov v20.16b, v5.16b", - "mov v20.s[0], w22", - "tbl v16.16b, {v21.16b}, v4.16b", - "mov w20, v20.s[1]", - "mov w21, v20.s[0]", - "mov w22, v17.s[1]", + "eor w24, w23, w20, ror #11", + "eor w23, w24, w20, ror #25", + "add w24, w22, w23", + "mov w22, v9.s[0]", + "add w23, w24, w22", + "mov w22, v7.s[0]", + "add w24, w23, w22", + "mov w22, v4.s[3]", + "mov w23, v4.s[2]", + "mov w25, v7.s[3]", + "and w30, w23, w25", + "orr w18, w23, w25", + "and w25, w22, w18", + "orr w18, w25, w30", + "add w25, w24, w18", + "ror w30, w22, #2", + "eor w18, w30, w22, ror #13", + "eor w30, w18, w22, ror #22", + "add w18, w25, w30", + "mov w25, v7.s[2]", + "add w30, w24, w25", + "and w24, w30, w20", + "bic w20, w21, w30", + "eor w21, w24, w20", + "ror w20, w30, #6", + "eor w24, w20, w30, ror #11", + "eor w20, w24, w30, ror #25", + "add w24, w21, w20", + "mov w20, v9.s[1]", + "add w21, w24, w20", + "mov w20, v7.s[1]", + "add w24, w21, w20", + "and w20, w22, w23", + "orr w21, w22, w23", + "and w22, w18, w21", + "orr w21, w22, w20", + "add w20, w24, w21", + "ror w21, w18, #2", + "eor w22, w21, w18, ror #13", + "eor w21, w22, w18, ror #22", + "add w22, w20, w21", + "mov w20, v7.s[3]", + "add w21, w24, w20", + "mov v3.16b, v7.16b", + "mov v3.s[3], w22", + "mov v7.16b, v3.16b", + "mov v7.s[2], w18", + "mov v3.16b, v7.16b", + "mov v3.s[1], w21", + "mov v7.16b, v3.16b", + "mov v7.s[0], w30", + "mov v20.16b, v7.16b", + "tbl v3.16b, {v9.16b}, v5.16b", + "mov v16.16b, v3.16b", + "mov w20, v7.s[1]", + "mov w21, v7.s[0]", + "mov w22, v4.s[1]", "and w23, w20, w21", - "bic w22, w22, w20", - "eor w22, w23, w22", + "bic w24, w22, w20", + "eor w22, w23, w24", "ror w23, w20, #6", - "eor w23, w23, w20, ror #11", - "eor w23, w23, w20, ror #25", - "add w22, w22, w23", - "mov w23, v16.s[0]", - "add w22, w22, w23", - "mov w23, v17.s[0]", - "add w22, w22, w23", - "mov w23, v20.s[3]", - "mov w24, v20.s[2]", - "mov w25, v17.s[3]", - "and w30, w24, w25", - "orr w25, w24, w25", - "and w25, w23, w25", - "orr w25, w25, w30", - "add w25, w22, w25", - "ror w30, w23, #2", - "eor w30, w30, w23, ror #13", - "eor w30, w30, w23, ror #22", - "add w25, w25, w30", - "mov w30, v17.s[2]", - "add w22, w22, w30", - "and w20, w22, w20", - "bic w21, w21, w22", - "eor w20, w20, w21", - "ror w21, w22, #6", - "eor w21, w21, w22, ror #11", - "eor w21, w21, w22, ror #25", - "add w20, w20, w21", - "mov w21, v16.s[1]", - "add w20, w20, w21", - "mov w21, v17.s[1]", - "add w20, w20, w21", - "and w21, w23, w24", - "orr w23, w23, w24", - "and w23, w25, w23", - "orr w21, w23, w21", - "add w21, w20, w21", - "ror w23, w25, #2", - "eor w23, w23, w25, ror #13", - "eor w23, w23, w25, ror #22", - "add w21, w21, w23", - "mov w23, v17.s[3]", - "add w20, w20, w23", - "mov v4.16b, v17.16b", - "mov v4.s[3], w21", - "mov v4.s[2], w25", - "mov v4.s[1], w20", + "eor w24, w23, w20, ror #11", + "eor w23, w24, w20, ror #25", + "add w24, w22, w23", + "mov w22, v3.s[0]", + "add w23, w24, w22", + "mov w22, v4.s[0]", + "add w24, w23, w22", + "mov w22, v7.s[3]", + "mov w23, v7.s[2]", + "mov w25, v4.s[3]", + "and w30, w23, w25", + "orr w18, w23, w25", + "and w25, w22, w18", + "orr w18, w25, w30", + "add w25, w24, w18", + "ror w30, w22, #2", + "eor w18, w30, w22, ror #13", + "eor w30, w18, w22, ror #22", + "add w18, w25, w30", + "mov w25, v4.s[2]", + "add w30, w24, w25", + "and w24, w30, w20", + "bic w20, w21, w30", + "eor w21, w24, w20", + "ror w20, w30, #6", + "eor w24, w20, w30, ror #11", + "eor w20, w24, w30, ror #25", + "add w24, w21, w20", + "mov w20, v3.s[1]", + "add w21, w24, w20", + "mov w20, v4.s[1]", + "add w24, w21, w20", + "and w20, w22, w23", + "orr w21, w22, w23", + "and w22, w18, w21", + "orr w21, w22, w20", + "add w20, w24, w21", + "ror w21, w18, #2", + "eor w22, w21, w18, ror #13", + "eor w21, w22, w18, ror #22", + "add w22, w20, w21", + "mov w20, v4.s[3]", + "add w21, w24, w20", + "mov v3.16b, v4.16b", + "mov v3.s[3], w22", + "mov v4.16b, v3.16b", + "mov v4.s[2], w18", + "mov v3.16b, v4.16b", + "mov v3.s[1], w21", + "mov v4.16b, v3.16b", + "mov v4.s[0], w30", "mov v17.16b, v4.16b", - "mov v17.s[0], w22", - "add v20.4s, v20.4s, v18.4s", - "add v17.4s, v17.4s, v19.4s", - "tbl v20.16b, {v20.16b}, v2.16b", - "tbl v17.16b, {v17.16b}, v3.16b", - "mov v16.16b, v17.16b", - "mov v16.16b, v17.16b", - "mov v16.d[1], v20.d[1]", - "ext v20.16b, v17.16b, v20.16b, #8", - "str q16, [x11, #256]", - "str q20, [x11, #272]" + "add v3.4s, v7.4s, v11.4s", + "mov v20.16b, v3.16b", + "add v5.4s, v4.4s, v10.4s", + "mov v17.16b, v5.16b", + "tbl v4.16b, {v3.16b}, v6.16b", + "mov v20.16b, v4.16b", + "tbl v3.16b, {v5.16b}, v2.16b", + "mov v17.16b, v3.16b", + "mov v16.16b, v3.16b", + "mov v2.16b, v3.16b", + "mov v2.d[1], v4.d[1]", + "mov v16.16b, v2.16b", + "ext v5.16b, v3.16b, v4.16b, #8", + "mov v20.16b, v5.16b", + "ldr x20, [sp]", + "str q2, [x20, #256]", + "str q5, [x20, #272]", + "add sp, sp, #0x120 (288)" ] } } diff --git a/unittests/InstructionCountCI/FlagM/Atomics.json b/unittests/InstructionCountCI/FlagM/Atomics.json index d535a3386d..a17f2e3c39 100644 --- a/unittests/InstructionCountCI/FlagM/Atomics.json +++ b/unittests/InstructionCountCI/FlagM/Atomics.json @@ -12,1464 +12,1802 @@ }, "Instructions": { "lock add byte [rax], cl": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": "0x00", "ExpectedArm64ASM": [ - "ldaddalb w5, w20, [x4]", - "eor w27, w20, w5", - "lsl w0, w20, #24", - "cmn w0, w5, lsl #24", - "add w26, w20, w5" + "mov x20, x5", + "mov x21, x4", + "ldaddalb w20, w22, [x21]", + "eor w21, w22, w20", + "mov x27, x21", + "lsl w0, w22, #24", + "cmn w0, w20, lsl #24", + "add w21, w22, w20", + "mov x26, x21" ] }, "lock add word [rax], cx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": "0x01", "ExpectedArm64ASM": [ - "ldaddalh w5, w20, [x4]", - "eor w27, w20, w5", - "lsl w0, w20, #16", - "cmn w0, w5, lsl #16", - "add w26, w20, w5" + "mov x20, x5", + "mov x21, x4", + "ldaddalh w20, w22, [x21]", + "eor w21, w22, w20", + "mov x27, x21", + "lsl w0, w22, #16", + "cmn w0, w20, lsl #16", + "add w21, w22, w20", + "mov x26, x21" ] }, "lock add dword [rax], ecx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x01", "ExpectedArm64ASM": [ - "ldaddal w5, w20, [x4]", - "eor w27, w20, w5", - "adds w26, w20, w5" + "mov x20, x5", + "mov x21, x4", + "ldaddal w20, w22, [x21]", + "eor w21, w22, w20", + "mov x27, x21", + "adds w21, w22, w20", + "mov x26, x21" ] }, "lock or byte [rax], cl": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x08", "ExpectedArm64ASM": [ - "ldsetalb w5, w20, [x4]", - "orr w26, w20, w5", - "cmn wzr, w26, lsl #24" + "mov x20, x5", + "mov x21, x4", + "ldsetalb w20, w22, [x21]", + "orr w21, w22, w20", + "mov x26, x21", + "cmn wzr, w21, lsl #24" ] }, "lock or word [rax], cx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x09", "ExpectedArm64ASM": [ - "ldsetalh w5, w20, [x4]", - "orr w26, w20, w5", - "cmn wzr, w26, lsl #16" + "mov x20, x5", + "mov x21, x4", + "ldsetalh w20, w22, [x21]", + "orr w21, w22, w20", + "mov x26, x21", + "cmn wzr, w21, lsl #16" ] }, "lock or dword [rax], ecx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x09", "ExpectedArm64ASM": [ - "ldsetal w5, w20, [x4]", - "orr w26, w20, w5", - "tst w26, w26" + "mov x20, x5", + "mov x21, x4", + "ldsetal w20, w22, [x21]", + "orr w21, w22, w20", + "mov x26, x21", + "tst w21, w21" ] }, "lock adc byte [rax], cl": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 22, "Comment": "0x10", "ExpectedArm64ASM": [ - "adc w20, wzr, w5", - "ldaddalb w20, w20, [x4]", - "eor w27, w20, w5", + "mov x20, x5", + "adc w21, wzr, w20", + "mov x22, x4", + "ldaddalb w21, w23, [x22]", + "eor w21, w23, w20", + "mov x27, x21", "cset w21, hs", - "adc w22, w20, w5", - "uxtb w26, w22", - "cmp x26, x5", + "adc w22, w23, w20", + "uxtb w24, w22", + "cmp x24, x20", "cset x22, lo", - "cmp x26, x5", - "cset x23, ls", + "cmp x24, x20", + "cset x25, ls", "cmp x21, #0x1 (1)", - "csel x21, x23, x22, eq", - "cmn wzr, w26, lsl #24", - "rmif x21, #63, #nzCv", - "eor w21, w20, w5", - "eor w20, w26, w20", - "bic w20, w20, w21", - "rmif x20, #7, #nzcV" + "csel x30, x25, x22, eq", + "cmn wzr, w24, lsl #24", + "rmif x30, #63, #nzCv", + "eor w21, w23, w20", + "eor w20, w24, w23", + "bic w22, w20, w21", + "rmif x22, #7, #nzcV", + "mov x26, x24" ] }, "lock adc word [rax], cx": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 22, "Comment": "0x11", "ExpectedArm64ASM": [ - "adc w20, wzr, w5", - "ldaddalh w20, w20, [x4]", - "eor w27, w20, w5", + "mov x20, x5", + "adc w21, wzr, w20", + "mov x22, x4", + "ldaddalh w21, w23, [x22]", + "eor w21, w23, w20", + "mov x27, x21", "cset w21, hs", - "adc w22, w20, w5", - "uxth w26, w22", - "cmp x26, x5", + "adc w22, w23, w20", + "uxth w24, w22", + "cmp x24, x20", "cset x22, lo", - "cmp x26, x5", - "cset x23, ls", + "cmp x24, x20", + "cset x25, ls", "cmp x21, #0x1 (1)", - "csel x21, x23, x22, eq", - "cmn wzr, w26, lsl #16", - "rmif x21, #63, #nzCv", - "eor w21, w20, w5", - "eor w20, w26, w20", - "bic w20, w20, w21", - "rmif x20, #15, #nzcV" + "csel x30, x25, x22, eq", + "cmn wzr, w24, lsl #16", + "rmif x30, #63, #nzCv", + "eor w21, w23, w20", + "eor w20, w24, w23", + "bic w22, w20, w21", + "rmif x22, #15, #nzcV", + "mov x26, x24" ] }, "lock adc dword [rax], ecx": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": "0x11", "ExpectedArm64ASM": [ - "adc w20, wzr, w5", - "ldaddal w20, w20, [x4]", - "eor w27, w20, w5", - "adcs w26, w20, w5" + "mov x20, x5", + "adc w21, wzr, w20", + "mov x22, x4", + "ldaddal w21, w23, [x22]", + "eor w21, w23, w20", + "mov x27, x21", + "adcs w21, w23, w20", + "mov x26, x21" ] }, "lock sbb byte [rax], cl": { - "ExpectedInstructionCount": 20, + "ExpectedInstructionCount": 24, "Comment": "0x18", "ExpectedArm64ASM": [ - "adc w20, wzr, w5", - "neg w1, w20", - "ldaddalb w1, w20, [x4]", - "eor w27, w20, w5", + "mov x20, x5", + "mov x21, x4", + "adc w22, wzr, w20", + "neg w1, w22", + "ldaddalb w1, w23, [x21]", + "eor w21, w23, w20", + "mov x27, x21", "cset w21, hs", - "add w22, w5, w21", - "sub w22, w20, w22", - "uxtb w26, w22", - "cmp w26, w20", - "cset x22, hi", - "cmp w26, w20", - "cset x23, hs", + "add w22, w20, w21", + "sub w24, w23, w22", + "uxtb w22, w24", + "cmp w22, w23", + "cset x24, hi", + "cmp w22, w23", + "cset x25, hs", "cmp x21, #0x1 (1)", - "csel x21, x23, x22, eq", - "cmn wzr, w26, lsl #24", - "rmif x21, #63, #nzCv", - "eor w21, w20, w5", - "eor w20, w26, w20", - "and w20, w20, w21", - "rmif x20, #7, #nzcV" + "csel x30, x25, x24, eq", + "cmn wzr, w22, lsl #24", + "rmif x30, #63, #nzCv", + "eor w21, w23, w20", + "eor w20, w22, w23", + "and w23, w20, w21", + "rmif x23, #7, #nzcV", + "mov x26, x22" ] }, "lock sbb word [rax], cx": { - "ExpectedInstructionCount": 20, + "ExpectedInstructionCount": 24, "Comment": "0x19", "ExpectedArm64ASM": [ - "adc w20, wzr, w5", - "neg w1, w20", - "ldaddalh w1, w20, [x4]", - "eor w27, w20, w5", + "mov x20, x5", + "mov x21, x4", + "adc w22, wzr, w20", + "neg w1, w22", + "ldaddalh w1, w23, [x21]", + "eor w21, w23, w20", + "mov x27, x21", "cset w21, hs", - "add w22, w5, w21", - "sub w22, w20, w22", - "uxth w26, w22", - "cmp w26, w20", - "cset x22, hi", - "cmp w26, w20", - "cset x23, hs", + "add w22, w20, w21", + "sub w24, w23, w22", + "uxth w22, w24", + "cmp w22, w23", + "cset x24, hi", + "cmp w22, w23", + "cset x25, hs", "cmp x21, #0x1 (1)", - "csel x21, x23, x22, eq", - "cmn wzr, w26, lsl #16", - "rmif x21, #63, #nzCv", - "eor w21, w20, w5", - "eor w20, w26, w20", - "and w20, w20, w21", - "rmif x20, #15, #nzcV" + "csel x30, x25, x24, eq", + "cmn wzr, w22, lsl #16", + "rmif x30, #63, #nzCv", + "eor w21, w23, w20", + "eor w20, w22, w23", + "and w23, w20, w21", + "rmif x23, #15, #nzcV", + "mov x26, x22" ] }, "lock sbb dword [rax], ecx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "0x19", "ExpectedArm64ASM": [ - "adc w20, wzr, w5", - "neg w1, w20", - "ldaddal w1, w20, [x4]", - "eor w27, w20, w5", + "mov x20, x5", + "mov x21, x4", + "adc w22, wzr, w20", + "neg w1, w22", + "ldaddal w1, w23, [x21]", + "eor w21, w23, w20", + "mov x27, x21", "cfinv", - "sbcs w26, w20, w5", - "cfinv" + "sbcs w21, w23, w20", + "cfinv", + "mov x26, x21" ] }, "lock and byte [rax], cl": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "0x20", "ExpectedArm64ASM": [ - "mvn w1, w5", - "ldclralb w1, w20, [x4]", - "and w26, w20, w5", - "cmn wzr, w26, lsl #24" + "mov x20, x5", + "mov x21, x4", + "mvn w1, w20", + "ldclralb w1, w22, [x21]", + "and w21, w22, w20", + "cmn wzr, w21, lsl #24", + "mov x26, x21" ] }, "lock and word [rax], cx": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "0x21", "ExpectedArm64ASM": [ - "mvn w1, w5", - "ldclralh w1, w20, [x4]", - "and w26, w20, w5", - "cmn wzr, w26, lsl #16" + "mov x20, x5", + "mov x21, x4", + "mvn w1, w20", + "ldclralh w1, w22, [x21]", + "and w21, w22, w20", + "cmn wzr, w21, lsl #16", + "mov x26, x21" ] }, "lock and dword [rax], ecx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x21", "ExpectedArm64ASM": [ - "mvn w1, w5", - "ldclral w1, w20, [x4]", - "ands w26, w20, w5" + "mov x20, x5", + "mov x21, x4", + "mvn w1, w20", + "ldclral w1, w22, [x21]", + "ands w21, w22, w20", + "mov x26, x21" ] }, "lock sub byte [rax], cl": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "0x28", "ExpectedArm64ASM": [ - "neg w1, w5", - "ldaddalb w1, w20, [x4]", - "eor w27, w20, w5", - "lsl w0, w20, #24", - "cmp w0, w5, lsl #24", - "sub w26, w20, w5", + "mov x20, x5", + "mov x21, x4", + "neg w1, w20", + "ldaddalb w1, w22, [x21]", + "eor w21, w22, w20", + "mov x27, x21", + "lsl w0, w22, #24", + "cmp w0, w20, lsl #24", + "sub w21, w22, w20", + "mov x26, x21", "cfinv" ] }, "lock sub word [rax], cx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "0x28", "ExpectedArm64ASM": [ - "neg w1, w5", - "ldaddalh w1, w20, [x4]", - "eor w27, w20, w5", - "lsl w0, w20, #16", - "cmp w0, w5, lsl #16", - "sub w26, w20, w5", + "mov x20, x5", + "mov x21, x4", + "neg w1, w20", + "ldaddalh w1, w22, [x21]", + "eor w21, w22, w20", + "mov x27, x21", + "lsl w0, w22, #16", + "cmp w0, w20, lsl #16", + "sub w21, w22, w20", + "mov x26, x21", "cfinv" ] }, "lock sub dword [rax], ecx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": "0x29", "ExpectedArm64ASM": [ - "neg w1, w5", - "ldaddal w1, w20, [x4]", - "eor w27, w20, w5", - "subs w26, w20, w5", + "mov x20, x5", + "mov x21, x4", + "neg w1, w20", + "ldaddal w1, w22, [x21]", + "eor w21, w22, w20", + "mov x27, x21", + "subs w21, w22, w20", + "mov x26, x21", "cfinv" ] }, "lock xor byte [rax], cl": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x30", "ExpectedArm64ASM": [ - "ldeoralb w5, w20, [x4]", - "eor w26, w20, w5", - "cmn wzr, w26, lsl #24" + "mov x20, x5", + "mov x21, x4", + "ldeoralb w20, w22, [x21]", + "eor w21, w22, w20", + "mov x26, x21", + "cmn wzr, w21, lsl #24" ] }, "lock xor word [rax], cx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x31", "ExpectedArm64ASM": [ - "ldeoralh w5, w20, [x4]", - "eor w26, w20, w5", - "cmn wzr, w26, lsl #16" + "mov x20, x5", + "mov x21, x4", + "ldeoralh w20, w22, [x21]", + "eor w21, w22, w20", + "mov x26, x21", + "cmn wzr, w21, lsl #16" ] }, "lock xor dword [rax], ecx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x31", "ExpectedArm64ASM": [ - "ldeoral w5, w20, [x4]", - "eor w26, w20, w5", - "tst w26, w26" + "mov x20, x5", + "mov x21, x4", + "ldeoral w20, w22, [x21]", + "eor w21, w22, w20", + "mov x26, x21", + "tst w21, w21" ] }, "lock add qword [rax], rcx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x01", "ExpectedArm64ASM": [ - "ldaddal x5, x20, [x4]", - "eor w27, w20, w5", - "adds x26, x20, x5" + "mov x20, x5", + "mov x21, x4", + "ldaddal x20, x22, [x21]", + "eor w21, w22, w20", + "mov x27, x21", + "adds x21, x22, x20", + "mov x26, x21" ] }, "xadd byte [rax], bl": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 13, "Comment": "0x0f 0xc0", "ExpectedArm64ASM": [ - "uxtb w20, w7", - "ldaddalb w20, w21, [x4]", - "bfxil x7, x21, #0, #8", - "eor w27, w21, w20", - "lsl w0, w21, #24", - "cmn w0, w20, lsl #24", - "add w26, w21, w20" + "mov x20, x4", + "mov x21, x7", + "uxtb w22, w21", + "ldaddalb w22, w23, [x20]", + "mov x20, x21", + "bfxil x20, x23, #0, #8", + "mov x7, x20", + "eor w20, w23, w22", + "mov x27, x20", + "lsl w0, w23, #24", + "cmn w0, w22, lsl #24", + "add w20, w23, w22", + "mov x26, x20" ] }, "xadd word [rax], bx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 13, "Comment": "0x0f 0xc1", "ExpectedArm64ASM": [ - "uxth w20, w7", - "ldaddalh w20, w21, [x4]", - "bfxil x7, x21, #0, #16", - "eor w27, w21, w20", - "lsl w0, w21, #16", - "cmn w0, w20, lsl #16", - "add w26, w21, w20" + "mov x20, x4", + "mov x21, x7", + "uxth w22, w21", + "ldaddalh w22, w23, [x20]", + "mov x20, x21", + "bfxil x20, x23, #0, #16", + "mov x7, x20", + "eor w20, w23, w22", + "mov x27, x20", + "lsl w0, w23, #16", + "cmn w0, w22, lsl #16", + "add w20, w23, w22", + "mov x26, x20" ] }, "xadd dword [rax], ebx": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0xc1", "ExpectedArm64ASM": [ - "mov w20, w7", - "ldaddal w20, w7, [x4]", - "eor w27, w7, w20", - "adds w26, w7, w20" + "mov x20, x4", + "mov x21, x7", + "mov w22, w21", + "ldaddal w22, w21, [x20]", + "mov x7, x21", + "eor w20, w21, w22", + "mov x27, x20", + "adds w20, w21, w22", + "mov x26, x20" ] }, "xadd qword [rax], rbx": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": "0x0f 0xc1", "ExpectedArm64ASM": [ - "mov x20, x7", - "ldaddal x20, x7, [x4]", - "eor w27, w7, w20", - "adds x26, x7, x20" + "mov x20, x4", + "mov x21, x7", + "ldaddal x21, x22, [x20]", + "mov x7, x22", + "eor w20, w22, w21", + "mov x27, x20", + "adds x20, x22, x21", + "mov x26, x20" ] }, "lock add byte [rax], 1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "GROUP1 0x80 /0", "ExpectedArm64ASM": [ "mov w20, #0x1", - "ldaddalb w20, w27, [x4]", - "lsl w0, w27, #24", + "mov x21, x4", + "ldaddalb w20, w22, [x21]", + "mov x27, x22", + "lsl w0, w22, #24", "cmn w0, w20, lsl #24", - "add w26, w27, #0x1 (1)" + "add w20, w22, #0x1 (1)", + "mov x26, x20" ] }, "lock add byte [rax], 0xFF": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": "GROUP1 0x80 /0", "ExpectedArm64ASM": [ "mov w20, #0xff", - "ldaddalb w20, w21, [x4]", - "mvn w27, w21", - "lsl w0, w21, #24", + "mov x21, x4", + "ldaddalb w20, w22, [x21]", + "mvn w21, w22", + "mov x27, x21", + "lsl w0, w22, #24", "cmn w0, w20, lsl #24", - "add w26, w21, #0xff (255)" + "add w20, w22, #0xff (255)", + "mov x26, x20" ] }, "lock add word [rax], 0x100": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "GROUP1 0x81 /0", "ExpectedArm64ASM": [ "mov w20, #0x100", - "ldaddalh w20, w27, [x4]", - "lsl w0, w27, #16", + "mov x21, x4", + "ldaddalh w20, w22, [x21]", + "mov x27, x22", + "lsl w0, w22, #16", "cmn w0, w20, lsl #16", - "add w26, w27, #0x100 (256)" + "add w20, w22, #0x100 (256)", + "mov x26, x20" ] }, "lock add word [rax], 0xFFFF": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": "GROUP1 0x81 /0", "ExpectedArm64ASM": [ "mov w20, #0xffff", - "ldaddalh w20, w21, [x4]", - "mvn w27, w21", - "lsl w0, w21, #16", + "mov x21, x4", + "ldaddalh w20, w22, [x21]", + "mvn w21, w22", + "mov x27, x21", + "lsl w0, w22, #16", "cmn w0, w20, lsl #16", - "add w26, w21, w20" + "add w21, w22, w20", + "mov x26, x21" ] }, "lock add dword [rax], 0x100": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /0", "ExpectedArm64ASM": [ "mov w20, #0x100", - "ldaddal w20, w27, [x4]", - "adds w26, w27, #0x100 (256)" + "mov x21, x4", + "ldaddal w20, w22, [x21]", + "mov x27, x22", + "adds w20, w22, #0x100 (256)", + "mov x26, x20" ] }, "lock add dword [rax], 0xFFFFFFFF": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x81 /0", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "ldaddal w20, w21, [x4]", - "mvn w27, w21", - "adds w26, w21, w20" + "mov x21, x4", + "ldaddal w20, w22, [x21]", + "mvn w21, w22", + "mov x27, x21", + "adds w21, w22, w20", + "mov x26, x21" ] }, "lock add qword [rax], 0x100": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /0", "ExpectedArm64ASM": [ "mov w20, #0x100", - "ldaddal x20, x27, [x4]", - "adds x26, x27, #0x100 (256)" + "mov x21, x4", + "ldaddal x20, x22, [x21]", + "mov x27, x22", + "adds x20, x22, #0x100 (256)", + "mov x26, x20" ] }, "lock add qword [rax], -2147483647": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /0", "ExpectedArm64ASM": [ "mov x20, #0xffffffff80000001", - "ldaddal x20, x27, [x4]", - "adds x26, x27, x20" + "mov x21, x4", + "ldaddal x20, x22, [x21]", + "mov x27, x22", + "adds x21, x22, x20", + "mov x26, x21" ] }, "lock add word [rax], 1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "GROUP1 0x83 /0", "ExpectedArm64ASM": [ "mov w20, #0x1", - "ldaddalh w20, w27, [x4]", - "lsl w0, w27, #16", + "mov x21, x4", + "ldaddalh w20, w22, [x21]", + "mov x27, x22", + "lsl w0, w22, #16", "cmn w0, w20, lsl #16", - "add w26, w27, #0x1 (1)" + "add w20, w22, #0x1 (1)", + "mov x26, x20" ] }, "lock add dword [rax], 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /0", "ExpectedArm64ASM": [ "mov w20, #0x1", - "ldaddal w20, w27, [x4]", - "adds w26, w27, #0x1 (1)" + "mov x21, x4", + "ldaddal w20, w22, [x21]", + "mov x27, x22", + "adds w20, w22, #0x1 (1)", + "mov x26, x20" ] }, "lock add qword [rax], 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /0", "ExpectedArm64ASM": [ "mov w20, #0x1", - "ldaddal x20, x27, [x4]", - "adds x26, x27, #0x1 (1)" + "mov x21, x4", + "ldaddal x20, x22, [x21]", + "mov x27, x22", + "adds x20, x22, #0x1 (1)", + "mov x26, x20" ] }, "lock or byte [rax], 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x80 /1", "ExpectedArm64ASM": [ "mov w20, #0x1", - "ldsetalb w20, w20, [x4]", - "orr w26, w20, #0x1", - "cmn wzr, w26, lsl #24" + "mov x21, x4", + "ldsetalb w20, w22, [x21]", + "orr w20, w22, #0x1", + "mov x26, x20", + "cmn wzr, w20, lsl #24" ] }, "lock or byte [rax], 0xFF": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x80 /1", "ExpectedArm64ASM": [ "mov w20, #0xff", - "ldsetalb w20, w20, [x4]", - "orr w26, w20, #0xff", - "cmn wzr, w26, lsl #24" + "mov x21, x4", + "ldsetalb w20, w22, [x21]", + "orr w20, w22, #0xff", + "mov x26, x20", + "cmn wzr, w20, lsl #24" ] }, "lock or word [rax], 0x100": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /1", "ExpectedArm64ASM": [ "mov w20, #0x100", - "ldsetalh w20, w20, [x4]", - "orr w26, w20, #0x100", - "cmn wzr, w26, lsl #16" + "mov x21, x4", + "ldsetalh w20, w22, [x21]", + "orr w20, w22, #0x100", + "mov x26, x20", + "cmn wzr, w20, lsl #16" ] }, "lock or word [rax], 0xFFFF": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /1", "ExpectedArm64ASM": [ "mov w20, #0xffff", - "ldsetalh w20, w20, [x4]", - "orr w26, w20, #0xffff", - "cmn wzr, w26, lsl #16" + "mov x21, x4", + "ldsetalh w20, w22, [x21]", + "orr w20, w22, #0xffff", + "mov x26, x20", + "cmn wzr, w20, lsl #16" ] }, "lock or dword [rax], 0x100": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /1", "ExpectedArm64ASM": [ "mov w20, #0x100", - "ldsetal w20, w20, [x4]", - "orr w26, w20, #0x100", - "tst w26, w26" + "mov x21, x4", + "ldsetal w20, w22, [x21]", + "orr w20, w22, #0x100", + "mov x26, x20", + "tst w20, w20" ] }, "lock or dword [rax], 0xFFFFFFFF": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /1", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "ldsetal w20, w21, [x4]", - "orr w26, w21, w20", - "tst w26, w26" + "mov x21, x4", + "ldsetal w20, w22, [x21]", + "orr w21, w22, w20", + "mov x26, x21", + "tst w21, w21" ] }, "lock or qword [rax], 0x100": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /1", "ExpectedArm64ASM": [ "mov w20, #0x100", - "ldsetal x20, x20, [x4]", - "orr x26, x20, #0x100", - "tst x26, x26" + "mov x21, x4", + "ldsetal x20, x22, [x21]", + "orr x20, x22, #0x100", + "mov x26, x20", + "tst x20, x20" ] }, "lock or qword [rax], -2147483647": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /1", "ExpectedArm64ASM": [ "mov x20, #0xffffffff80000001", - "ldsetal x20, x20, [x4]", - "orr x26, x20, #0xffffffff80000001", - "tst x26, x26" + "mov x21, x4", + "ldsetal x20, x22, [x21]", + "orr x20, x22, #0xffffffff80000001", + "mov x26, x20", + "tst x20, x20" ] }, "lock or word [rax], 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /1", "ExpectedArm64ASM": [ "mov w20, #0x1", - "ldsetalh w20, w20, [x4]", - "orr w26, w20, #0x1", - "cmn wzr, w26, lsl #16" + "mov x21, x4", + "ldsetalh w20, w22, [x21]", + "orr w20, w22, #0x1", + "mov x26, x20", + "cmn wzr, w20, lsl #16" ] }, "lock or dword [rax], 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /1", "ExpectedArm64ASM": [ "mov w20, #0x1", - "ldsetal w20, w20, [x4]", - "orr w26, w20, #0x1", - "tst w26, w26" + "mov x21, x4", + "ldsetal w20, w22, [x21]", + "orr w20, w22, #0x1", + "mov x26, x20", + "tst w20, w20" ] }, "lock or qword [rax], 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /1", "ExpectedArm64ASM": [ "mov w20, #0x1", - "ldsetal x20, x20, [x4]", - "orr x26, x20, #0x1", - "tst x26, x26" + "mov x21, x4", + "ldsetal x20, x22, [x21]", + "orr x20, x22, #0x1", + "mov x26, x20", + "tst x20, x20" ] }, "lock adc byte [rax], 1": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 19, "Comment": "GROUP1 0x80 /2", "ExpectedArm64ASM": [ "mov w20, #0x1", "adc w21, wzr, w20", - "ldaddalb w21, w27, [x4]", + "mov x22, x4", + "ldaddalb w21, w23, [x22]", + "mov x27, x23", "cset w21, hs", - "adc w20, w27, w20", - "uxtb w26, w20", - "cmp w26, #0x1 (1)", - "cset x20, lo", - "cmp w26, #0x1 (1)", - "cset x22, ls", + "adc w22, w23, w20", + "uxtb w20, w22", + "cmp w20, #0x1 (1)", + "cset x22, lo", + "cmp w20, #0x1 (1)", + "cset x24, ls", "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #24", - "rmif x20, #63, #nzCv", - "bic w20, w26, w27", - "rmif x20, #7, #nzcV" + "csel x25, x24, x22, eq", + "cmn wzr, w20, lsl #24", + "rmif x25, #63, #nzCv", + "bic w21, w20, w23", + "rmif x21, #7, #nzcV", + "mov x26, x20" ] }, "lock adc byte [rax], 0xFF": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 20, "Comment": "GROUP1 0x80 /2", "ExpectedArm64ASM": [ "mov w20, #0xff", "adc w21, wzr, w20", - "ldaddalb w21, w21, [x4]", - "mvn w27, w21", - "cset w22, hs", - "adc w20, w21, w20", - "uxtb w26, w20", - "cmp w26, #0xff (255)", - "cset x20, lo", - "cmp w26, #0xff (255)", - "cset x23, ls", - "cmp x22, #0x1 (1)", - "csel x20, x23, x20, eq", - "cmn wzr, w26, lsl #24", - "rmif x20, #63, #nzCv", - "bic w20, w21, w26", - "rmif x20, #7, #nzcV" + "mov x22, x4", + "ldaddalb w21, w23, [x22]", + "mvn w21, w23", + "mov x27, x21", + "cset w21, hs", + "adc w22, w23, w20", + "uxtb w20, w22", + "cmp w20, #0xff (255)", + "cset x22, lo", + "cmp w20, #0xff (255)", + "cset x24, ls", + "cmp x21, #0x1 (1)", + "csel x25, x24, x22, eq", + "cmn wzr, w20, lsl #24", + "rmif x25, #63, #nzCv", + "bic w21, w23, w20", + "rmif x21, #7, #nzcV", + "mov x26, x20" ] }, "lock adc word [rax], 0x100": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 19, "Comment": "GROUP1 0x81 /2", "ExpectedArm64ASM": [ "mov w20, #0x100", "adc w21, wzr, w20", - "ldaddalh w21, w27, [x4]", + "mov x22, x4", + "ldaddalh w21, w23, [x22]", + "mov x27, x23", "cset w21, hs", - "adc w20, w27, w20", - "uxth w26, w20", - "cmp w26, #0x100 (256)", - "cset x20, lo", - "cmp w26, #0x100 (256)", - "cset x22, ls", + "adc w22, w23, w20", + "uxth w20, w22", + "cmp w20, #0x100 (256)", + "cset x22, lo", + "cmp w20, #0x100 (256)", + "cset x24, ls", "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #16", - "rmif x20, #63, #nzCv", - "bic w20, w26, w27", - "rmif x20, #15, #nzcV" + "csel x25, x24, x22, eq", + "cmn wzr, w20, lsl #16", + "rmif x25, #63, #nzCv", + "bic w21, w20, w23", + "rmif x21, #15, #nzcV", + "mov x26, x20" ] }, "lock adc word [rax], 0xFFFF": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 20, "Comment": "GROUP1 0x81 /2", "ExpectedArm64ASM": [ "mov w20, #0xffff", "adc w21, wzr, w20", - "ldaddalh w21, w21, [x4]", - "mvn w27, w21", - "cset w22, hs", - "adc w23, w21, w20", - "uxth w26, w23", - "cmp w26, w20", - "cset x23, lo", - "cmp w26, w20", - "cset x20, ls", - "cmp x22, #0x1 (1)", - "csel x20, x20, x23, eq", - "cmn wzr, w26, lsl #16", + "mov x22, x4", + "ldaddalh w21, w23, [x22]", + "mvn w21, w23", + "mov x27, x21", + "cset w21, hs", + "adc w22, w23, w20", + "uxth w24, w22", + "cmp w24, w20", + "cset x22, lo", + "cmp w24, w20", + "cset x25, ls", + "cmp x21, #0x1 (1)", + "csel x20, x25, x22, eq", + "cmn wzr, w24, lsl #16", "rmif x20, #63, #nzCv", - "bic w20, w21, w26", - "rmif x20, #15, #nzcV" + "bic w20, w23, w24", + "rmif x20, #15, #nzcV", + "mov x26, x24" ] }, "lock adc dword [rax], 0x100": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x81 /2", "ExpectedArm64ASM": [ "mov w20, #0x100", "adc w21, wzr, w20", - "ldaddal w21, w27, [x4]", - "adcs w26, w27, w20" + "mov x22, x4", + "ldaddal w21, w23, [x22]", + "mov x27, x23", + "adcs w21, w23, w20", + "mov x26, x21" ] }, "lock adc dword [rax], 0xFFFFFFFF": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "GROUP1 0x81 /2", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", "adc w21, wzr, w20", - "ldaddal w21, w21, [x4]", - "mvn w27, w21", - "adcs w26, w21, w20" + "mov x22, x4", + "ldaddal w21, w23, [x22]", + "mvn w21, w23", + "mov x27, x21", + "adcs w21, w23, w20", + "mov x26, x21" ] }, "lock adc qword [rax], 0x100": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x81 /2", "ExpectedArm64ASM": [ "mov w20, #0x100", "adc x21, xzr, x20", - "ldaddal x21, x27, [x4]", - "adcs x26, x27, x20" + "mov x22, x4", + "ldaddal x21, x23, [x22]", + "mov x27, x23", + "adcs x21, x23, x20", + "mov x26, x21" ] }, "lock adc qword [rax], -2147483647": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x81 /2", "ExpectedArm64ASM": [ "mov x20, #0xffffffff80000001", "adc x21, xzr, x20", - "ldaddal x21, x27, [x4]", - "adcs x26, x27, x20" + "mov x22, x4", + "ldaddal x21, x23, [x22]", + "mov x27, x23", + "adcs x21, x23, x20", + "mov x26, x21" ] }, "lock adc word [rax], 1": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 19, "Comment": "GROUP1 0x83 /2", "ExpectedArm64ASM": [ "mov w20, #0x1", "adc w21, wzr, w20", - "ldaddalh w21, w27, [x4]", + "mov x22, x4", + "ldaddalh w21, w23, [x22]", + "mov x27, x23", "cset w21, hs", - "adc w20, w27, w20", - "uxth w26, w20", - "cmp w26, #0x1 (1)", - "cset x20, lo", - "cmp w26, #0x1 (1)", - "cset x22, ls", + "adc w22, w23, w20", + "uxth w20, w22", + "cmp w20, #0x1 (1)", + "cset x22, lo", + "cmp w20, #0x1 (1)", + "cset x24, ls", "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #16", - "rmif x20, #63, #nzCv", - "bic w20, w26, w27", - "rmif x20, #15, #nzcV" + "csel x25, x24, x22, eq", + "cmn wzr, w20, lsl #16", + "rmif x25, #63, #nzCv", + "bic w21, w20, w23", + "rmif x21, #15, #nzcV", + "mov x26, x20" ] }, "lock adc dword [rax], 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x83 /2", "ExpectedArm64ASM": [ "mov w20, #0x1", "adc w21, wzr, w20", - "ldaddal w21, w27, [x4]", - "adcs w26, w27, w20" + "mov x22, x4", + "ldaddal w21, w23, [x22]", + "mov x27, x23", + "adcs w21, w23, w20", + "mov x26, x21" ] }, "lock adc qword [rax], 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x83 /2", "ExpectedArm64ASM": [ "mov w20, #0x1", "adc x21, xzr, x20", - "ldaddal x21, x27, [x4]", - "adcs x26, x27, x20" + "mov x22, x4", + "ldaddal x21, x23, [x22]", + "mov x27, x23", + "adcs x21, x23, x20", + "mov x26, x21" ] }, "lock sbb byte [rax], 1": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 21, "Comment": "GROUP1 0x80 /3", "ExpectedArm64ASM": [ "mov w20, #0x1", - "adc w21, wzr, w20", - "neg w1, w21", - "ldaddalb w1, w27, [x4]", + "mov x21, x4", + "adc w22, wzr, w20", + "neg w1, w22", + "ldaddalb w1, w23, [x21]", + "mov x27, x23", "cset w21, hs", - "add w20, w20, w21", - "sub w20, w27, w20", - "uxtb w26, w20", - "cmp w26, w27", + "add w22, w20, w21", + "sub w20, w23, w22", + "uxtb w22, w20", + "cmp w22, w23", "cset x20, hi", - "cmp w26, w27", - "cset x22, hs", + "cmp w22, w23", + "cset x24, hs", "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #24", - "rmif x20, #63, #nzCv", - "bic w20, w27, w26", - "rmif x20, #7, #nzcV" + "csel x25, x24, x20, eq", + "cmn wzr, w22, lsl #24", + "rmif x25, #63, #nzCv", + "bic w20, w23, w22", + "rmif x20, #7, #nzcV", + "mov x26, x22" ] }, "lock sbb byte [rax], 0xFF": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 22, "Comment": "GROUP1 0x80 /3", "ExpectedArm64ASM": [ "mov w20, #0xff", - "adc w21, wzr, w20", - "neg w1, w21", - "ldaddalb w1, w21, [x4]", - "mvn w27, w21", - "cset w22, hs", - "add w20, w20, w22", - "sub w20, w21, w20", - "uxtb w26, w20", - "cmp w26, w21", + "mov x21, x4", + "adc w22, wzr, w20", + "neg w1, w22", + "ldaddalb w1, w23, [x21]", + "mvn w21, w23", + "mov x27, x21", + "cset w21, hs", + "add w22, w20, w21", + "sub w20, w23, w22", + "uxtb w22, w20", + "cmp w22, w23", "cset x20, hi", - "cmp w26, w21", - "cset x23, hs", - "cmp x22, #0x1 (1)", - "csel x20, x23, x20, eq", - "cmn wzr, w26, lsl #24", - "rmif x20, #63, #nzCv", - "bic w20, w26, w21", - "rmif x20, #7, #nzcV" + "cmp w22, w23", + "cset x24, hs", + "cmp x21, #0x1 (1)", + "csel x25, x24, x20, eq", + "cmn wzr, w22, lsl #24", + "rmif x25, #63, #nzCv", + "bic w20, w22, w23", + "rmif x20, #7, #nzcV", + "mov x26, x22" ] }, "lock sbb word [rax], 0x100": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 21, "Comment": "GROUP1 0x81 /3", "ExpectedArm64ASM": [ "mov w20, #0x100", - "adc w21, wzr, w20", - "neg w1, w21", - "ldaddalh w1, w27, [x4]", + "mov x21, x4", + "adc w22, wzr, w20", + "neg w1, w22", + "ldaddalh w1, w23, [x21]", + "mov x27, x23", "cset w21, hs", - "add w20, w20, w21", - "sub w20, w27, w20", - "uxth w26, w20", - "cmp w26, w27", + "add w22, w20, w21", + "sub w20, w23, w22", + "uxth w22, w20", + "cmp w22, w23", "cset x20, hi", - "cmp w26, w27", - "cset x22, hs", + "cmp w22, w23", + "cset x24, hs", "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #16", - "rmif x20, #63, #nzCv", - "bic w20, w27, w26", - "rmif x20, #15, #nzcV" + "csel x25, x24, x20, eq", + "cmn wzr, w22, lsl #16", + "rmif x25, #63, #nzCv", + "bic w20, w23, w22", + "rmif x20, #15, #nzcV", + "mov x26, x22" ] }, "lock sbb word [rax], 0xFFFF": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 22, "Comment": "GROUP1 0x81 /3", "ExpectedArm64ASM": [ "mov w20, #0xffff", - "adc w21, wzr, w20", - "neg w1, w21", - "ldaddalh w1, w21, [x4]", - "mvn w27, w21", - "cset w22, hs", - "add w20, w20, w22", - "sub w20, w21, w20", - "uxth w26, w20", - "cmp w26, w21", + "mov x21, x4", + "adc w22, wzr, w20", + "neg w1, w22", + "ldaddalh w1, w23, [x21]", + "mvn w21, w23", + "mov x27, x21", + "cset w21, hs", + "add w22, w20, w21", + "sub w20, w23, w22", + "uxth w22, w20", + "cmp w22, w23", "cset x20, hi", - "cmp w26, w21", - "cset x23, hs", - "cmp x22, #0x1 (1)", - "csel x20, x23, x20, eq", - "cmn wzr, w26, lsl #16", - "rmif x20, #63, #nzCv", - "bic w20, w26, w21", - "rmif x20, #15, #nzcV" + "cmp w22, w23", + "cset x24, hs", + "cmp x21, #0x1 (1)", + "csel x25, x24, x20, eq", + "cmn wzr, w22, lsl #16", + "rmif x25, #63, #nzCv", + "bic w20, w22, w23", + "rmif x20, #15, #nzcV", + "mov x26, x22" ] }, "lock sbb dword [rax], 0x100": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "GROUP1 0x81 /3", "ExpectedArm64ASM": [ "mov w20, #0x100", - "adc w21, wzr, w20", - "neg w1, w21", - "ldaddal w1, w27, [x4]", + "mov x21, x4", + "adc w22, wzr, w20", + "neg w1, w22", + "ldaddal w1, w23, [x21]", + "mov x27, x23", "cfinv", - "sbcs w26, w27, w20", - "cfinv" + "sbcs w21, w23, w20", + "cfinv", + "mov x26, x21" ] }, "lock sbb dword [rax], 0xFFFFFFFF": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 11, "Comment": "GROUP1 0x81 /3", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "adc w21, wzr, w20", - "neg w1, w21", - "ldaddal w1, w21, [x4]", - "mvn w27, w21", + "mov x21, x4", + "adc w22, wzr, w20", + "neg w1, w22", + "ldaddal w1, w23, [x21]", + "mvn w21, w23", + "mov x27, x21", "cfinv", - "sbcs w26, w21, w20", - "cfinv" + "sbcs w21, w23, w20", + "cfinv", + "mov x26, x21" ] }, "lock sbb qword [rax], 0x100": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "GROUP1 0x81 /3", "ExpectedArm64ASM": [ "mov w20, #0x100", - "adc x21, xzr, x20", - "neg x1, x21", - "ldaddal x1, x27, [x4]", + "mov x21, x4", + "adc x22, xzr, x20", + "neg x1, x22", + "ldaddal x1, x23, [x21]", + "mov x27, x23", "cfinv", - "sbcs x26, x27, x20", - "cfinv" + "sbcs x21, x23, x20", + "cfinv", + "mov x26, x21" ] }, "lock sbb qword [rax], -2147483647": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "GROUP1 0x81 /3", "ExpectedArm64ASM": [ "mov x20, #0xffffffff80000001", - "adc x21, xzr, x20", - "neg x1, x21", - "ldaddal x1, x27, [x4]", + "mov x21, x4", + "adc x22, xzr, x20", + "neg x1, x22", + "ldaddal x1, x23, [x21]", + "mov x27, x23", "cfinv", - "sbcs x26, x27, x20", - "cfinv" + "sbcs x21, x23, x20", + "cfinv", + "mov x26, x21" ] }, "lock sbb word [rax], 1": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 21, "Comment": "GROUP1 0x83 /3", "ExpectedArm64ASM": [ "mov w20, #0x1", - "adc w21, wzr, w20", - "neg w1, w21", - "ldaddalh w1, w27, [x4]", + "mov x21, x4", + "adc w22, wzr, w20", + "neg w1, w22", + "ldaddalh w1, w23, [x21]", + "mov x27, x23", "cset w21, hs", - "add w20, w20, w21", - "sub w20, w27, w20", - "uxth w26, w20", - "cmp w26, w27", + "add w22, w20, w21", + "sub w20, w23, w22", + "uxth w22, w20", + "cmp w22, w23", "cset x20, hi", - "cmp w26, w27", - "cset x22, hs", + "cmp w22, w23", + "cset x24, hs", "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #16", - "rmif x20, #63, #nzCv", - "bic w20, w27, w26", - "rmif x20, #15, #nzcV" + "csel x25, x24, x20, eq", + "cmn wzr, w22, lsl #16", + "rmif x25, #63, #nzCv", + "bic w20, w23, w22", + "rmif x20, #15, #nzcV", + "mov x26, x22" ] }, "lock sbb dword [rax], 1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "GROUP1 0x83 /3", "ExpectedArm64ASM": [ "mov w20, #0x1", - "adc w21, wzr, w20", - "neg w1, w21", - "ldaddal w1, w27, [x4]", + "mov x21, x4", + "adc w22, wzr, w20", + "neg w1, w22", + "ldaddal w1, w23, [x21]", + "mov x27, x23", "cfinv", - "sbcs w26, w27, w20", - "cfinv" + "sbcs w21, w23, w20", + "cfinv", + "mov x26, x21" ] }, "lock sbb qword [rax], 1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "GROUP1 0x83 /3", "ExpectedArm64ASM": [ "mov w20, #0x1", - "adc x21, xzr, x20", - "neg x1, x21", - "ldaddal x1, x27, [x4]", + "mov x21, x4", + "adc x22, xzr, x20", + "neg x1, x22", + "ldaddal x1, x23, [x21]", + "mov x27, x23", "cfinv", - "sbcs x26, x27, x20", - "cfinv" + "sbcs x21, x23, x20", + "cfinv", + "mov x26, x21" ] }, "lock and byte [rax], 1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x80 /4", "ExpectedArm64ASM": [ "mov w20, #0x1", + "mov x21, x4", "mvn w1, w20", - "ldclralb w1, w20, [x4]", - "and w26, w20, #0x1", - "cmn wzr, w26, lsl #24" + "ldclralb w1, w22, [x21]", + "and w20, w22, #0x1", + "cmn wzr, w20, lsl #24", + "mov x26, x20" ] }, "lock and byte [rax], 0xFF": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x80 /4", "ExpectedArm64ASM": [ "mov w20, #0xff", + "mov x21, x4", "mvn w1, w20", - "ldclralb w1, w20, [x4]", - "and w26, w20, #0xff", - "cmn wzr, w26, lsl #24" + "ldclralb w1, w22, [x21]", + "and w20, w22, #0xff", + "cmn wzr, w20, lsl #24", + "mov x26, x20" ] }, "lock and word [rax], 0x100": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x81 /4", "ExpectedArm64ASM": [ "mov w20, #0x100", + "mov x21, x4", "mvn w1, w20", - "ldclralh w1, w20, [x4]", - "and w26, w20, #0x100", - "cmn wzr, w26, lsl #16" + "ldclralh w1, w22, [x21]", + "and w20, w22, #0x100", + "cmn wzr, w20, lsl #16", + "mov x26, x20" ] }, "lock and word [rax], 0xFFFF": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x81 /4", "ExpectedArm64ASM": [ "mov w20, #0xffff", + "mov x21, x4", "mvn w1, w20", - "ldclralh w1, w20, [x4]", - "and w26, w20, #0xffff", - "cmn wzr, w26, lsl #16" + "ldclralh w1, w22, [x21]", + "and w20, w22, #0xffff", + "cmn wzr, w20, lsl #16", + "mov x26, x20" ] }, "lock and dword [rax], 0x100": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /4", "ExpectedArm64ASM": [ "mov w20, #0x100", + "mov x21, x4", "mvn w1, w20", - "ldclral w1, w20, [x4]", - "ands w26, w20, #0x100" + "ldclral w1, w22, [x21]", + "ands w20, w22, #0x100", + "mov x26, x20" ] }, "lock and dword [rax], 0xFFFFFFFF": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /4", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", + "mov x21, x4", "mvn w1, w20", - "ldclral w1, w21, [x4]", - "ands w26, w21, w20" + "ldclral w1, w22, [x21]", + "ands w21, w22, w20", + "mov x26, x21" ] }, "lock and qword [rax], 0x100": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /4", "ExpectedArm64ASM": [ "mov w20, #0x100", + "mov x21, x4", "mvn x1, x20", - "ldclral x1, x20, [x4]", - "ands x26, x20, #0x100" + "ldclral x1, x22, [x21]", + "ands x20, x22, #0x100", + "mov x26, x20" ] }, "lock and qword [rax], -2147483647": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /4", "ExpectedArm64ASM": [ "mov x20, #0xffffffff80000001", + "mov x21, x4", "mvn x1, x20", - "ldclral x1, x20, [x4]", - "ands x26, x20, #0xffffffff80000001" + "ldclral x1, x22, [x21]", + "ands x20, x22, #0xffffffff80000001", + "mov x26, x20" ] }, "lock and word [rax], 1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x83 /4", "ExpectedArm64ASM": [ "mov w20, #0x1", + "mov x21, x4", "mvn w1, w20", - "ldclralh w1, w20, [x4]", - "and w26, w20, #0x1", - "cmn wzr, w26, lsl #16" + "ldclralh w1, w22, [x21]", + "and w20, w22, #0x1", + "cmn wzr, w20, lsl #16", + "mov x26, x20" ] }, "lock and dword [rax], 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /4", "ExpectedArm64ASM": [ "mov w20, #0x1", + "mov x21, x4", "mvn w1, w20", - "ldclral w1, w20, [x4]", - "ands w26, w20, #0x1" + "ldclral w1, w22, [x21]", + "ands w20, w22, #0x1", + "mov x26, x20" ] }, "lock and qword [rax], 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /4", "ExpectedArm64ASM": [ "mov w20, #0x1", + "mov x21, x4", "mvn x1, x20", - "ldclral x1, x20, [x4]", - "ands x26, x20, #0x1" + "ldclral x1, x22, [x21]", + "ands x20, x22, #0x1", + "mov x26, x20" ] }, "lock sub byte [rax], 1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "GROUP1 0x80 /5", "ExpectedArm64ASM": [ "mov w20, #0x1", + "mov x21, x4", "neg w1, w20", - "ldaddalb w1, w27, [x4]", - "lsl w0, w27, #24", + "ldaddalb w1, w22, [x21]", + "mov x27, x22", + "lsl w0, w22, #24", "cmp w0, w20, lsl #24", - "sub w26, w27, #0x1 (1)", + "sub w20, w22, #0x1 (1)", + "mov x26, x20", "cfinv" ] }, "lock sub byte [rax], 0xFF": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 11, "Comment": "GROUP1 0x80 /5", "ExpectedArm64ASM": [ "mov w20, #0xff", + "mov x21, x4", "neg w1, w20", - "ldaddalb w1, w21, [x4]", - "mvn w27, w21", - "lsl w0, w21, #24", + "ldaddalb w1, w22, [x21]", + "mvn w21, w22", + "mov x27, x21", + "lsl w0, w22, #24", "cmp w0, w20, lsl #24", - "sub w26, w21, #0xff (255)", + "sub w20, w22, #0xff (255)", + "mov x26, x20", "cfinv" ] }, "lock sub word [rax], 0x100": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "GROUP1 0x81 /5", "ExpectedArm64ASM": [ "mov w20, #0x100", + "mov x21, x4", "neg w1, w20", - "ldaddalh w1, w27, [x4]", - "lsl w0, w27, #16", + "ldaddalh w1, w22, [x21]", + "mov x27, x22", + "lsl w0, w22, #16", "cmp w0, w20, lsl #16", - "sub w26, w27, #0x100 (256)", + "sub w20, w22, #0x100 (256)", + "mov x26, x20", "cfinv" ] }, "lock sub word [rax], 0xFFFF": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 11, "Comment": "GROUP1 0x81 /5", "ExpectedArm64ASM": [ "mov w20, #0xffff", + "mov x21, x4", "neg w1, w20", - "ldaddalh w1, w21, [x4]", - "mvn w27, w21", - "lsl w0, w21, #16", + "ldaddalh w1, w22, [x21]", + "mvn w21, w22", + "mov x27, x21", + "lsl w0, w22, #16", "cmp w0, w20, lsl #16", - "sub w26, w21, w20", + "sub w21, w22, w20", + "mov x26, x21", "cfinv" ] }, "lock sub dword [rax], 0x100": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "GROUP1 0x81 /5", "ExpectedArm64ASM": [ "mov w20, #0x100", + "mov x21, x4", "neg w1, w20", - "ldaddal w1, w27, [x4]", - "subs w26, w27, #0x100 (256)", + "ldaddal w1, w22, [x21]", + "mov x27, x22", + "subs w20, w22, #0x100 (256)", + "mov x26, x20", "cfinv" ] }, "lock sub dword [rax], 0xFFFFFFFF": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": "GROUP1 0x81 /5", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", + "mov x21, x4", "neg w1, w20", - "ldaddal w1, w21, [x4]", - "mvn w27, w21", - "subs w26, w21, w20", + "ldaddal w1, w22, [x21]", + "mvn w21, w22", + "mov x27, x21", + "subs w21, w22, w20", + "mov x26, x21", "cfinv" ] }, "lock sub qword [rax], 0x100": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "GROUP1 0x81 /5", "ExpectedArm64ASM": [ "mov w20, #0x100", + "mov x21, x4", "neg x1, x20", - "ldaddal x1, x27, [x4]", - "subs x26, x27, #0x100 (256)", + "ldaddal x1, x22, [x21]", + "mov x27, x22", + "subs x20, x22, #0x100 (256)", + "mov x26, x20", "cfinv" ] }, "lock sub qword [rax], -2147483647": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "GROUP1 0x81 /5", "ExpectedArm64ASM": [ "mov x20, #0xffffffff80000001", + "mov x21, x4", "neg x1, x20", - "ldaddal x1, x27, [x4]", - "subs x26, x27, x20", + "ldaddal x1, x22, [x21]", + "mov x27, x22", + "subs x21, x22, x20", + "mov x26, x21", "cfinv" ] }, "lock sub word [rax], 1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "GROUP1 0x83 /5", "ExpectedArm64ASM": [ "mov w20, #0x1", + "mov x21, x4", "neg w1, w20", - "ldaddalh w1, w27, [x4]", - "lsl w0, w27, #16", + "ldaddalh w1, w22, [x21]", + "mov x27, x22", + "lsl w0, w22, #16", "cmp w0, w20, lsl #16", - "sub w26, w27, #0x1 (1)", + "sub w20, w22, #0x1 (1)", + "mov x26, x20", "cfinv" ] }, "lock sub dword [rax], 1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "GROUP1 0x83 /5", "ExpectedArm64ASM": [ "mov w20, #0x1", + "mov x21, x4", "neg w1, w20", - "ldaddal w1, w27, [x4]", - "subs w26, w27, #0x1 (1)", + "ldaddal w1, w22, [x21]", + "mov x27, x22", + "subs w20, w22, #0x1 (1)", + "mov x26, x20", "cfinv" ] }, "lock sub qword [rax], 1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "GROUP1 0x83 /5", "ExpectedArm64ASM": [ "mov w20, #0x1", + "mov x21, x4", "neg x1, x20", - "ldaddal x1, x27, [x4]", - "subs x26, x27, #0x1 (1)", + "ldaddal x1, x22, [x21]", + "mov x27, x22", + "subs x20, x22, #0x1 (1)", + "mov x26, x20", "cfinv" ] }, "lock xor byte [rax], 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x80 /6", "ExpectedArm64ASM": [ "mov w20, #0x1", - "ldeoralb w20, w20, [x4]", - "eor w26, w20, #0x1", - "cmn wzr, w26, lsl #24" + "mov x21, x4", + "ldeoralb w20, w22, [x21]", + "eor w20, w22, #0x1", + "mov x26, x20", + "cmn wzr, w20, lsl #24" ] }, "lock xor byte [rax], 0xFF": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x80 /6", "ExpectedArm64ASM": [ "mov w20, #0xff", - "ldeoralb w20, w20, [x4]", - "eor w26, w20, #0xff", - "cmn wzr, w26, lsl #24" + "mov x21, x4", + "ldeoralb w20, w22, [x21]", + "eor w20, w22, #0xff", + "mov x26, x20", + "cmn wzr, w20, lsl #24" ] }, "lock xor word [rax], 0x100": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /6", "ExpectedArm64ASM": [ "mov w20, #0x100", - "ldeoralh w20, w20, [x4]", - "eor w26, w20, #0x100", - "cmn wzr, w26, lsl #16" + "mov x21, x4", + "ldeoralh w20, w22, [x21]", + "eor w20, w22, #0x100", + "mov x26, x20", + "cmn wzr, w20, lsl #16" ] }, "lock xor word [rax], 0xFFFF": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /6", "ExpectedArm64ASM": [ "mov w20, #0xffff", - "ldeoralh w20, w20, [x4]", - "eor w26, w20, #0xffff", - "cmn wzr, w26, lsl #16" + "mov x21, x4", + "ldeoralh w20, w22, [x21]", + "eor w20, w22, #0xffff", + "mov x26, x20", + "cmn wzr, w20, lsl #16" ] }, "lock xor dword [rax], 0x100": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /6", "ExpectedArm64ASM": [ "mov w20, #0x100", - "ldeoral w20, w20, [x4]", - "eor w26, w20, #0x100", - "tst w26, w26" + "mov x21, x4", + "ldeoral w20, w22, [x21]", + "eor w20, w22, #0x100", + "mov x26, x20", + "tst w20, w20" ] }, "lock xor dword [rax], 0xFFFFFFFF": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /6", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "ldeoral w20, w21, [x4]", - "eor w26, w21, w20", - "tst w26, w26" + "mov x21, x4", + "ldeoral w20, w22, [x21]", + "eor w21, w22, w20", + "mov x26, x21", + "tst w21, w21" ] }, "lock xor qword [rax], 0x100": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /6", "ExpectedArm64ASM": [ "mov w20, #0x100", - "ldeoral x20, x20, [x4]", - "eor x26, x20, #0x100", - "tst x26, x26" + "mov x21, x4", + "ldeoral x20, x22, [x21]", + "eor x20, x22, #0x100", + "mov x26, x20", + "tst x20, x20" ] }, "lock xor qword [rax], -2147483647": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /6", "ExpectedArm64ASM": [ "mov x20, #0xffffffff80000001", - "ldeoral x20, x20, [x4]", - "eor x26, x20, #0xffffffff80000001", - "tst x26, x26" + "mov x21, x4", + "ldeoral x20, x22, [x21]", + "eor x20, x22, #0xffffffff80000001", + "mov x26, x20", + "tst x20, x20" ] }, "lock xor word [rax], 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /6", "ExpectedArm64ASM": [ "mov w20, #0x1", - "ldeoralh w20, w20, [x4]", - "eor w26, w20, #0x1", - "cmn wzr, w26, lsl #16" + "mov x21, x4", + "ldeoralh w20, w22, [x21]", + "eor w20, w22, #0x1", + "mov x26, x20", + "cmn wzr, w20, lsl #16" ] }, "lock xor dword [rax], 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /6", "ExpectedArm64ASM": [ "mov w20, #0x1", - "ldeoral w20, w20, [x4]", - "eor w26, w20, #0x1", - "tst w26, w26" + "mov x21, x4", + "ldeoral w20, w22, [x21]", + "eor w20, w22, #0x1", + "mov x26, x20", + "tst w20, w20" ] }, "lock xor qword [rax], 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /6", "ExpectedArm64ASM": [ "mov w20, #0x1", - "ldeoral x20, x20, [x4]", - "eor x26, x20, #0x1", - "tst x26, x26" + "mov x21, x4", + "ldeoral x20, x22, [x21]", + "eor x20, x22, #0x1", + "mov x26, x20", + "tst x20, x20" ] }, "lock dec byte [rax]": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": "GROUP3 0xfe /1", "ExpectedArm64ASM": [ - "mov w20, #0xff", - "ldaddalb w20, w27, [x4]", - "sub w26, w27, #0x1 (1)", - "setf8 w26", - "bic w20, w27, w26", - "rmif x20, #7, #nzcV" + "mov x20, x4", + "mov w21, #0xff", + "ldaddalb w21, w22, [x20]", + "sub w20, w22, #0x1 (1)", + "mov x26, x20", + "mov x27, x22", + "setf8 w20", + "bic w21, w22, w20", + "rmif x21, #7, #nzcV" ] }, "lock not byte [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "GROUP2 0xf6 /2", "ExpectedArm64ASM": [ "mov w20, #0xff", - "steorlb w20, [x4]" + "mov x21, x4", + "steorlb w20, [x21]" ] }, "lock not word [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "GROUP2 0xf7 /2", "ExpectedArm64ASM": [ "mov w20, #0xffff", - "steorlh w20, [x4]" + "mov x21, x4", + "steorlh w20, [x21]" ] }, "lock not dword [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "GROUP2 0xf7 /2", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "steorl w20, [x4]" + "mov x21, x4", + "steorl w20, [x21]" ] }, "lock not qword [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "GROUP2 0xf7 /2", "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", - "steorl x20, [x4]" + "mov x21, x4", + "steorl x20, [x21]" ] }, "lock neg byte [rax]": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 11, "Comment": "GROUP2 0xf6 /3", "ExpectedArm64ASM": [ - "ldaxrb w1, [x4]", + "mov x20, x4", + "ldaxrb w1, [x20]", "neg w2, w1", - "stlxrb w3, w2, [x4]", + "stlxrb w3, w2, [x20]", "cbnz w3, #-0xc", - "mov w27, w1", - "cmp wzr, w27, lsl #24", - "neg w26, w27", + "mov w21, w1", + "mov x27, x21", + "cmp wzr, w21, lsl #24", + "neg w20, w21", + "mov x26, x20", "cfinv" ] }, "lock neg word [rax]": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 11, "Comment": "GROUP2 0xf7 /3", "ExpectedArm64ASM": [ - "ldaxrh w1, [x4]", + "mov x20, x4", + "ldaxrh w1, [x20]", "neg w2, w1", - "stlxrh w3, w2, [x4]", + "stlxrh w3, w2, [x20]", "cbnz w3, #-0xc", - "mov w27, w1", - "cmp wzr, w27, lsl #16", - "neg w26, w27", + "mov w21, w1", + "mov x27, x21", + "cmp wzr, w21, lsl #16", + "neg w20, w21", + "mov x26, x20", "cfinv" ] }, "lock neg dword [rax]": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "GROUP2 0xf7 /3", "ExpectedArm64ASM": [ - "ldaxr w1, [x4]", + "mov x20, x4", + "ldaxr w1, [x20]", "neg w2, w1", - "stlxr w3, w2, [x4]", + "stlxr w3, w2, [x20]", "cbnz w3, #-0xc", - "mov w27, w1", - "negs w26, w27", + "mov w21, w1", + "mov x27, x21", + "negs w20, w21", + "mov x26, x20", "cfinv" ] }, "lock neg qword [rax]": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "GROUP2 0xf7 /3", "ExpectedArm64ASM": [ - "ldaxr x1, [x4]", + "mov x20, x4", + "ldaxr x1, [x20]", "neg x2, x1", - "stlxr w3, x2, [x4]", + "stlxr w3, x2, [x20]", "cbnz x3, #-0xc", - "mov x27, x1", - "negs x26, x27", + "mov x21, x1", + "mov x27, x21", + "negs x20, x21", + "mov x26, x20", "cfinv" ] }, "lock dec word [rax]": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": "GROUP4 0xfe /1", "ExpectedArm64ASM": [ - "mov w20, #0xffff", - "ldaddalh w20, w27, [x4]", - "sub w26, w27, #0x1 (1)", - "setf16 w26", - "bic w20, w27, w26", - "rmif x20, #15, #nzcV" + "mov x20, x4", + "mov w21, #0xffff", + "ldaddalh w21, w22, [x20]", + "sub w20, w22, #0x1 (1)", + "mov x26, x20", + "mov x27, x22", + "setf16 w20", + "bic w21, w22, w20", + "rmif x21, #15, #nzcV" ] }, "lock dec dword [rax]": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "GROUP4 0xfe /1", "ExpectedArm64ASM": [ - "mov w20, #0xffffffff", - "ldaddal w20, w27, [x4]", + "mov x20, x4", + "mov w21, #0xffffffff", + "ldaddal w21, w22, [x20]", "cset w20, hs", - "subs w26, w27, #0x1 (1)", + "mov x27, x22", + "subs w21, w22, #0x1 (1)", + "mov x26, x21", "rmif x20, #63, #nzCv" ] }, "lock dec qword [rax]": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "GROUP4 0xfe /1", "ExpectedArm64ASM": [ - "mov x20, #0xffffffffffffffff", - "ldaddal x20, x27, [x4]", + "mov x20, x4", + "mov x21, #0xffffffffffffffff", + "ldaddal x21, x22, [x20]", "cset w20, hs", - "subs x26, x27, #0x1 (1)", + "mov x27, x22", + "subs x21, x22, #0x1 (1)", + "mov x26, x21", "rmif x20, #63, #nzCv" ] }, "lock inc byte [rax]": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": "GROUP4 0xfe /0", "ExpectedArm64ASM": [ "mov w20, #0x1", - "ldaddalb w20, w27, [x4]", - "add w26, w27, #0x1 (1)", - "setf8 w26", - "bic w20, w26, w27", - "rmif x20, #7, #nzcV" + "mov x21, x4", + "ldaddalb w20, w22, [x21]", + "add w20, w22, #0x1 (1)", + "mov x26, x20", + "mov x27, x22", + "setf8 w20", + "bic w21, w20, w22", + "rmif x21, #7, #nzcV" ] }, "lock inc word [rax]": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": "GROUP4 0xfe /0", "ExpectedArm64ASM": [ "mov w20, #0x1", - "ldaddalh w20, w27, [x4]", - "add w26, w27, #0x1 (1)", - "setf16 w26", - "bic w20, w26, w27", - "rmif x20, #15, #nzcV" + "mov x21, x4", + "ldaddalh w20, w22, [x21]", + "add w20, w22, #0x1 (1)", + "mov x26, x20", + "mov x27, x22", + "setf16 w20", + "bic w21, w20, w22", + "rmif x21, #15, #nzcV" ] }, "lock inc dword [rax]": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "GROUP4 0xfe /0", "ExpectedArm64ASM": [ "mov w20, #0x1", - "ldaddal w20, w27, [x4]", + "mov x21, x4", + "ldaddal w20, w22, [x21]", "cset w20, hs", - "adds w26, w27, #0x1 (1)", + "mov x27, x22", + "adds w21, w22, #0x1 (1)", + "mov x26, x21", "rmif x20, #63, #nzCv" ] }, "lock inc qword [rax]": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "GROUP4 0xfe /0", "ExpectedArm64ASM": [ "mov w20, #0x1", - "ldaddal x20, x27, [x4]", + "mov x21, x4", + "ldaddal x20, x22, [x21]", "cset w20, hs", - "adds x26, x27, #0x1 (1)", + "mov x27, x22", + "adds x21, x22, #0x1 (1)", + "mov x26, x21", "rmif x20, #63, #nzCv" ] } diff --git a/unittests/InstructionCountCI/FlagM/FlagOpts.json b/unittests/InstructionCountCI/FlagM/FlagOpts.json index c4c12e9adc..21dac1d70f 100644 --- a/unittests/InstructionCountCI/FlagM/FlagOpts.json +++ b/unittests/InstructionCountCI/FlagM/FlagOpts.json @@ -11,304 +11,424 @@ }, "Instructions": { "Chained add": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 10, "x86Insts": [ "add rax, rbx", "adc rcx, rcx" ], "ExpectedArm64ASM": [ - "adds x4, x4, x7", - "mov w27, #0x0", - "adcs x26, x5, x5", - "mov x5, x26" + "mov x20, x7", + "mov x21, x4", + "adds x22, x21, x20", + "mov x4, x22", + "mov x20, x5", + "mov w21, #0x0", + "mov x27, x21", + "adcs x21, x20, x20", + "mov x26, x21", + "mov x5, x21" ] }, "Chained sub": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 14, "x86Insts": [ "sub rax, rbx", "sbb rcx, rdx" ], "ExpectedArm64ASM": [ - "subs x4, x4, x7", + "mov x20, x7", + "mov x21, x4", + "subs x22, x21, x20", "cfinv", - "eor w27, w5, w6", + "mov x4, x22", + "mov x20, x6", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", "cfinv", - "sbcs x26, x5, x6", + "sbcs x22, x21, x20", "cfinv", - "mov x5, x26" + "mov x26, x22", + "mov x5, x22" ] }, "Inverted add": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 12, "x86Insts": [ "add rax, rbx", "adc rcx, rdx", "cmc" ], "ExpectedArm64ASM": [ - "adds x4, x4, x7", - "eor w27, w5, w6", - "adcs x26, x5, x6", - "mov x5, x26", + "mov x20, x7", + "mov x21, x4", + "adds x22, x21, x20", + "mov x4, x22", + "mov x20, x6", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "adcs x22, x21, x20", + "mov x26, x22", + "mov x5, x22", "cfinv" ] }, "Inverted sub": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 14, "x86Insts": [ "sub rax, rbx", "sbb rcx, rcx", "cmc" ], "ExpectedArm64ASM": [ - "subs x4, x4, x7", + "mov x20, x7", + "mov x21, x4", + "subs x22, x21, x20", "cfinv", - "mov w27, #0x0", + "mov x4, x22", + "mov x20, x5", + "mov w21, #0x0", + "mov x27, x21", "cfinv", - "sbcs x26, x5, x5", + "sbcs x21, x20, x20", "cfinv", - "mov x5, x26", + "mov x26, x21", + "mov x5, x21", "cfinv" ] }, "ADC dead": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 9, "x86Insts": [ "add rax, rbx", "adc rcx, rcx", "test rcx, rcx" ], "ExpectedArm64ASM": [ - "adds x4, x4, x7", - "adc x5, x5, x5", - "ands x26, x5, x5" + "mov x20, x7", + "mov x21, x4", + "adds x22, x21, x20", + "mov x4, x22", + "mov x20, x5", + "adc x21, x20, x20", + "mov x5, x21", + "ands x20, x21, x21", + "mov x26, x20" ] }, "INC consumed": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "x86Insts": [ "add rax, rbx", "inc rax" ], "ExpectedArm64ASM": [ - "adds x4, x4, x7", + "mov x20, x7", + "mov x21, x4", + "adds x22, x21, x20", + "mov x4, x22", "cset w20, hs", - "mov x27, x4", - "adds x26, x4, #0x1 (1)", + "mov x27, x22", + "adds x21, x22, #0x1 (1)", + "mov x26, x21", "rmif x20, #63, #nzCv", - "mov x4, x26" + "mov x4, x21" ] }, "INC dead": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 9, "x86Insts": [ "add rax, rbx", "inc rax", "test rax, rdx" ], "ExpectedArm64ASM": [ - "add x4, x4, x7", - "add x4, x4, #0x1 (1)", - "ands x26, x4, x6" + "mov x20, x7", + "mov x21, x4", + "add x22, x21, x20", + "mov x4, x22", + "add x20, x22, #0x1 (1)", + "mov x4, x20", + "mov x21, x6", + "ands x22, x20, x21", + "mov x26, x22" ] }, "DEC consumed": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "x86Insts": [ "sub rax, rbx", "dec rax" ], "ExpectedArm64ASM": [ - "subs x4, x4, x7", + "mov x20, x7", + "mov x21, x4", + "subs x22, x21, x20", "cfinv", + "mov x4, x22", "cset w20, hs", - "mov x27, x4", - "subs x26, x4, #0x1 (1)", + "mov x27, x22", + "subs x21, x22, #0x1 (1)", + "mov x26, x21", "rmif x20, #63, #nzCv", - "mov x4, x26" + "mov x4, x21" ] }, "DEC dead": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 9, "x86Insts": [ "sub rax, rbx", "dec rax", "test rax, rcx" ], "ExpectedArm64ASM": [ - "sub x4, x4, x7", - "sub x4, x4, #0x1 (1)", - "ands x26, x4, x5" + "mov x20, x7", + "mov x21, x4", + "sub x22, x21, x20", + "mov x4, x22", + "sub x20, x22, #0x1 (1)", + "mov x4, x20", + "mov x21, x5", + "ands x22, x20, x21", + "mov x26, x22" ] }, "8-bit DEC consumed": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 19, "x86Insts": [ "sub al, ah", "dec al" ], "ExpectedArm64ASM": [ - "lsr w20, w4, #8", - "lsl w0, w4, #24", - "cmp w0, w20, lsl #24", - "sub w20, w4, w20", + "mov x20, x4", + "lsr w21, w20, #8", + "lsl w0, w20, #24", + "cmp w0, w21, lsl #24", + "sub w22, w20, w21", "cfinv", - "bfxil x4, x20, #0, #8", - "uxtb w27, w4", - "sub w26, w27, #0x1 (1)", - "setf8 w26", - "bic w20, w27, w26", - "rmif x20, #7, #nzcV", - "bfxil x4, x26, #0, #8" + "mov x21, x20", + "bfxil x21, x22, #0, #8", + "mov x4, x21", + "uxtb w20, w21", + "sub w22, w20, #0x1 (1)", + "mov x26, x22", + "mov x27, x20", + "setf8 w22", + "bic w23, w20, w22", + "rmif x23, #7, #nzcV", + "mov x20, x21", + "bfxil x20, x22, #0, #8", + "mov x4, x20" ] }, "8-bit DEC dead": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 14, "x86Insts": [ "sub al, ah", "dec al", "test al, al" ], "ExpectedArm64ASM": [ - "lsr w20, w4, #8", - "sub w20, w4, w20", - "bfxil x4, x20, #0, #8", - "uxtb w20, w4", - "sub w20, w20, #0x1 (1)", - "bfxil x4, x20, #0, #8", - "mov x26, x4", - "cmn wzr, w26, lsl #24" + "mov x20, x4", + "lsr w21, w20, #8", + "sub w22, w20, w21", + "mov x21, x20", + "bfxil x21, x22, #0, #8", + "mov x4, x21", + "uxtb w20, w21", + "sub w22, w20, #0x1 (1)", + "mov x20, x21", + "bfxil x20, x22, #0, #8", + "mov x4, x20", + "mov x21, x20", + "cmn wzr, w21, lsl #24", + "mov x26, x21" ] }, "Variable shift dead": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 7, "x86Insts": [ "sar rax, cl", "test rax, rdx" ], "ExpectedArm64ASM": [ - "asr x4, x4, x5", - "ands x26, x4, x6" + "mov x20, x4", + "mov x21, x5", + "asr x22, x20, x21", + "mov x4, x22", + "mov x20, x6", + "ands x21, x22, x20", + "mov x26, x21" ] }, "Variable rotate-through-carry dead": { - "ExpectedInstructionCount": 15, + "ExpectedInstructionCount": 22, "x86Insts": [ "rcr rax, cl", "test rax, rdx" ], "ExpectedArm64ASM": [ - "and x20, x5, #0x3f", - "cbz x20, #+0x34", - "lsr x20, x4, x5", - "cset w21, hs", - "neg x22, x5", - "lsl x23, x4, x22", - "orr x20, x20, x23, lsl #1", - "sub x23, x5, #0x1 (1)", - "lsr x23, x4, x23", - "rmif x23, #63, #nzCv", - "lsl x21, x21, x22", - "orr x4, x20, x21", - "eor x20, x4, x4, lsr #1", + "mov x20, x5", + "and x21, x20, #0x3f", + "cbz x21, #+0x40", + "mov x20, x5", + "mov x21, x4", + "lsr x22, x21, x20", + "cset w23, hs", + "neg x24, x20", + "lsl x25, x21, x24", + "orr x30, x22, x25, lsl #1", + "sub x22, x20, #0x1 (1)", + "lsr x20, x21, x22", + "rmif x20, #63, #nzCv", + "lsl x20, x23, x24", + "orr x21, x30, x20", + "eor x20, x21, x21, lsr #1", "rmif x20, #62, #nzcV", - "ands x26, x4, x6" + "mov x4, x21", + "mov x20, x6", + "mov x21, x4", + "ands x22, x21, x20", + "mov x26, x22" ] }, "Partial NZCV select (cmp)": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 11, "x86Insts": [ "cmp rax, rbx", "setz cl", "test cl, cl" ], "ExpectedArm64ASM": [ - "subs x20, x4, x7", + "mov x20, x7", + "mov x21, x4", + "subs x22, x21, x20", "cset x20, eq", - "bfxil x5, x20, #0, #8", - "mov x26, x5", - "cmn wzr, w26, lsl #24" + "mov x21, x5", + "mov x23, x21", + "bfxil x23, x20, #0, #8", + "mov x5, x23", + "mov x20, x23", + "cmn wzr, w20, lsl #24", + "mov x26, x20" ] }, "Partial NZCV select (add)": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 12, "x86Insts": [ "add rax, rbx", "setz cl", "test cl, cl" ], "ExpectedArm64ASM": [ - "adds x4, x4, x7", + "mov x20, x7", + "mov x21, x4", + "adds x22, x21, x20", + "mov x4, x22", "cset x20, eq", - "bfxil x5, x20, #0, #8", - "mov x26, x5", - "cmn wzr, w26, lsl #24" + "mov x21, x5", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x5, x22", + "mov x20, x22", + "cmn wzr, w20, lsl #24", + "mov x26, x20" ] }, "AND use only ZF": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 12, "x86Insts": [ "and eax, ebx", "setz cl", "test cl, cl" ], "ExpectedArm64ASM": [ - "ands w4, w4, w7", + "mov x20, x7", + "mov x21, x4", + "ands w22, w21, w20", + "mov x4, x22", "cset x20, eq", - "bfxil x5, x20, #0, #8", - "mov x26, x5", - "cmn wzr, w26, lsl #24" + "mov x21, x5", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x5, x22", + "mov x20, x22", + "cmn wzr, w20, lsl #24", + "mov x26, x20" ] }, "AND use only PF": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 15, "x86Insts": [ "and eax, ebx", "setp cl", "test cl, cl" ], "ExpectedArm64ASM": [ - "and w4, w4, w7", - "eor w20, w4, w4, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "and x20, x20, #0x1", - "bfxil x5, x20, #0, #8", - "mov x26, x5", - "cmn wzr, w26, lsl #24" + "mov x20, x7", + "mov x21, x4", + "and w22, w21, w20", + "mov x4, x22", + "eor w20, w22, w22, lsr #4", + "eor w21, w20, w20, lsr #2", + "eon w20, w21, w21, lsr #1", + "and x21, x20, #0x1", + "mov x20, x5", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x5, x22", + "mov x20, x22", + "cmn wzr, w20, lsl #24", + "mov x26, x20" ] }, "Dead cmpxchg flags": { - "ExpectedInstructionCount": 23, + "ExpectedInstructionCount": 39, "x86Insts": [ "cmpxchg8b [rbp]", "test rax, rax" ], "ExpectedArm64ASM": [ - "add x20, x9, #0x0 (0)", - "mov w21, w4", - "mov w22, w6", - "mov w23, w22", - "mov w22, w21", - "mov w21, w7", - "mov w24, w5", - "mov w25, w24", - "mov w24, w21", - "mov w2, w22", - "mov w3, w23", - "caspal w2, w3, w24, w25, [x20]", + "sub sp, sp, #0x40 (64)", + "mov x20, x9", + "add x21, x20, #0x0 (0)", + "mov x20, x4", + "mov w22, w20", + "mov x23, x6", + "mov w24, w23", + "mov x30, x24", + "mov w24, w22", + "mov w25, w30", + "mov x22, x7", + "mov w30, w22", + "mov x22, x5", + "mov w18, w22", + "str x23, [sp]", + "mov w22, w30", + "mov w23, w18", + "str x20, [sp, #32]", + "mov x30, x21", + "mov w2, w24", + "mov w3, w25", + "caspal w2, w3, w22, w23, [x30]", "mov w20, w2", "mov w21, w3", - "mov w24, w20", - "mov w25, w21", + "mov w22, w20", + "mov w23, w21", "mrs x0, nzcv", - "cmp w20, w22", - "ccmp w21, w23, #nzcv, eq", + "cmp w20, w24", + "ccmp w21, w25, #nzcv, eq", "rmif x0, #0, #NzCV", - "csel x4, x24, x4, ne", - "csel x6, x25, x6, ne", - "ands x26, x4, x4" + "ldr x20, [sp, #32]", + "csel x21, x22, x20, ne", + "mov x4, x21", + "ldr x20, [sp]", + "csel x22, x23, x20, ne", + "mov x6, x22", + "ands x20, x21, x21", + "mov x26, x20", + "add sp, sp, #0x40 (64)" ] } } diff --git a/unittests/InstructionCountCI/FlagM/H0F38.json b/unittests/InstructionCountCI/FlagM/H0F38.json index fe2d76bb42..984e61e98b 100644 --- a/unittests/InstructionCountCI/FlagM/H0F38.json +++ b/unittests/InstructionCountCI/FlagM/H0F38.json @@ -12,107 +12,123 @@ }, "Instructions": { "ptest xmm0, xmm1": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 16, "Comment": [ "0x66 0x0f 0x38 0x17" ], "ExpectedArm64ASM": [ - "and v2.16b, v16.16b, v17.16b", - "bic v3.16b, v17.16b, v16.16b", - "umaxv h2, v2.8h", - "umaxv h3, v3.8h", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "and v4.16b, v2.16b, v3.16b", + "bic v5.16b, v3.16b, v2.16b", + "umaxv h2, v4.8h", + "umaxv h3, v5.8h", "umov w20, v2.h[0]", "umov w21, v3.h[0]", - "mov w27, #0x0", - "mov w26, #0x1", + "mov w22, #0x0", + "mov w23, #0x1", "cmp x21, #0x0 (0)", - "cset x21, eq", + "cset x24, eq", "tst w20, w20", - "rmif x21, #63, #nzCv" + "rmif x24, #63, #nzCv", + "mov x26, x23", + "mov x27, x22" ] }, "adcx eax, ebx": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 17, "Comment": [ "0x66 0x0f 0x38 0xf6" ], "ExpectedArm64ASM": [ "cset w20, hs", - "mov w21, w7", - "mov w22, w4", - "add w23, w21, w20", - "add w4, w22, w23", - "mrs x22, nzcv", - "cmp w4, w21", + "mov x21, x7", + "mov w22, w21", + "mov x21, x4", + "mov w23, w21", + "add w21, w22, w20", + "add w24, w23, w21", + "mov x4, x24", + "mrs x21, nzcv", + "cmp w24, w22", "cset x23, lo", - "cmp w4, w21", - "cset x21, ls", + "cmp w24, w22", + "cset x25, ls", "cmp x20, #0x1 (1)", - "csel x20, x21, x23, eq", - "msr nzcv, x22", - "rmif x20, #63, #nzCv" + "csel x22, x25, x23, eq", + "msr nzcv, x21", + "rmif x22, #63, #nzCv" ] }, "adcx rax, rbx": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 15, "Comment": [ "0x66 REX.W 0x0f 0x38 0xf6" ], "ExpectedArm64ASM": [ "cset w20, hs", - "add x21, x7, x20", - "add x4, x4, x21", - "mrs x21, nzcv", - "cmp x4, x7", - "cset x22, lo", - "cmp x4, x7", - "cset x23, ls", + "mov x21, x7", + "mov x22, x4", + "add x23, x21, x20", + "add x24, x22, x23", + "mov x4, x24", + "mrs x22, nzcv", + "cmp x24, x21", + "cset x23, lo", + "cmp x24, x21", + "cset x25, ls", "cmp x20, #0x1 (1)", - "csel x20, x23, x22, eq", - "msr nzcv, x21", - "rmif x20, #63, #nzCv" + "csel x21, x25, x23, eq", + "msr nzcv, x22", + "rmif x21, #63, #nzCv" ] }, "adox eax, ebx": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 17, "Comment": [ "0xf3 0x0f 0x38 0xf6" ], "ExpectedArm64ASM": [ "cset w20, vs", - "mov w21, w7", - "mov w22, w4", - "add w23, w21, w20", - "add w4, w22, w23", - "mrs x22, nzcv", - "cmp w4, w21", + "mov x21, x7", + "mov w22, w21", + "mov x21, x4", + "mov w23, w21", + "add w21, w22, w20", + "add w24, w23, w21", + "mov x4, x24", + "mrs x21, nzcv", + "cmp w24, w22", "cset x23, lo", - "cmp w4, w21", - "cset x21, ls", + "cmp w24, w22", + "cset x25, ls", "cmp x20, #0x1 (1)", - "csel x20, x21, x23, eq", - "msr nzcv, x22", - "rmif x20, #0, #nzcV" + "csel x22, x25, x23, eq", + "msr nzcv, x21", + "rmif x22, #0, #nzcV" ] }, "adox rax, rbx": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 15, "Comment": [ "0xf3 REX.W 0x0f 0x38 0xf6" ], "ExpectedArm64ASM": [ "cset w20, vs", - "add x21, x7, x20", - "add x4, x4, x21", - "mrs x21, nzcv", - "cmp x4, x7", - "cset x22, lo", - "cmp x4, x7", - "cset x23, ls", + "mov x21, x7", + "mov x22, x4", + "add x23, x21, x20", + "add x24, x22, x23", + "mov x4, x24", + "mrs x22, nzcv", + "cmp x24, x21", + "cset x23, lo", + "cmp x24, x21", + "cset x25, ls", "cmp x20, #0x1 (1)", - "csel x20, x23, x22, eq", - "msr nzcv, x21", - "rmif x20, #0, #nzcV" + "csel x21, x25, x23, eq", + "msr nzcv, x22", + "rmif x21, #0, #nzcV" ] } } diff --git a/unittests/InstructionCountCI/FlagM/HotBlocks.json b/unittests/InstructionCountCI/FlagM/HotBlocks.json index 9838b0a9d9..434ee259e2 100644 --- a/unittests/InstructionCountCI/FlagM/HotBlocks.json +++ b/unittests/InstructionCountCI/FlagM/HotBlocks.json @@ -13,7 +13,7 @@ }, "Instructions": { "The Witcher 3": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 17, "x86Insts": [ "mov eax, 0x1", "lock xadd qword [rcx], rax", @@ -24,19 +24,27 @@ "add rdx, rcx" ], "ExpectedArm64ASM": [ - "mov w4, #0x1", - "ldaddal x4, x4, [x5]", - "mov x6, x4", - "and w6, w4, #0x1f", - "add x6, x6, #0x1 (1)", - "lsl x6, x6, #6", - "eor w27, w6, w5", - "adds x26, x6, x5", - "mov x6, x26" + "mov w20, #0x1", + "mov x4, x20", + "mov x21, x5", + "ldaddal x20, x22, [x21]", + "mov x4, x22", + "mov x6, x22", + "and w20, w22, #0x1f", + "mov x6, x20", + "add x22, x20, #0x1 (1)", + "mov x6, x22", + "lsl x20, x22, #6", + "mov x6, x20", + "eor w22, w20, w21", + "mov x27, x22", + "adds x22, x20, x21", + "mov x26, x22", + "mov x6, x22" ] }, "FMOD scalar loop": { - "ExpectedInstructionCount": 88, + "ExpectedInstructionCount": 137, "x86Insts": [ "mov esi, ecx", "mov rdx, rbp", @@ -78,98 +86,147 @@ "sub esi, 0x1" ], "ExpectedArm64ASM": [ - "mov w10, w5", - "mov x6, x9", - "mov x4, x7", - "ldr s18, [x6]", - "add x4, x4, #0x20 (32)", - "fmul s0, s18, s16", - "mov v18.s[0], v0.s[0]", - "add x6, x6, #0x20 (32)", - "sub x20, x4, #0x20 (32)", - "ldr s2, [x20]", - "fadd s0, s18, s2", - "mov v18.s[0], v0.s[0]", - "sub x20, x4, #0x20 (32)", - "str s18, [x20]", - "sub x20, x6, #0x1c (28)", - "ldr s18, [x20]", - "fmul s0, s18, s17", - "mov v18.s[0], v0.s[0]", - "sub x20, x4, #0x1c (28)", - "ldr s2, [x20]", - "fadd s0, s18, s2", - "mov v18.s[0], v0.s[0]", - "sub x20, x4, #0x1c (28)", - "str s18, [x20]", - "sub x20, x6, #0x18 (24)", - "ldr s18, [x20]", - "fmul s0, s18, s16", - "mov v18.s[0], v0.s[0]", - "sub x20, x4, #0x18 (24)", - "ldr s2, [x20]", - "fadd s0, s18, s2", - "mov v18.s[0], v0.s[0]", - "sub x20, x4, #0x18 (24)", - "str s18, [x20]", - "sub x20, x6, #0x14 (20)", - "ldr s18, [x20]", - "fmul s0, s18, s17", - "mov v18.s[0], v0.s[0]", - "sub x20, x4, #0x14 (20)", - "ldr s2, [x20]", - "fadd s0, s18, s2", - "mov v18.s[0], v0.s[0]", - "sub x20, x4, #0x14 (20)", - "str s18, [x20]", - "sub x20, x6, #0x10 (16)", - "ldr s18, [x20]", - "fmul s0, s18, s16", - "mov v18.s[0], v0.s[0]", - "sub x20, x4, #0x10 (16)", - "ldr s2, [x20]", - "fadd s0, s18, s2", - "mov v18.s[0], v0.s[0]", - "sub x20, x4, #0x10 (16)", - "str s18, [x20]", - "sub x20, x6, #0xc (12)", - "ldr s18, [x20]", - "fmul s0, s18, s17", - "mov v18.s[0], v0.s[0]", - "sub x20, x4, #0xc (12)", - "ldr s2, [x20]", - "fadd s0, s18, s2", - "mov v18.s[0], v0.s[0]", - "sub x20, x4, #0xc (12)", - "str s18, [x20]", - "sub x20, x6, #0x8 (8)", - "ldr s18, [x20]", - "fmul s0, s18, s16", - "mov v18.s[0], v0.s[0]", - "sub x20, x4, #0x8 (8)", - "ldr s2, [x20]", - "fadd s0, s18, s2", - "mov v18.s[0], v0.s[0]", - "sub x20, x4, #0x8 (8)", - "str s18, [x20]", - "sub x20, x6, #0x4 (4)", - "ldr s18, [x20]", - "fmul s0, s18, s17", - "mov v18.s[0], v0.s[0]", - "sub x20, x4, #0x4 (4)", - "ldr s2, [x20]", - "fadd s0, s18, s2", - "mov v18.s[0], v0.s[0]", - "sub x20, x4, #0x4 (4)", - "str s18, [x20]", - "mov x27, x10", - "subs w26, w10, #0x1 (1)", + "mov x20, x5", + "mov w21, w20", + "mov x10, x21", + "mov x20, x9", + "mov x6, x20", + "mov x22, x7", + "mov x4, x22", + "ldr s2, [x20]", + "mov v18.16b, v2.16b", + "add x23, x22, #0x20 (32)", + "mov x4, x23", + "mov v3.16b, v16.16b", + "mov v4.16b, v2.16b", + "fmul s0, s2, s3", + "mov v4.s[0], v0.s[0]", + "mov v18.16b, v4.16b", + "add x22, x20, #0x20 (32)", + "mov x6, x22", + "sub x20, x23, #0x20 (32)", + "ldr s2, [x20]", + "mov v5.16b, v4.16b", + "fadd s0, s4, s2", + "mov v5.s[0], v0.s[0]", + "mov v18.16b, v5.16b", + "sub x20, x23, #0x20 (32)", + "str s5, [x20]", + "sub x20, x22, #0x1c (28)", + "ldr s2, [x20]", + "mov v18.16b, v2.16b", + "mov v4.16b, v17.16b", + "mov v5.16b, v2.16b", + "fmul s0, s2, s4", + "mov v5.s[0], v0.s[0]", + "mov v18.16b, v5.16b", + "sub x20, x23, #0x1c (28)", + "ldr s2, [x20]", + "mov v6.16b, v5.16b", + "fadd s0, s5, s2", + "mov v6.s[0], v0.s[0]", + "mov v18.16b, v6.16b", + "sub x20, x23, #0x1c (28)", + "str s6, [x20]", + "sub x20, x22, #0x18 (24)", + "ldr s2, [x20]", + "mov v18.16b, v2.16b", + "mov v5.16b, v2.16b", + "fmul s0, s2, s3", + "mov v5.s[0], v0.s[0]", + "mov v18.16b, v5.16b", + "sub x20, x23, #0x18 (24)", + "ldr s2, [x20]", + "mov v6.16b, v5.16b", + "fadd s0, s5, s2", + "mov v6.s[0], v0.s[0]", + "mov v18.16b, v6.16b", + "sub x20, x23, #0x18 (24)", + "str s6, [x20]", + "sub x20, x22, #0x14 (20)", + "ldr s2, [x20]", + "mov v18.16b, v2.16b", + "mov v5.16b, v2.16b", + "fmul s0, s2, s4", + "mov v5.s[0], v0.s[0]", + "mov v18.16b, v5.16b", + "sub x20, x23, #0x14 (20)", + "ldr s2, [x20]", + "mov v6.16b, v5.16b", + "fadd s0, s5, s2", + "mov v6.s[0], v0.s[0]", + "mov v18.16b, v6.16b", + "sub x20, x23, #0x14 (20)", + "str s6, [x20]", + "sub x20, x22, #0x10 (16)", + "ldr s2, [x20]", + "mov v18.16b, v2.16b", + "mov v5.16b, v2.16b", + "fmul s0, s2, s3", + "mov v5.s[0], v0.s[0]", + "mov v18.16b, v5.16b", + "sub x20, x23, #0x10 (16)", + "ldr s2, [x20]", + "mov v6.16b, v5.16b", + "fadd s0, s5, s2", + "mov v6.s[0], v0.s[0]", + "mov v18.16b, v6.16b", + "sub x20, x23, #0x10 (16)", + "str s6, [x20]", + "sub x20, x22, #0xc (12)", + "ldr s2, [x20]", + "mov v18.16b, v2.16b", + "mov v5.16b, v2.16b", + "fmul s0, s2, s4", + "mov v5.s[0], v0.s[0]", + "mov v18.16b, v5.16b", + "sub x20, x23, #0xc (12)", + "ldr s2, [x20]", + "mov v6.16b, v5.16b", + "fadd s0, s5, s2", + "mov v6.s[0], v0.s[0]", + "mov v18.16b, v6.16b", + "sub x20, x23, #0xc (12)", + "str s6, [x20]", + "sub x20, x22, #0x8 (8)", + "ldr s2, [x20]", + "mov v18.16b, v2.16b", + "mov v5.16b, v2.16b", + "fmul s0, s2, s3", + "mov v5.s[0], v0.s[0]", + "mov v18.16b, v5.16b", + "sub x20, x23, #0x8 (8)", + "ldr s2, [x20]", + "mov v3.16b, v5.16b", + "fadd s0, s5, s2", + "mov v3.s[0], v0.s[0]", + "mov v18.16b, v3.16b", + "sub x20, x23, #0x8 (8)", + "str s3, [x20]", + "sub x20, x22, #0x4 (4)", + "ldr s2, [x20]", + "mov v18.16b, v2.16b", + "mov v3.16b, v2.16b", + "fmul s0, s2, s4", + "mov v3.s[0], v0.s[0]", + "mov v18.16b, v3.16b", + "sub x20, x23, #0x4 (4)", + "ldr s2, [x20]", + "mov v4.16b, v3.16b", + "fadd s0, s3, s2", + "mov v4.s[0], v0.s[0]", + "mov v18.16b, v4.16b", + "sub x20, x23, #0x4 (4)", + "str s4, [x20]", + "mov x27, x21", + "subs w20, w21, #0x1 (1)", + "mov x26, x20", "cfinv", - "mov x10, x26" + "mov x10, x20" ] }, "Scalar vector add loop": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 16, "Comment": [ "Saw this in bytemark" ], @@ -181,17 +238,26 @@ "cmp rsi, rax" ], "ExpectedArm64ASM": [ - "ldr q16, [x16, x4, sxtx]", - "add v16.2d, v16.2d, v17.2d", - "str q16, [x16, x4, sxtx]", - "add x4, x4, #0x10 (16)", - "eor w27, w10, w4", - "subs x26, x10, x4", + "mov x20, x16", + "mov x21, x4", + "ldr q2, [x20, x21, sxtx]", + "mov v16.16b, v2.16b", + "mov v3.16b, v17.16b", + "add v4.2d, v2.2d, v3.2d", + "mov v16.16b, v4.16b", + "str q4, [x20, x21, sxtx]", + "add x20, x21, #0x10 (16)", + "mov x4, x20", + "mov x21, x10", + "eor w22, w21, w20", + "mov x27, x22", + "subs x22, x21, x20", + "mov x26, x22", "cfinv" ] }, "bytemark data xor loop": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 25, "Comment": [ "Saw this in bytemark" ], @@ -210,18 +276,27 @@ "mov x20, x4", "mov x6, x20", "mov x5, x20", - "mov x19, x10", - "add x4, x20, #0x1 (1)", - "lsr x6, x20, #6", - "and w5, w20, #0x3f", - "lsl x19, x19, x5", - "add x20, x7, x6, lsl #3", - "ldr x20, [x20]", - "eor x20, x20, x19", - "add x21, x7, x6, lsl #3", - "str x20, [x21]", - "eor w27, w11, w4", - "subs x26, x11, x4", + "mov x21, x10", + "mov x19, x21", + "add x22, x20, #0x1 (1)", + "mov x4, x22", + "lsr x23, x20, #6", + "mov x6, x23", + "and w24, w20, #0x3f", + "mov x5, x24", + "lsl x20, x21, x24", + "mov x19, x20", + "mov x21, x7", + "add x24, x21, x23, lsl #3", + "ldr x25, [x24]", + "eor x24, x25, x20", + "add x20, x21, x23, lsl #3", + "str x24, [x20]", + "mov x20, x11", + "eor w21, w20, w22", + "mov x27, x21", + "subs x21, x20, x22", + "mov x26, x21", "cfinv" ] } diff --git a/unittests/InstructionCountCI/FlagM/HotBlocks_32Bit.json b/unittests/InstructionCountCI/FlagM/HotBlocks_32Bit.json index 8d5d9fb854..beaac703fc 100644 --- a/unittests/InstructionCountCI/FlagM/HotBlocks_32Bit.json +++ b/unittests/InstructionCountCI/FlagM/HotBlocks_32Bit.json @@ -12,7 +12,7 @@ }, "Instructions": { "Sonic Mania movie player": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 32, "Comment": "Used to be hottest block in Sonic Mania", "x86Insts": [ "movzx edx, byte [esi+ecx]", @@ -27,26 +27,42 @@ "cmp esi, ebx" ], "ExpectedArm64ASM": [ - "add w20, w10, w5", - "ldrb w6, [x20]", - "add w20, w10, w11", - "ldrb w5, [x20]", - "orr w6, w6, #0xffff0000", - "lsl w6, w6, #8", - "add w10, w10, #0x1 (1)", - "orr w6, w6, w5", - "ldr w5, [x9, #12]", - "ldr w20, [x4]", - "orr w20, w20, w6", - "str w20, [x4]", - "add w4, w4, #0x4 (4)", - "eor w27, w10, w7", - "subs w26, w10, w7", + "mov w20, w10", + "mov w21, w5", + "add w22, w20, w21", + "ldrb w21, [x22]", + "mov w6, w21", + "mov w22, w11", + "add w23, w20, w22", + "ldrb w22, [x23]", + "mov w5, w22", + "orr w23, w21, #0xffff0000", + "mov w6, w23", + "lsl w21, w23, #8", + "mov w6, w21", + "add w23, w20, #0x1 (1)", + "mov w10, w23", + "orr w20, w21, w22", + "mov w6, w20", + "mov w21, w9", + "ldr w22, [x21, #12]", + "mov w5, w22", + "mov w21, w4", + "ldr w22, [x21]", + "orr w24, w22, w20", + "str w24, [x21]", + "add w20, w21, #0x4 (4)", + "mov w4, w20", + "mov w20, w7", + "eor w21, w23, w20", + "mov w27, w21", + "subs w21, w23, w20", + "mov w26, w21", "cfinv" ] }, "wine mscrt.dll memmove": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 25, "Comment": "Hot in Sonic Mania", "x86Insts": [ "movdqu xmm0, [esi]", @@ -63,24 +79,35 @@ "cmp ecx, 0x40" ], "ExpectedArm64ASM": [ - "ldr q16, [x10]", - "ldr q17, [x10, #16]", - "ldr q18, [x10, #32]", - "ldr q19, [x10, #48]", - "str q16, [x11]", - "str q17, [x11, #16]", - "str q18, [x11, #32]", - "str q19, [x11, #48]", - "add w10, w10, #0x40 (64)", - "add w11, w11, #0x40 (64)", - "sub w5, w5, #0x40 (64)", - "mov w27, w5", - "subs w26, w5, #0x40 (64)", + "mov w20, w10", + "ldr q2, [x20]", + "mov v16.16b, v2.16b", + "ldr q3, [x20, #16]", + "mov v17.16b, v3.16b", + "ldr q4, [x20, #32]", + "mov v18.16b, v4.16b", + "ldr q5, [x20, #48]", + "mov v19.16b, v5.16b", + "mov w21, w11", + "str q2, [x21]", + "str q3, [x21, #16]", + "str q4, [x21, #32]", + "str q5, [x21, #48]", + "add w22, w20, #0x40 (64)", + "mov w10, w22", + "add w20, w21, #0x40 (64)", + "mov w11, w20", + "mov w20, w5", + "sub w21, w20, #0x40 (64)", + "mov w5, w21", + "mov w27, w21", + "subs w20, w21, #0x40 (64)", + "mov w26, w20", "cfinv" ] }, "dxvk hotblock from MGRR": { - "ExpectedInstructionCount": 40, + "ExpectedInstructionCount": 58, "Comment": [ "Hottest block in Metal Gear Rising: Revengeance render thread" ], @@ -101,50 +128,68 @@ "lock cmpxchg8b qword [esi+0x8]" ], "ExpectedArm64ASM": [ - "ldr w6, [x4, #12]", - "ldr w4, [x4, #8]", - "mov w20, #0xffffffcc", - "str w10, [x9, w20, sxtw]", - "mov w5, w4", - "mov w7, w6", + "sub sp, sp, #0x20 (32)", + "mov w20, w4", + "ldr w21, [x20, #12]", + "mov w6, w21", + "ldr w22, [x20, #8]", + "mov w4, w22", + "mov w20, w10", + "mov w23, w9", + "mov w24, #0xffffffcc", + "str w20, [x23, w24, sxtw]", + "mov w5, w22", + "mov w7, w21", "mov w20, #0xffffffdc", - "ldr w10, [x9, w20, sxtw]", + "ldr w24, [x23, w20, sxtw]", + "mov w10, w24", "mov w20, #0xffffffff", - "adds w21, w4, w20", - "mov w5, w21", - "mvn w27, w6", - "adcs w26, w6, w20", - "mov w7, w26", + "adds w25, w22, w20", + "mov w5, w25", + "mvn w12, w21", + "mov w27, w12", + "adcs w12, w21, w20", + "mov w26, w12", + "mov w7, w12", "mov w20, #0xffffffd8", - "str w21, [x9, w20, sxtw]", + "str w25, [x23, w20, sxtw]", "mov w20, #0xffffffd4", - "str w26, [x9, w20, sxtw]", - "mov w7, w21", - "mov w22, #0xffffffd0", - "str w21, [x9, w22, sxtw]", - "ldr w5, [x9, w20, sxtw]", - "add w20, w10, #0x8 (8)", - "mov w22, w4", - "mov w23, w6", - "mov w24, w21", - "mov w25, w5", + "str w12, [x23, w20, sxtw]", + "mov w7, w25", + "mov w12, #0xffffffd0", + "str w25, [x23, w12, sxtw]", + "ldr w12, [x23, w20, sxtw]", + "mov w5, w12", + "add w20, w24, #0x8 (8)", + "mov x24, x22", + "mov w22, w24", + "mov w23, w21", + "str w21, [sp]", + "mov x13, x20", + "mov w20, w25", + "mov w21, w12", + "mov x12, x24", "mov w2, w22", "mov w3, w23", - "caspal w2, w3, w24, w25, [x20]", - "mov w20, w2", - "mov w21, w3", - "mov w24, w20", - "mov w25, w21", - "mrs x0, nzcv", - "cmp w20, w22", - "ccmp w21, w23, #nzcv, eq", + "caspal w2, w3, w20, w21, [x13]", + "mov w24, w2", + "mov w25, w3", + "mov w20, w24", + "mov w21, w25", + "mrs x0, nzcv", + "cmp w24, w22", + "ccmp w25, w23, #nzcv, eq", "rmif x0, #0, #NzCV", - "csel x4, x24, x4, ne", - "csel x6, x25, x6, ne" + "csel x22, x20, x12, ne", + "mov w4, w22", + "ldr w20, [sp]", + "csel x22, x21, x20, ne", + "mov w6, w22", + "add sp, sp, #0x20 (32)" ] }, "Psychonauts matrix swizzle": { - "ExpectedInstructionCount": 2426, + "ExpectedInstructionCount": 2513, "Comment": [ "Hottest block in Windows Psychonauts", "Doing a 4x4 32-bit float matrix swizzle", @@ -256,19 +301,24 @@ "pop ebp" ], "ExpectedArm64ASM": [ - "mov w20, w8", - "str w9, [x20, #-4]!", - "mov w8, w20", - "mov w9, w20", - "mov w27, w20", - "subs w26, w20, #0x44 (68)", + "mov w20, w9", + "mov w21, w8", + "mov w22, w21", + "str w20, [x22, #-4]!", + "mov w8, w22", + "mov w9, w22", + "mov w27, w22", + "subs w20, w22, #0x44 (68)", + "mov w26, w20", "cfinv", - "mov w8, w26", + "mov w8, w20", + "mov w20, w5", "mov w21, #0xffffffbc", - "str w5, [x20, w21, sxtw]", - "ldr w4, [x20, w21, sxtw]", - "ldrb w20, [x28, #747]", - "ldr s2, [x4]", + "str w20, [x22, w21, sxtw]", + "ldr w20, [x22, w21, sxtw]", + "mov w4, w20", + "ldrb w22, [x28, #747]", + "ldr s2, [x20]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -293,21 +343,21 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "mov w22, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "mov w20, #0x1", + "sub w23, w22, #0x1 (1)", + "and w22, w23, #0x7", "ldrb w23, [x28, #1026]", - "lsl w24, w22, w20", - "orr w23, w23, w24", - "strb w23, [x28, #1026]", - "strb w20, [x28, #747]", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "ldrb w20, [x28, #747]", - "add x0, x28, x20, lsl #4", + "lsl w24, w20, w22", + "orr w25, w23, w24", + "strb w25, [x28, #1026]", + "strb w22, [x28, #747]", + "add x0, x28, x22, lsl #4", + "str q3, [x0, #768]", + "ldrb w22, [x28, #747]", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", @@ -334,18 +384,20 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "fmov s2, s0", - "mov w23, #0xffffffc0", - "str s2, [x9, w23, sxtw]", - "ldrb w23, [x28, #1026]", - "lsl w24, w22, w20", - "bic w23, w23, w24", - "strb w23, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "strb w20, [x28, #747]", - "ldr w5, [x9, w21, sxtw]", - "ldr s2, [x5, #16]", + "fmov s3, s0", + "mov w23, w9", + "mov w24, #0xffffffc0", + "str s3, [x23, w24, sxtw]", + "ldrb w24, [x28, #1026]", + "lsl w25, w20, w22", + "bic w12, w24, w25", + "strb w12, [x28, #1026]", + "add w24, w22, #0x1 (1)", + "and w22, w24, #0x7", + "strb w22, [x28, #747]", + "ldr w24, [x23, w21, sxtw]", + "mov w5, w24", + "ldr s2, [x24, #16]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -370,19 +422,19 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w24, w22, w20", - "orr w23, w23, w24", - "strb w23, [x28, #1026]", - "strb w20, [x28, #747]", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "ldrb w20, [x28, #747]", - "add x0, x28, x20, lsl #4", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "sub w23, w22, #0x1 (1)", + "and w22, w23, #0x7", + "lsl w23, w20, w22", + "orr w24, w12, w23", + "strb w24, [x28, #1026]", + "strb w22, [x28, #747]", + "add x0, x28, x22, lsl #4", + "str q3, [x0, #768]", + "ldrb w22, [x28, #747]", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", @@ -409,18 +461,20 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "fmov s2, s0", - "mov w23, #0xffffffc4", - "str s2, [x9, w23, sxtw]", - "ldrb w23, [x28, #1026]", - "lsl w24, w22, w20", - "bic w23, w23, w24", - "strb w23, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "strb w20, [x28, #747]", - "ldr w6, [x9, w21, sxtw]", - "ldr s2, [x6, #32]", + "fmov s3, s0", + "mov w23, w9", + "mov w24, #0xffffffc4", + "str s3, [x23, w24, sxtw]", + "ldrb w24, [x28, #1026]", + "lsl w25, w20, w22", + "bic w12, w24, w25", + "strb w12, [x28, #1026]", + "add w24, w22, #0x1 (1)", + "and w22, w24, #0x7", + "strb w22, [x28, #747]", + "ldr w24, [x23, w21, sxtw]", + "mov w6, w24", + "ldr s2, [x24, #32]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -445,19 +499,19 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w24, w22, w20", - "orr w23, w23, w24", - "strb w23, [x28, #1026]", - "strb w20, [x28, #747]", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "ldrb w20, [x28, #747]", - "add x0, x28, x20, lsl #4", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "sub w23, w22, #0x1 (1)", + "and w22, w23, #0x7", + "lsl w23, w20, w22", + "orr w24, w12, w23", + "strb w24, [x28, #1026]", + "strb w22, [x28, #747]", + "add x0, x28, x22, lsl #4", + "str q3, [x0, #768]", + "ldrb w22, [x28, #747]", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", @@ -484,18 +538,20 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "fmov s2, s0", - "mov w23, #0xffffffc8", - "str s2, [x9, w23, sxtw]", - "ldrb w23, [x28, #1026]", - "lsl w24, w22, w20", - "bic w23, w23, w24", - "strb w23, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "strb w20, [x28, #747]", - "ldr w4, [x9, w21, sxtw]", - "ldr s2, [x4, #48]", + "fmov s3, s0", + "mov w23, w9", + "mov w24, #0xffffffc8", + "str s3, [x23, w24, sxtw]", + "ldrb w24, [x28, #1026]", + "lsl w25, w20, w22", + "bic w12, w24, w25", + "strb w12, [x28, #1026]", + "add w24, w22, #0x1 (1)", + "and w22, w24, #0x7", + "strb w22, [x28, #747]", + "ldr w24, [x23, w21, sxtw]", + "mov w4, w24", + "ldr s2, [x24, #48]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -520,19 +576,19 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w24, w22, w20", - "orr w23, w23, w24", - "strb w23, [x28, #1026]", - "strb w20, [x28, #747]", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "ldrb w20, [x28, #747]", - "add x0, x28, x20, lsl #4", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "sub w23, w22, #0x1 (1)", + "and w22, w23, #0x7", + "lsl w23, w20, w22", + "orr w24, w12, w23", + "strb w24, [x28, #1026]", + "strb w22, [x28, #747]", + "add x0, x28, x22, lsl #4", + "str q3, [x0, #768]", + "ldrb w22, [x28, #747]", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", @@ -559,18 +615,20 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "fmov s2, s0", - "mov w23, #0xffffffcc", - "str s2, [x9, w23, sxtw]", - "ldrb w23, [x28, #1026]", - "lsl w24, w22, w20", - "bic w23, w23, w24", - "strb w23, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "strb w20, [x28, #747]", - "ldr w5, [x9, w21, sxtw]", - "ldr s2, [x5, #4]", + "fmov s3, s0", + "mov w23, w9", + "mov w24, #0xffffffcc", + "str s3, [x23, w24, sxtw]", + "ldrb w24, [x28, #1026]", + "lsl w25, w20, w22", + "bic w12, w24, w25", + "strb w12, [x28, #1026]", + "add w24, w22, #0x1 (1)", + "and w22, w24, #0x7", + "strb w22, [x28, #747]", + "ldr w24, [x23, w21, sxtw]", + "mov w5, w24", + "ldr s2, [x24, #4]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -595,19 +653,19 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w21, w22, w20", - "orr w21, w23, w21", - "strb w21, [x28, #1026]", - "strb w20, [x28, #747]", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "ldrb w20, [x28, #747]", - "add x0, x28, x20, lsl #4", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "sub w21, w22, #0x1 (1)", + "and w22, w21, #0x7", + "lsl w21, w20, w22", + "orr w23, w12, w21", + "strb w23, [x28, #1026]", + "strb w22, [x28, #747]", + "add x0, x28, x22, lsl #4", + "str q3, [x0, #768]", + "ldrb w21, [x28, #747]", + "add x0, x28, x21, lsl #4", "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", @@ -634,19 +692,21 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "fmov s2, s0", - "mov w21, #0xffffffd0", - "str s2, [x9, w21, sxtw]", - "ldrb w21, [x28, #1026]", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "strb w20, [x28, #747]", - "mov w22, #0xffffffbc", - "ldr w6, [x9, w22, sxtw]", - "ldr s2, [x6, #20]", + "fmov s3, s0", + "mov w22, w9", + "mov w23, #0xffffffd0", + "str s3, [x22, w23, sxtw]", + "ldrb w23, [x28, #1026]", + "lsl w24, w20, w21", + "bic w20, w23, w24", + "strb w20, [x28, #1026]", + "add w23, w21, #0x1 (1)", + "and w21, w23, #0x7", + "strb w21, [x28, #747]", + "mov w23, #0xffffffbc", + "ldr w24, [x22, w23, sxtw]", + "mov w6, w24", + "ldr s2, [x24, #20]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -671,18 +731,18 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "mov w23, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w24, w23, w20", - "orr w21, w21, w24", - "strb w21, [x28, #1026]", - "strb w20, [x28, #747]", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "mov w22, #0x1", + "sub w24, w21, #0x1 (1)", + "and w21, w24, #0x7", + "lsl w24, w22, w21", + "orr w25, w20, w24", + "strb w25, [x28, #1026]", + "strb w21, [x28, #747]", + "add x0, x28, x21, lsl #4", + "str q3, [x0, #768]", "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", @@ -711,18 +771,20 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "fmov s2, s0", - "mov w21, #0xffffffd4", - "str s2, [x9, w21, sxtw]", - "ldrb w21, [x28, #1026]", - "lsl w24, w23, w20", - "bic w21, w21, w24", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "fmov s3, s0", + "mov w21, w9", + "mov w24, #0xffffffd4", + "str s3, [x21, w24, sxtw]", + "ldrb w24, [x28, #1026]", + "lsl w25, w22, w20", + "bic w12, w24, w25", + "strb w12, [x28, #1026]", + "add w24, w20, #0x1 (1)", + "and w20, w24, #0x7", "strb w20, [x28, #747]", - "ldr w4, [x9, w22, sxtw]", - "ldr s2, [x4, #36]", + "ldr w24, [x21, w23, sxtw]", + "mov w4, w24", + "ldr s2, [x24, #36]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -747,17 +809,17 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w24, w23, w20", - "orr w21, w21, w24", - "strb w21, [x28, #1026]", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "sub w21, w20, #0x1 (1)", + "and w20, w21, #0x7", + "lsl w21, w22, w20", + "orr w24, w12, w21", + "strb w24, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", + "str q3, [x0, #768]", "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", @@ -786,18 +848,20 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "fmov s2, s0", - "mov w21, #0xffffffd8", - "str s2, [x9, w21, sxtw]", - "ldrb w21, [x28, #1026]", - "lsl w24, w23, w20", - "bic w21, w21, w24", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "fmov s3, s0", + "mov w21, w9", + "mov w24, #0xffffffd8", + "str s3, [x21, w24, sxtw]", + "ldrb w24, [x28, #1026]", + "lsl w25, w22, w20", + "bic w12, w24, w25", + "strb w12, [x28, #1026]", + "add w24, w20, #0x1 (1)", + "and w20, w24, #0x7", "strb w20, [x28, #747]", - "ldr w5, [x9, w22, sxtw]", - "ldr s2, [x5, #52]", + "ldr w24, [x21, w23, sxtw]", + "mov w5, w24", + "ldr s2, [x24, #52]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -822,17 +886,17 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w24, w23, w20", - "orr w21, w21, w24", - "strb w21, [x28, #1026]", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "sub w21, w20, #0x1 (1)", + "and w20, w21, #0x7", + "lsl w21, w22, w20", + "orr w24, w12, w21", + "strb w24, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", + "str q3, [x0, #768]", "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", @@ -861,18 +925,20 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "fmov s2, s0", - "mov w21, #0xffffffdc", - "str s2, [x9, w21, sxtw]", - "ldrb w21, [x28, #1026]", - "lsl w24, w23, w20", - "bic w21, w21, w24", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "fmov s3, s0", + "mov w21, w9", + "mov w24, #0xffffffdc", + "str s3, [x21, w24, sxtw]", + "ldrb w24, [x28, #1026]", + "lsl w25, w22, w20", + "bic w12, w24, w25", + "strb w12, [x28, #1026]", + "add w24, w20, #0x1 (1)", + "and w20, w24, #0x7", "strb w20, [x28, #747]", - "ldr w6, [x9, w22, sxtw]", - "ldr s2, [x6, #8]", + "ldr w24, [x21, w23, sxtw]", + "mov w6, w24", + "ldr s2, [x24, #8]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -897,17 +963,17 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w24, w23, w20", - "orr w21, w21, w24", - "strb w21, [x28, #1026]", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "sub w21, w20, #0x1 (1)", + "and w20, w21, #0x7", + "lsl w21, w22, w20", + "orr w24, w12, w21", + "strb w24, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", + "str q3, [x0, #768]", "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", @@ -936,18 +1002,20 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "fmov s2, s0", - "mov w21, #0xffffffe0", - "str s2, [x9, w21, sxtw]", - "ldrb w21, [x28, #1026]", - "lsl w24, w23, w20", - "bic w21, w21, w24", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "fmov s3, s0", + "mov w21, w9", + "mov w24, #0xffffffe0", + "str s3, [x21, w24, sxtw]", + "ldrb w24, [x28, #1026]", + "lsl w25, w22, w20", + "bic w12, w24, w25", + "strb w12, [x28, #1026]", + "add w24, w20, #0x1 (1)", + "and w20, w24, #0x7", "strb w20, [x28, #747]", - "ldr w4, [x9, w22, sxtw]", - "ldr s2, [x4, #24]", + "ldr w24, [x21, w23, sxtw]", + "mov w4, w24", + "ldr s2, [x24, #24]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -972,17 +1040,17 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w22, w23, w20", - "orr w21, w21, w22", - "strb w21, [x28, #1026]", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "sub w21, w20, #0x1 (1)", + "and w20, w21, #0x7", + "lsl w21, w22, w20", + "orr w23, w12, w21", + "strb w23, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", + "str q3, [x0, #768]", "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", @@ -1011,19 +1079,21 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "fmov s2, s0", - "mov w21, #0xffffffe4", - "str s2, [x9, w21, sxtw]", - "ldrb w21, [x28, #1026]", - "lsl w22, w23, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "fmov s3, s0", + "mov w21, w9", + "mov w23, #0xffffffe4", + "str s3, [x21, w23, sxtw]", + "ldrb w23, [x28, #1026]", + "lsl w24, w22, w20", + "bic w22, w23, w24", + "strb w22, [x28, #1026]", + "add w23, w20, #0x1 (1)", + "and w20, w23, #0x7", "strb w20, [x28, #747]", - "mov w22, #0xffffffbc", - "ldr w5, [x9, w22, sxtw]", - "ldr s2, [x5, #40]", + "mov w23, #0xffffffbc", + "ldr w24, [x21, w23, sxtw]", + "mov w5, w24", + "ldr s2, [x24, #40]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -1048,18 +1118,18 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "mov w23, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w24, w23, w20", - "orr w21, w21, w24", - "strb w21, [x28, #1026]", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "mov w21, #0x1", + "sub w24, w20, #0x1 (1)", + "and w20, w24, #0x7", + "lsl w24, w21, w20", + "orr w25, w22, w24", + "strb w25, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", + "str q3, [x0, #768]", "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", @@ -1088,18 +1158,20 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "fmov s2, s0", - "mov w21, #0xffffffe8", - "str s2, [x9, w21, sxtw]", - "ldrb w21, [x28, #1026]", - "lsl w24, w23, w20", - "bic w21, w21, w24", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "fmov s3, s0", + "mov w22, w9", + "mov w24, #0xffffffe8", + "str s3, [x22, w24, sxtw]", + "ldrb w24, [x28, #1026]", + "lsl w25, w21, w20", + "bic w12, w24, w25", + "strb w12, [x28, #1026]", + "add w24, w20, #0x1 (1)", + "and w20, w24, #0x7", "strb w20, [x28, #747]", - "ldr w6, [x9, w22, sxtw]", - "ldr s2, [x6, #56]", + "ldr w24, [x22, w23, sxtw]", + "mov w6, w24", + "ldr s2, [x24, #56]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -1124,17 +1196,17 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w24, w23, w20", - "orr w21, w21, w24", - "strb w21, [x28, #1026]", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", + "lsl w22, w21, w20", + "orr w24, w12, w22", + "strb w24, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", + "str q3, [x0, #768]", "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", @@ -1163,18 +1235,20 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "fmov s2, s0", - "mov w21, #0xffffffec", - "str s2, [x9, w21, sxtw]", - "ldrb w21, [x28, #1026]", - "lsl w24, w23, w20", - "bic w21, w21, w24", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "fmov s3, s0", + "mov w22, w9", + "mov w24, #0xffffffec", + "str s3, [x22, w24, sxtw]", + "ldrb w24, [x28, #1026]", + "lsl w25, w21, w20", + "bic w12, w24, w25", + "strb w12, [x28, #1026]", + "add w24, w20, #0x1 (1)", + "and w20, w24, #0x7", "strb w20, [x28, #747]", - "ldr w4, [x9, w22, sxtw]", - "ldr s2, [x4, #12]", + "ldr w24, [x22, w23, sxtw]", + "mov w4, w24", + "ldr s2, [x24, #12]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -1199,17 +1273,17 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w24, w23, w20", - "orr w21, w21, w24", - "strb w21, [x28, #1026]", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", + "lsl w22, w21, w20", + "orr w24, w12, w22", + "strb w24, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", + "str q3, [x0, #768]", "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", @@ -1238,18 +1312,20 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "fmov s2, s0", - "mov w21, #0xfffffff0", - "str s2, [x9, w21, sxtw]", - "ldrb w21, [x28, #1026]", - "lsl w24, w23, w20", - "bic w21, w21, w24", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "fmov s3, s0", + "mov w22, w9", + "mov w24, #0xfffffff0", + "str s3, [x22, w24, sxtw]", + "ldrb w24, [x28, #1026]", + "lsl w25, w21, w20", + "bic w12, w24, w25", + "strb w12, [x28, #1026]", + "add w24, w20, #0x1 (1)", + "and w20, w24, #0x7", "strb w20, [x28, #747]", - "ldr w5, [x9, w22, sxtw]", - "ldr s2, [x5, #28]", + "ldr w24, [x22, w23, sxtw]", + "mov w5, w24", + "ldr s2, [x24, #28]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -1274,17 +1350,17 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w24, w23, w20", - "orr w21, w21, w24", - "strb w21, [x28, #1026]", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", + "lsl w22, w21, w20", + "orr w24, w12, w22", + "strb w24, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", + "str q3, [x0, #768]", "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", @@ -1313,18 +1389,20 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "fmov s2, s0", - "mov w21, #0xfffffff4", - "str s2, [x9, w21, sxtw]", - "ldrb w21, [x28, #1026]", - "lsl w24, w23, w20", - "bic w21, w21, w24", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "fmov s3, s0", + "mov w22, w9", + "mov w24, #0xfffffff4", + "str s3, [x22, w24, sxtw]", + "ldrb w24, [x28, #1026]", + "lsl w25, w21, w20", + "bic w12, w24, w25", + "strb w12, [x28, #1026]", + "add w24, w20, #0x1 (1)", + "and w20, w24, #0x7", "strb w20, [x28, #747]", - "ldr w6, [x9, w22, sxtw]", - "ldr s2, [x6, #44]", + "ldr w24, [x22, w23, sxtw]", + "mov w6, w24", + "ldr s2, [x24, #44]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -1349,17 +1427,17 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w22, w23, w20", - "orr w21, w21, w22", - "strb w21, [x28, #1026]", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", + "lsl w22, w21, w20", + "orr w23, w12, w22", + "strb w23, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", + "str q3, [x0, #768]", "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", @@ -1388,19 +1466,21 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "fmov s2, s0", - "mov w21, #0xfffffff8", - "str s2, [x9, w21, sxtw]", - "ldrb w21, [x28, #1026]", - "lsl w22, w23, w20", - "bic w21, w21, w22", + "fmov s3, s0", + "mov w22, w9", + "mov w23, #0xfffffff8", + "str s3, [x22, w23, sxtw]", + "ldrb w23, [x28, #1026]", + "lsl w24, w21, w20", + "bic w21, w23, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w23, w20, #0x1 (1)", + "and w20, w23, #0x7", "strb w20, [x28, #747]", - "mov w22, #0xffffffbc", - "ldr w4, [x9, w22, sxtw]", - "ldr s2, [x4, #60]", + "mov w23, #0xffffffbc", + "ldr w24, [x22, w23, sxtw]", + "mov w4, w24", + "ldr s2, [x24, #60]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -1425,18 +1505,18 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "mov w22, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w23, w20, #0x1 (1)", + "and w20, w23, #0x7", "lsl w23, w22, w20", - "orr w21, w21, w23", - "strb w21, [x28, #1026]", + "orr w24, w21, w23", + "strb w24, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", + "str q3, [x0, #768]", "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", @@ -1465,19 +1545,21 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "fmov s2, s0", - "mov w21, #0xfffffffc", - "str s2, [x9, w21, sxtw]", - "ldrb w21, [x28, #1026]", - "lsl w23, w22, w20", - "bic w21, w21, w23", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "fmov s3, s0", + "mov w21, w9", + "mov w23, #0xfffffffc", + "str s3, [x21, w23, sxtw]", + "ldrb w23, [x28, #1026]", + "lsl w24, w22, w20", + "bic w25, w23, w24", + "strb w25, [x28, #1026]", + "add w23, w20, #0x1 (1)", + "and w20, w23, #0x7", "strb w20, [x28, #747]", - "ldr w5, [x9, #8]", + "ldr w23, [x21, #8]", + "mov w5, w23", "mov w23, #0xffffffc0", - "ldr s2, [x9, w23, sxtw]", + "ldr s2, [x21, w23, sxtw]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -1502,17 +1584,17 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w23, w22, w20", - "orr w21, w21, w23", - "strb w21, [x28, #1026]", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "sub w21, w20, #0x1 (1)", + "and w20, w21, #0x7", + "lsl w21, w22, w20", + "orr w23, w25, w21", + "strb w23, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", + "str q3, [x0, #768]", "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", @@ -1541,18 +1623,21 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "fmov s2, s0", - "str s2, [x5]", + "fmov s3, s0", + "mov w21, w5", + "str s3, [x21]", "ldrb w21, [x28, #1026]", "lsl w23, w22, w20", - "bic w21, w21, w23", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "strb w20, [x28, #747]", - "ldr w6, [x9, #8]", + "bic w24, w21, w23", + "strb w24, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", + "strb w20, [x28, #747]", + "mov w21, w9", + "ldr w23, [x21, #8]", + "mov w6, w23", "mov w23, #0xffffffc4", - "ldr s2, [x9, w23, sxtw]", + "ldr s2, [x21, w23, sxtw]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -1577,17 +1662,17 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w23, w22, w20", - "orr w21, w21, w23", - "strb w21, [x28, #1026]", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "sub w21, w20, #0x1 (1)", + "and w20, w21, #0x7", + "lsl w21, w22, w20", + "orr w23, w24, w21", + "strb w23, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", + "str q3, [x0, #768]", "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", @@ -1616,18 +1701,21 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "fmov s2, s0", - "str s2, [x6, #4]", + "fmov s3, s0", + "mov w21, w6", + "str s3, [x21, #4]", "ldrb w21, [x28, #1026]", "lsl w23, w22, w20", - "bic w21, w21, w23", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "strb w20, [x28, #747]", - "ldr w4, [x9, #8]", + "bic w24, w21, w23", + "strb w24, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", + "strb w20, [x28, #747]", + "mov w21, w9", + "ldr w23, [x21, #8]", + "mov w4, w23", "mov w23, #0xffffffc8", - "ldr s2, [x9, w23, sxtw]", + "ldr s2, [x21, w23, sxtw]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -1652,17 +1740,17 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w23, w22, w20", - "orr w21, w21, w23", - "strb w21, [x28, #1026]", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "sub w21, w20, #0x1 (1)", + "and w20, w21, #0x7", + "lsl w21, w22, w20", + "orr w23, w24, w21", + "strb w23, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", + "str q3, [x0, #768]", "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", @@ -1691,18 +1779,21 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "fmov s2, s0", - "str s2, [x4, #8]", + "fmov s3, s0", + "mov w21, w4", + "str s3, [x21, #8]", "ldrb w21, [x28, #1026]", "lsl w23, w22, w20", - "bic w21, w21, w23", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "strb w20, [x28, #747]", - "ldr w5, [x9, #8]", + "bic w24, w21, w23", + "strb w24, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", + "strb w20, [x28, #747]", + "mov w21, w9", + "ldr w23, [x21, #8]", + "mov w5, w23", "mov w23, #0xffffffcc", - "ldr s2, [x9, w23, sxtw]", + "ldr s2, [x21, w23, sxtw]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -1727,17 +1818,17 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w23, w22, w20", - "orr w21, w21, w23", - "strb w21, [x28, #1026]", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "sub w21, w20, #0x1 (1)", + "and w20, w21, #0x7", + "lsl w21, w22, w20", + "orr w23, w24, w21", + "strb w23, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", + "str q3, [x0, #768]", "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", @@ -1766,18 +1857,21 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "fmov s2, s0", - "str s2, [x5, #12]", + "fmov s3, s0", + "mov w21, w5", + "str s3, [x21, #12]", "ldrb w21, [x28, #1026]", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "ldr w6, [x9, #8]", - "mov w22, #0xffffffd0", - "ldr s2, [x9, w22, sxtw]", + "mov w21, w9", + "ldr w23, [x21, #8]", + "mov w6, w23", + "mov w23, #0xffffffd0", + "ldr s2, [x21, w23, sxtw]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -1802,18 +1896,18 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "mov w22, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w23, w22, w20", - "orr w21, w21, w23", - "strb w21, [x28, #1026]", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "mov w21, #0x1", + "sub w23, w20, #0x1 (1)", + "and w20, w23, #0x7", + "lsl w23, w21, w20", + "orr w24, w22, w23", + "strb w24, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", + "str q3, [x0, #768]", "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", @@ -1842,18 +1936,21 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "fmov s2, s0", - "str s2, [x6, #16]", - "ldrb w21, [x28, #1026]", - "lsl w23, w22, w20", - "bic w21, w21, w23", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "fmov s3, s0", + "mov w22, w6", + "str s3, [x22, #16]", + "ldrb w22, [x28, #1026]", + "lsl w23, w21, w20", + "bic w24, w22, w23", + "strb w24, [x28, #1026]", + "add w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "strb w20, [x28, #747]", - "ldr w4, [x9, #8]", + "mov w22, w9", + "ldr w23, [x22, #8]", + "mov w4, w23", "mov w23, #0xffffffd4", - "ldr s2, [x9, w23, sxtw]", + "ldr s2, [x22, w23, sxtw]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -1878,17 +1975,17 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w23, w22, w20", - "orr w21, w21, w23", - "strb w21, [x28, #1026]", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", + "lsl w22, w21, w20", + "orr w23, w24, w22", + "strb w23, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", + "str q3, [x0, #768]", "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", @@ -1917,18 +2014,21 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "fmov s2, s0", - "str s2, [x4, #20]", - "ldrb w21, [x28, #1026]", - "lsl w23, w22, w20", - "bic w21, w21, w23", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "strb w20, [x28, #747]", - "ldr w5, [x9, #8]", + "fmov s3, s0", + "mov w22, w4", + "str s3, [x22, #20]", + "ldrb w22, [x28, #1026]", + "lsl w23, w21, w20", + "bic w24, w22, w23", + "strb w24, [x28, #1026]", + "add w22, w20, #0x1 (1)", + "and w20, w22, #0x7", + "strb w20, [x28, #747]", + "mov w22, w9", + "ldr w23, [x22, #8]", + "mov w5, w23", "mov w23, #0xffffffd8", - "ldr s2, [x9, w23, sxtw]", + "ldr s2, [x22, w23, sxtw]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -1953,17 +2053,17 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w23, w22, w20", - "orr w21, w21, w23", - "strb w21, [x28, #1026]", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", + "lsl w22, w21, w20", + "orr w23, w24, w22", + "strb w23, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", + "str q3, [x0, #768]", "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", @@ -1992,18 +2092,21 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "fmov s2, s0", - "str s2, [x5, #24]", - "ldrb w21, [x28, #1026]", - "lsl w23, w22, w20", - "bic w21, w21, w23", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "fmov s3, s0", + "mov w22, w5", + "str s3, [x22, #24]", + "ldrb w22, [x28, #1026]", + "lsl w23, w21, w20", + "bic w24, w22, w23", + "strb w24, [x28, #1026]", + "add w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "strb w20, [x28, #747]", - "ldr w6, [x9, #8]", + "mov w22, w9", + "ldr w23, [x22, #8]", + "mov w6, w23", "mov w23, #0xffffffdc", - "ldr s2, [x9, w23, sxtw]", + "ldr s2, [x22, w23, sxtw]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -2028,17 +2131,17 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w23, w22, w20", - "orr w21, w21, w23", - "strb w21, [x28, #1026]", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", + "lsl w22, w21, w20", + "orr w23, w24, w22", + "strb w23, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", + "str q3, [x0, #768]", "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", @@ -2067,18 +2170,21 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "fmov s2, s0", - "str s2, [x6, #28]", - "ldrb w21, [x28, #1026]", - "lsl w23, w22, w20", - "bic w21, w21, w23", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "fmov s3, s0", + "mov w22, w6", + "str s3, [x22, #28]", + "ldrb w22, [x28, #1026]", + "lsl w23, w21, w20", + "bic w24, w22, w23", + "strb w24, [x28, #1026]", + "add w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "strb w20, [x28, #747]", - "ldr w4, [x9, #8]", + "mov w22, w9", + "ldr w23, [x22, #8]", + "mov w4, w23", "mov w23, #0xffffffe0", - "ldr s2, [x9, w23, sxtw]", + "ldr s2, [x22, w23, sxtw]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -2103,17 +2209,17 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w23, w22, w20", - "orr w21, w21, w23", - "strb w21, [x28, #1026]", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", + "lsl w22, w21, w20", + "orr w23, w24, w22", + "strb w23, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", + "str q3, [x0, #768]", "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", @@ -2142,18 +2248,21 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "fmov s2, s0", - "str s2, [x4, #32]", - "ldrb w21, [x28, #1026]", - "lsl w22, w22, w20", - "bic w21, w21, w22", + "fmov s3, s0", + "mov w22, w4", + "str s3, [x22, #32]", + "ldrb w22, [x28, #1026]", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "strb w20, [x28, #747]", - "ldr w5, [x9, #8]", - "mov w22, #0xffffffe4", - "ldr s2, [x9, w22, sxtw]", + "mov w22, w9", + "ldr w23, [x22, #8]", + "mov w5, w23", + "mov w23, #0xffffffe4", + "ldr s2, [x22, w23, sxtw]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -2178,18 +2287,18 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "mov w22, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w23, w20, #0x1 (1)", + "and w20, w23, #0x7", "lsl w23, w22, w20", - "orr w21, w21, w23", - "strb w21, [x28, #1026]", + "orr w24, w21, w23", + "strb w24, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", + "str q3, [x0, #768]", "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", @@ -2218,18 +2327,21 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "fmov s2, s0", - "str s2, [x5, #36]", + "fmov s3, s0", + "mov w21, w5", + "str s3, [x21, #36]", "ldrb w21, [x28, #1026]", "lsl w23, w22, w20", - "bic w21, w21, w23", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "strb w20, [x28, #747]", - "ldr w6, [x9, #8]", + "bic w24, w21, w23", + "strb w24, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", + "strb w20, [x28, #747]", + "mov w21, w9", + "ldr w23, [x21, #8]", + "mov w6, w23", "mov w23, #0xffffffe8", - "ldr s2, [x9, w23, sxtw]", + "ldr s2, [x21, w23, sxtw]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -2254,17 +2366,17 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w23, w22, w20", - "orr w21, w21, w23", - "strb w21, [x28, #1026]", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "sub w21, w20, #0x1 (1)", + "and w20, w21, #0x7", + "lsl w21, w22, w20", + "orr w23, w24, w21", + "strb w23, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", + "str q3, [x0, #768]", "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", @@ -2293,18 +2405,21 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "fmov s2, s0", - "str s2, [x6, #40]", + "fmov s3, s0", + "mov w21, w6", + "str s3, [x21, #40]", "ldrb w21, [x28, #1026]", "lsl w23, w22, w20", - "bic w21, w21, w23", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "strb w20, [x28, #747]", - "ldr w4, [x9, #8]", + "bic w24, w21, w23", + "strb w24, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", + "strb w20, [x28, #747]", + "mov w21, w9", + "ldr w23, [x21, #8]", + "mov w4, w23", "mov w23, #0xffffffec", - "ldr s2, [x9, w23, sxtw]", + "ldr s2, [x21, w23, sxtw]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -2329,17 +2444,17 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w23, w22, w20", - "orr w21, w21, w23", - "strb w21, [x28, #1026]", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "sub w21, w20, #0x1 (1)", + "and w20, w21, #0x7", + "lsl w21, w22, w20", + "orr w23, w24, w21", + "strb w23, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", + "str q3, [x0, #768]", "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", @@ -2368,18 +2483,21 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "fmov s2, s0", - "str s2, [x4, #44]", + "fmov s3, s0", + "mov w21, w4", + "str s3, [x21, #44]", "ldrb w21, [x28, #1026]", "lsl w23, w22, w20", - "bic w21, w21, w23", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "strb w20, [x28, #747]", - "ldr w5, [x9, #8]", + "bic w24, w21, w23", + "strb w24, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", + "strb w20, [x28, #747]", + "mov w21, w9", + "ldr w23, [x21, #8]", + "mov w5, w23", "mov w23, #0xfffffff0", - "ldr s2, [x9, w23, sxtw]", + "ldr s2, [x21, w23, sxtw]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -2404,17 +2522,17 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w23, w22, w20", - "orr w21, w21, w23", - "strb w21, [x28, #1026]", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "sub w21, w20, #0x1 (1)", + "and w20, w21, #0x7", + "lsl w21, w22, w20", + "orr w23, w24, w21", + "strb w23, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", + "str q3, [x0, #768]", "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", @@ -2443,18 +2561,21 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "fmov s2, s0", - "str s2, [x5, #48]", + "fmov s3, s0", + "mov w21, w5", + "str s3, [x21, #48]", "ldrb w21, [x28, #1026]", "lsl w23, w22, w20", - "bic w21, w21, w23", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "strb w20, [x28, #747]", - "ldr w6, [x9, #8]", + "bic w24, w21, w23", + "strb w24, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", + "strb w20, [x28, #747]", + "mov w21, w9", + "ldr w23, [x21, #8]", + "mov w6, w23", "mov w23, #0xfffffff4", - "ldr s2, [x9, w23, sxtw]", + "ldr s2, [x21, w23, sxtw]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -2479,17 +2600,17 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w23, w22, w20", - "orr w21, w21, w23", - "strb w21, [x28, #1026]", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "sub w21, w20, #0x1 (1)", + "and w20, w21, #0x7", + "lsl w21, w22, w20", + "orr w23, w24, w21", + "strb w23, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", + "str q3, [x0, #768]", "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", @@ -2518,18 +2639,21 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "fmov s2, s0", - "str s2, [x6, #52]", + "fmov s3, s0", + "mov w21, w6", + "str s3, [x21, #52]", "ldrb w21, [x28, #1026]", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "ldr w4, [x9, #8]", - "mov w22, #0xfffffff8", - "ldr s2, [x9, w22, sxtw]", + "mov w21, w9", + "ldr w23, [x21, #8]", + "mov w4, w23", + "mov w23, #0xfffffff8", + "ldr s2, [x21, w23, sxtw]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -2554,18 +2678,18 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "mov w22, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w23, w22, w20", - "orr w21, w21, w23", - "strb w21, [x28, #1026]", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "mov w21, #0x1", + "sub w23, w20, #0x1 (1)", + "and w20, w23, #0x7", + "lsl w23, w21, w20", + "orr w24, w22, w23", + "strb w24, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", + "str q3, [x0, #768]", "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", @@ -2594,18 +2718,21 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "fmov s2, s0", - "str s2, [x4, #56]", - "ldrb w21, [x28, #1026]", - "lsl w23, w22, w20", - "bic w21, w21, w23", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "strb w20, [x28, #747]", - "ldr w5, [x9, #8]", + "fmov s3, s0", + "mov w22, w4", + "str s3, [x22, #56]", + "ldrb w22, [x28, #1026]", + "lsl w23, w21, w20", + "bic w24, w22, w23", + "strb w24, [x28, #1026]", + "add w22, w20, #0x1 (1)", + "and w20, w22, #0x7", + "strb w20, [x28, #747]", + "mov w22, w9", + "ldr w23, [x22, #8]", + "mov w5, w23", "mov w23, #0xfffffffc", - "ldr s2, [x9, w23, sxtw]", + "ldr s2, [x22, w23, sxtw]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -2630,17 +2757,17 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w23, w22, w20", - "orr w21, w21, w23", - "strb w21, [x28, #1026]", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", + "lsl w22, w21, w20", + "orr w23, w24, w22", + "strb w23, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", + "str q3, [x0, #768]", "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", @@ -2669,19 +2796,24 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x16, [sp], #16", "ldp x17, x30, [sp], #16", - "fmov s2, s0", - "str s2, [x5, #60]", - "ldrb w21, [x28, #1026]", - "lsl w22, w22, w20", - "bic w21, w21, w22", + "fmov s3, s0", + "mov w22, w5", + "str s3, [x22, #60]", + "ldrb w22, [x28, #1026]", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "mov w8, w9", - "ldr w4, [x8, #8]", - "ldr w9, [x8]", - "add x8, x8, #0x4 (4)" + "mov w20, w9", + "ldr w21, [x20, #8]", + "mov w4, w21", + "mov w8, w20", + "ldr w21, [x20]", + "add x22, x20, #0x4 (4)", + "mov w8, w22", + "mov w9, w21" ] } } diff --git a/unittests/InstructionCountCI/FlagM/HotBlocks_AFP.json b/unittests/InstructionCountCI/FlagM/HotBlocks_AFP.json index f1871c91a3..3a18990c2e 100644 --- a/unittests/InstructionCountCI/FlagM/HotBlocks_AFP.json +++ b/unittests/InstructionCountCI/FlagM/HotBlocks_AFP.json @@ -13,7 +13,7 @@ }, "Instructions": { "FMOD scalar loop": { - "ExpectedInstructionCount": 72, + "ExpectedInstructionCount": 121, "x86Insts": [ "mov esi, ecx", "mov rdx, rbp", @@ -55,78 +55,127 @@ "sub esi, 0x1" ], "ExpectedArm64ASM": [ - "mov w10, w5", - "mov x6, x9", - "mov x4, x7", - "ldr s18, [x6]", - "add x4, x4, #0x20 (32)", - "fmul s18, s18, s16", - "add x6, x6, #0x20 (32)", - "sub x20, x4, #0x20 (32)", - "ldr s2, [x20]", - "fadd s18, s18, s2", - "sub x20, x4, #0x20 (32)", - "str s18, [x20]", - "sub x20, x6, #0x1c (28)", - "ldr s18, [x20]", - "fmul s18, s18, s17", - "sub x20, x4, #0x1c (28)", - "ldr s2, [x20]", - "fadd s18, s18, s2", - "sub x20, x4, #0x1c (28)", - "str s18, [x20]", - "sub x20, x6, #0x18 (24)", - "ldr s18, [x20]", - "fmul s18, s18, s16", - "sub x20, x4, #0x18 (24)", - "ldr s2, [x20]", - "fadd s18, s18, s2", - "sub x20, x4, #0x18 (24)", - "str s18, [x20]", - "sub x20, x6, #0x14 (20)", - "ldr s18, [x20]", - "fmul s18, s18, s17", - "sub x20, x4, #0x14 (20)", - "ldr s2, [x20]", - "fadd s18, s18, s2", - "sub x20, x4, #0x14 (20)", - "str s18, [x20]", - "sub x20, x6, #0x10 (16)", - "ldr s18, [x20]", - "fmul s18, s18, s16", - "sub x20, x4, #0x10 (16)", - "ldr s2, [x20]", - "fadd s18, s18, s2", - "sub x20, x4, #0x10 (16)", - "str s18, [x20]", - "sub x20, x6, #0xc (12)", - "ldr s18, [x20]", - "fmul s18, s18, s17", - "sub x20, x4, #0xc (12)", - "ldr s2, [x20]", - "fadd s18, s18, s2", - "sub x20, x4, #0xc (12)", - "str s18, [x20]", - "sub x20, x6, #0x8 (8)", - "ldr s18, [x20]", - "fmul s18, s18, s16", - "sub x20, x4, #0x8 (8)", - "ldr s2, [x20]", - "fadd s18, s18, s2", - "sub x20, x4, #0x8 (8)", - "str s18, [x20]", - "sub x20, x6, #0x4 (4)", - "ldr s18, [x20]", - "fmul s18, s18, s17", - "sub x20, x4, #0x4 (4)", - "ldr s2, [x20]", - "fadd s18, s18, s2", - "sub x20, x4, #0x4 (4)", - "str s18, [x20]", - "mov x27, x10", - "subs w26, w10, #0x1 (1)", + "mov x20, x5", + "mov w21, w20", + "mov x10, x21", + "mov x20, x9", + "mov x6, x20", + "mov x22, x7", + "mov x4, x22", + "ldr s2, [x20]", + "mov v18.16b, v2.16b", + "add x23, x22, #0x20 (32)", + "mov x4, x23", + "mov v3.16b, v16.16b", + "mov v4.16b, v2.16b", + "fmul s4, s2, s3", + "mov v18.16b, v4.16b", + "add x22, x20, #0x20 (32)", + "mov x6, x22", + "sub x20, x23, #0x20 (32)", + "ldr s2, [x20]", + "mov v5.16b, v4.16b", + "fadd s5, s4, s2", + "mov v18.16b, v5.16b", + "sub x20, x23, #0x20 (32)", + "str s5, [x20]", + "sub x20, x22, #0x1c (28)", + "ldr s2, [x20]", + "mov v18.16b, v2.16b", + "mov v4.16b, v17.16b", + "mov v5.16b, v2.16b", + "fmul s5, s2, s4", + "mov v18.16b, v5.16b", + "sub x20, x23, #0x1c (28)", + "ldr s2, [x20]", + "mov v6.16b, v5.16b", + "fadd s6, s5, s2", + "mov v18.16b, v6.16b", + "sub x20, x23, #0x1c (28)", + "str s6, [x20]", + "sub x20, x22, #0x18 (24)", + "ldr s2, [x20]", + "mov v18.16b, v2.16b", + "mov v5.16b, v2.16b", + "fmul s5, s2, s3", + "mov v18.16b, v5.16b", + "sub x20, x23, #0x18 (24)", + "ldr s2, [x20]", + "mov v6.16b, v5.16b", + "fadd s6, s5, s2", + "mov v18.16b, v6.16b", + "sub x20, x23, #0x18 (24)", + "str s6, [x20]", + "sub x20, x22, #0x14 (20)", + "ldr s2, [x20]", + "mov v18.16b, v2.16b", + "mov v5.16b, v2.16b", + "fmul s5, s2, s4", + "mov v18.16b, v5.16b", + "sub x20, x23, #0x14 (20)", + "ldr s2, [x20]", + "mov v6.16b, v5.16b", + "fadd s6, s5, s2", + "mov v18.16b, v6.16b", + "sub x20, x23, #0x14 (20)", + "str s6, [x20]", + "sub x20, x22, #0x10 (16)", + "ldr s2, [x20]", + "mov v18.16b, v2.16b", + "mov v5.16b, v2.16b", + "fmul s5, s2, s3", + "mov v18.16b, v5.16b", + "sub x20, x23, #0x10 (16)", + "ldr s2, [x20]", + "mov v6.16b, v5.16b", + "fadd s6, s5, s2", + "mov v18.16b, v6.16b", + "sub x20, x23, #0x10 (16)", + "str s6, [x20]", + "sub x20, x22, #0xc (12)", + "ldr s2, [x20]", + "mov v18.16b, v2.16b", + "mov v5.16b, v2.16b", + "fmul s5, s2, s4", + "mov v18.16b, v5.16b", + "sub x20, x23, #0xc (12)", + "ldr s2, [x20]", + "mov v6.16b, v5.16b", + "fadd s6, s5, s2", + "mov v18.16b, v6.16b", + "sub x20, x23, #0xc (12)", + "str s6, [x20]", + "sub x20, x22, #0x8 (8)", + "ldr s2, [x20]", + "mov v18.16b, v2.16b", + "mov v5.16b, v2.16b", + "fmul s5, s2, s3", + "mov v18.16b, v5.16b", + "sub x20, x23, #0x8 (8)", + "ldr s2, [x20]", + "mov v3.16b, v5.16b", + "fadd s3, s5, s2", + "mov v18.16b, v3.16b", + "sub x20, x23, #0x8 (8)", + "str s3, [x20]", + "sub x20, x22, #0x4 (4)", + "ldr s2, [x20]", + "mov v18.16b, v2.16b", + "mov v3.16b, v2.16b", + "fmul s3, s2, s4", + "mov v18.16b, v3.16b", + "sub x20, x23, #0x4 (4)", + "ldr s2, [x20]", + "mov v4.16b, v3.16b", + "fadd s4, s3, s2", + "mov v18.16b, v4.16b", + "sub x20, x23, #0x4 (4)", + "str s4, [x20]", + "mov x27, x21", + "subs w20, w21, #0x1 (1)", + "mov x26, x20", "cfinv", - "mov x10, x26" + "mov x10, x20" ] } } diff --git a/unittests/InstructionCountCI/FlagM/Primary.json b/unittests/InstructionCountCI/FlagM/Primary.json index b34877fcb8..95996178b4 100644 --- a/unittests/InstructionCountCI/FlagM/Primary.json +++ b/unittests/InstructionCountCI/FlagM/Primary.json @@ -12,2530 +12,3416 @@ }, "Instructions": { "add bl, cl": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 11, "Comment": "0x00", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "lsl w0, w7, #24", - "cmn w0, w5, lsl #24", - "add w26, w7, w5", - "bfxil x7, x26, #0, #8" + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "lsl w0, w21, #24", + "cmn w0, w20, lsl #24", + "add w22, w21, w20", + "mov x26, x22", + "mov x20, x21", + "bfxil x20, x22, #0, #8", + "mov x7, x20" ] }, "add bx, cx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 11, "Comment": "0x01", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "lsl w0, w7, #16", - "cmn w0, w5, lsl #16", - "add w26, w7, w5", - "bfxil x7, x26, #0, #16" + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "lsl w0, w21, #16", + "cmn w0, w20, lsl #16", + "add w22, w21, w20", + "mov x26, x22", + "mov x20, x21", + "bfxil x20, x22, #0, #16", + "mov x7, x20" ] }, "add ebx, ecx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x01", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "adds w26, w7, w5", - "mov x7, x26" + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "adds w22, w21, w20", + "mov x26, x22", + "mov x7, x22" ] }, "add rbx, rcx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x01", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "adds x26, x7, x5", - "mov x7, x26" + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "adds x22, x21, x20", + "mov x26, x22", + "mov x7, x22" ] }, "db 0x02, 0xcb": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 11, "Comment": [ "0x02", "add bl, cl but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "lsl w0, w5, #24", - "cmn w0, w7, lsl #24", - "add w26, w5, w7", - "bfxil x5, x26, #0, #8" + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "lsl w0, w21, #24", + "cmn w0, w20, lsl #24", + "add w22, w21, w20", + "mov x26, x22", + "mov x20, x21", + "bfxil x20, x22, #0, #8", + "mov x5, x20" ] }, "db 0x66, 0x03, 0xcb": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 11, "Comment": [ "0x03", "add bx, cx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "lsl w0, w5, #16", - "cmn w0, w7, lsl #16", - "add w26, w5, w7", - "bfxil x5, x26, #0, #16" + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "lsl w0, w21, #16", + "cmn w0, w20, lsl #16", + "add w22, w21, w20", + "mov x26, x22", + "mov x20, x21", + "bfxil x20, x22, #0, #16", + "mov x5, x20" ] }, "db 0x03, 0xcb": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0x03", "add ebx, ecx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "adds w26, w5, w7", - "mov x5, x26" + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "adds w22, w21, w20", + "mov x26, x22", + "mov x5, x22" ] }, "db 0x48, 0x03, 0xcb": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0x03", "add rbx, rcx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "adds x26, x5, x7", - "mov x5, x26" + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "adds x22, x21, x20", + "mov x26, x22", + "mov x5, x22" ] }, "add al, 1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "0x04", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "lsl w0, w27, #24", + "mov x21, x4", + "mov x27, x21", + "lsl w0, w21, #24", "cmn w0, w20, lsl #24", - "add w26, w27, #0x1 (1)", - "mov x4, x27", - "bfxil x4, x26, #0, #8" + "add w20, w21, #0x1 (1)", + "mov x26, x20", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "add ax, 1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "0x05", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "lsl w0, w27, #16", + "mov x21, x4", + "mov x27, x21", + "lsl w0, w21, #16", "cmn w0, w20, lsl #16", - "add w26, w27, #0x1 (1)", - "mov x4, x27", - "bfxil x4, x26, #0, #16" + "add w20, w21, #0x1 (1)", + "mov x26, x20", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x4, x22" ] }, "add eax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x05", "ExpectedArm64ASM": [ - "mov x27, x4", - "adds w26, w27, #0x1 (1)", - "mov x4, x26" + "mov x20, x4", + "mov x27, x20", + "adds w21, w20, #0x1 (1)", + "mov x26, x21", + "mov x4, x21" ] }, "add rax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x05", "ExpectedArm64ASM": [ - "mov x27, x4", - "adds x26, x27, #0x1 (1)", - "mov x4, x26" + "mov x20, x4", + "mov x27, x20", + "adds x21, x20, #0x1 (1)", + "mov x26, x21", + "mov x4, x21" ] }, "add al, -1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 11, "Comment": "0x04", "ExpectedArm64ASM": [ "mov w20, #0xff", - "mvn w27, w4", - "lsl w0, w4, #24", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "lsl w0, w21, #24", "cmn w0, w20, lsl #24", - "add w26, w4, #0xff (255)", - "bfxil x4, x26, #0, #8" + "add w20, w21, #0xff (255)", + "mov x26, x20", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "add ax, -1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 11, "Comment": "0x05", "ExpectedArm64ASM": [ "mov w20, #0xffff", - "mvn w27, w4", - "lsl w0, w4, #16", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "lsl w0, w21, #16", "cmn w0, w20, lsl #16", - "add w26, w4, w20", - "bfxil x4, x26, #0, #16" + "add w22, w21, w20", + "mov x26, x22", + "mov x20, x21", + "bfxil x20, x22, #0, #16", + "mov x4, x20" ] }, "add eax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "0x05", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "mvn w27, w4", - "adds w26, w4, w20", - "mov x4, x26" + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "adds w22, w21, w20", + "mov x26, x22", + "mov x4, x22" ] }, "add rax, -1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x05", "ExpectedArm64ASM": [ - "mvn w27, w4", - "subs x26, x4, #0x1 (1)", - "mov x4, x26" + "mov x20, x4", + "mvn w21, w20", + "mov x27, x21", + "subs x21, x20, #0x1 (1)", + "mov x26, x21", + "mov x4, x21" ] }, "or bl, bh": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": "", "ExpectedArm64ASM": [ - "lsr w20, w7, #8", - "orr w26, w7, w20", - "bfxil x7, x26, #0, #8", - "cmn wzr, w26, lsl #24" + "mov x20, x7", + "lsr w21, w20, #8", + "orr w22, w20, w21", + "mov x21, x20", + "bfxil x21, x22, #0, #8", + "mov x7, x21", + "mov x26, x22", + "cmn wzr, w22, lsl #24" ] }, "or bl, cl": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 8, "Comment": "0x08", "ExpectedArm64ASM": [ - "orr w26, w7, w5", - "bfxil x7, x26, #0, #8", - "cmn wzr, w26, lsl #24" + "mov x20, x5", + "mov x21, x7", + "orr w22, w21, w20", + "mov x20, x21", + "bfxil x20, x22, #0, #8", + "mov x7, x20", + "mov x26, x22", + "cmn wzr, w22, lsl #24" ] }, "or bx, cx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 8, "Comment": "0x09", "ExpectedArm64ASM": [ - "orr w26, w7, w5", - "bfxil x7, x26, #0, #16", - "cmn wzr, w26, lsl #16" + "mov x20, x5", + "mov x21, x7", + "orr w22, w21, w20", + "mov x20, x21", + "bfxil x20, x22, #0, #16", + "mov x7, x20", + "mov x26, x22", + "cmn wzr, w22, lsl #16" ] }, "or ebx, ecx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x09", "ExpectedArm64ASM": [ - "orr w7, w7, w5", - "mov x26, x7", - "tst w7, w7" + "mov x20, x5", + "mov x21, x7", + "orr w22, w21, w20", + "mov x7, x22", + "mov x26, x22", + "tst w22, w22" ] }, "or rbx, rcx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x09", "ExpectedArm64ASM": [ - "orr x7, x7, x5", - "mov x26, x7", - "tst x7, x7" + "mov x20, x5", + "mov x21, x7", + "orr x22, x21, x20", + "mov x7, x22", + "mov x26, x22", + "tst x22, x22" ] }, "db 0x0A, 0xcb": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 8, "Comment": [ "0x0A", "or bl, cl but modrm.rm as source" ], "ExpectedArm64ASM": [ - "orr w26, w5, w7", - "bfxil x5, x26, #0, #8", - "cmn wzr, w26, lsl #24" + "mov x20, x7", + "mov x21, x5", + "orr w22, w21, w20", + "mov x20, x21", + "bfxil x20, x22, #0, #8", + "mov x5, x20", + "mov x26, x22", + "cmn wzr, w22, lsl #24" ] }, "db 0x66, 0x0B, 0xcb": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 8, "Comment": [ "0x0B", "or bx, cx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "orr w26, w5, w7", - "bfxil x5, x26, #0, #16", - "cmn wzr, w26, lsl #16" + "mov x20, x7", + "mov x21, x5", + "orr w22, w21, w20", + "mov x20, x21", + "bfxil x20, x22, #0, #16", + "mov x5, x20", + "mov x26, x22", + "cmn wzr, w22, lsl #16" ] }, "db 0x0B, 0xcb": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "0x0B", "or ebx, ecx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "orr w5, w5, w7", - "mov x26, x5", - "tst w5, w5" + "mov x20, x7", + "mov x21, x5", + "orr w22, w21, w20", + "mov x5, x22", + "mov x26, x22", + "tst w22, w22" ] }, "db 0x48, 0x0B, 0xcb": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "0x0B", "or rbx, rcx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "orr x5, x5, x7", - "mov x26, x5", - "tst x5, x5" + "mov x20, x7", + "mov x21, x5", + "orr x22, x21, x20", + "mov x5, x22", + "mov x26, x22", + "tst x22, x22" ] }, "or al, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x0C", "ExpectedArm64ASM": [ - "orr w26, w4, #0x1", - "bfxil x4, x26, #0, #8", - "cmn wzr, w26, lsl #24" + "mov x20, x4", + "orr w21, w20, #0x1", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22", + "mov x26, x21", + "cmn wzr, w21, lsl #24" ] }, "or ax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x0D", "ExpectedArm64ASM": [ - "orr w26, w4, #0x1", - "bfxil x4, x26, #0, #16", - "cmn wzr, w26, lsl #16" + "mov x20, x4", + "orr w21, w20, #0x1", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22", + "mov x26, x21", + "cmn wzr, w21, lsl #16" ] }, "or eax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0D", "ExpectedArm64ASM": [ - "orr w4, w4, #0x1", - "mov x26, x4", - "tst w4, w4" + "mov x20, x4", + "orr w21, w20, #0x1", + "mov x4, x21", + "mov x26, x21", + "tst w21, w21" ] }, "or rax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0D", "ExpectedArm64ASM": [ - "orr x4, x4, #0x1", - "mov x26, x4", - "tst x4, x4" + "mov x20, x4", + "orr x21, x20, #0x1", + "mov x4, x21", + "mov x26, x21", + "tst x21, x21" ] }, "or al, -1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x0C", "ExpectedArm64ASM": [ - "orr w26, w4, #0xff", - "bfxil x4, x26, #0, #8", - "cmn wzr, w26, lsl #24" + "mov x20, x4", + "orr w21, w20, #0xff", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22", + "mov x26, x21", + "cmn wzr, w21, lsl #24" ] }, "or ax, -1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x0D", "ExpectedArm64ASM": [ - "orr w26, w4, #0xffff", - "bfxil x4, x26, #0, #16", - "cmn wzr, w26, lsl #16" + "mov x20, x4", + "orr w21, w20, #0xffff", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22", + "mov x26, x21", + "cmn wzr, w21, lsl #16" ] }, "or eax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0D", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "orr w4, w4, w20", - "mov x26, x4", - "tst w4, w4" + "mov x21, x4", + "orr w22, w21, w20", + "mov x4, x22", + "mov x26, x22", + "tst w22, w22" ] }, "or rax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0D", "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", - "orr x4, x4, x20", - "mov x26, x4", - "tst x4, x4" + "mov x21, x4", + "orr x22, x21, x20", + "mov x4, x22", + "mov x26, x22", + "tst x22, x22" ] }, "adc bl, cl": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 23, "Comment": "0x10", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "cset w20, hs", - "adc w21, w7, w5", - "uxtb w26, w21", - "cmp x26, x5", - "cset x21, lo", - "cmp x26, x5", - "cset x22, ls", - "cmp x20, #0x1 (1)", - "csel x20, x22, x21, eq", - "cmn wzr, w26, lsl #24", - "rmif x20, #63, #nzCv", - "eor w20, w7, w5", - "eor w21, w26, w7", - "bic w20, w21, w20", - "rmif x20, #7, #nzcV", - "bfxil x7, x26, #0, #8" + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "cset w22, hs", + "adc w23, w21, w20", + "uxtb w24, w23", + "cmp x24, x20", + "cset x23, lo", + "cmp x24, x20", + "cset x25, ls", + "cmp x22, #0x1 (1)", + "csel x30, x25, x23, eq", + "cmn wzr, w24, lsl #24", + "rmif x30, #63, #nzCv", + "eor w22, w21, w20", + "eor w20, w24, w21", + "bic w23, w20, w22", + "rmif x23, #7, #nzcV", + "mov x26, x24", + "mov x20, x21", + "bfxil x20, x24, #0, #8", + "mov x7, x20" ] }, "adc bx, cx": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 23, "Comment": "0x11", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "cset w20, hs", - "adc w21, w7, w5", - "uxth w26, w21", - "cmp x26, x5", - "cset x21, lo", - "cmp x26, x5", - "cset x22, ls", - "cmp x20, #0x1 (1)", - "csel x20, x22, x21, eq", - "cmn wzr, w26, lsl #16", - "rmif x20, #63, #nzCv", - "eor w20, w7, w5", - "eor w21, w26, w7", - "bic w20, w21, w20", - "rmif x20, #15, #nzcV", - "bfxil x7, x26, #0, #16" + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "cset w22, hs", + "adc w23, w21, w20", + "uxth w24, w23", + "cmp x24, x20", + "cset x23, lo", + "cmp x24, x20", + "cset x25, ls", + "cmp x22, #0x1 (1)", + "csel x30, x25, x23, eq", + "cmn wzr, w24, lsl #16", + "rmif x30, #63, #nzCv", + "eor w22, w21, w20", + "eor w20, w24, w21", + "bic w23, w20, w22", + "rmif x23, #15, #nzcV", + "mov x26, x24", + "mov x20, x21", + "bfxil x20, x24, #0, #16", + "mov x7, x20" ] }, "adc ebx, ecx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x11", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "adcs w26, w7, w5", - "mov x7, x26" + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "adcs w22, w21, w20", + "mov x26, x22", + "mov x7, x22" ] }, "adc rbx, rcx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x11", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "adcs x26, x7, x5", - "mov x7, x26" + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "adcs x22, x21, x20", + "mov x26, x22", + "mov x7, x22" ] }, "db 0x12, 0xcb": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 23, "Comment": [ "0x12", "adc bl, cl but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "cset w20, hs", - "adc w21, w5, w7", - "uxtb w26, w21", - "cmp x26, x7", - "cset x21, lo", - "cmp x26, x7", - "cset x22, ls", - "cmp x20, #0x1 (1)", - "csel x20, x22, x21, eq", - "cmn wzr, w26, lsl #24", - "rmif x20, #63, #nzCv", - "eor w20, w5, w7", - "eor w21, w26, w5", - "bic w20, w21, w20", - "rmif x20, #7, #nzcV", - "bfxil x5, x26, #0, #8" + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "cset w22, hs", + "adc w23, w21, w20", + "uxtb w24, w23", + "cmp x24, x20", + "cset x23, lo", + "cmp x24, x20", + "cset x25, ls", + "cmp x22, #0x1 (1)", + "csel x30, x25, x23, eq", + "cmn wzr, w24, lsl #24", + "rmif x30, #63, #nzCv", + "eor w22, w21, w20", + "eor w20, w24, w21", + "bic w23, w20, w22", + "rmif x23, #7, #nzcV", + "mov x26, x24", + "mov x20, x21", + "bfxil x20, x24, #0, #8", + "mov x5, x20" ] }, "db 0x66, 0x13, 0xcb": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 23, "Comment": [ "0x13", "adc bx, cx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "cset w20, hs", - "adc w21, w5, w7", - "uxth w26, w21", - "cmp x26, x7", - "cset x21, lo", - "cmp x26, x7", - "cset x22, ls", - "cmp x20, #0x1 (1)", - "csel x20, x22, x21, eq", - "cmn wzr, w26, lsl #16", - "rmif x20, #63, #nzCv", - "eor w20, w5, w7", - "eor w21, w26, w5", - "bic w20, w21, w20", - "rmif x20, #15, #nzcV", - "bfxil x5, x26, #0, #16" + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "cset w22, hs", + "adc w23, w21, w20", + "uxth w24, w23", + "cmp x24, x20", + "cset x23, lo", + "cmp x24, x20", + "cset x25, ls", + "cmp x22, #0x1 (1)", + "csel x30, x25, x23, eq", + "cmn wzr, w24, lsl #16", + "rmif x30, #63, #nzCv", + "eor w22, w21, w20", + "eor w20, w24, w21", + "bic w23, w20, w22", + "rmif x23, #15, #nzcV", + "mov x26, x24", + "mov x20, x21", + "bfxil x20, x24, #0, #16", + "mov x5, x20" ] }, "db 0x13, 0xcb": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0x13", "adc ebx, ecx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "adcs w26, w5, w7", - "mov x5, x26" + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "adcs w22, w21, w20", + "mov x26, x22", + "mov x5, x22" ] }, "db 0x48, 0x13, 0xcb": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0x13", "adc rbx, rcx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "adcs x26, x5, x7", - "mov x5, x26" + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "adcs x22, x21, x20", + "mov x26, x22", + "mov x5, x22" ] }, "adc al, 1": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 20, "Comment": "0x14", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "cset w21, hs", - "adc w20, w27, w20", - "uxtb w26, w20", - "cmp w26, #0x1 (1)", - "cset x20, lo", - "cmp w26, #0x1 (1)", - "cset x22, ls", - "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #24", - "rmif x20, #63, #nzCv", - "bic w20, w26, w27", - "rmif x20, #7, #nzcV", - "mov x4, x27", - "bfxil x4, x26, #0, #8" + "mov x21, x4", + "mov x27, x21", + "cset w22, hs", + "adc w23, w21, w20", + "uxtb w20, w23", + "cmp w20, #0x1 (1)", + "cset x23, lo", + "cmp w20, #0x1 (1)", + "cset x24, ls", + "cmp x22, #0x1 (1)", + "csel x25, x24, x23, eq", + "cmn wzr, w20, lsl #24", + "rmif x25, #63, #nzCv", + "bic w22, w20, w21", + "rmif x22, #7, #nzcV", + "mov x26, x20", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "adc ax, 1": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 20, "Comment": "0x15", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "cset w21, hs", - "adc w20, w27, w20", - "uxth w26, w20", - "cmp w26, #0x1 (1)", - "cset x20, lo", - "cmp w26, #0x1 (1)", - "cset x22, ls", - "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #16", - "rmif x20, #63, #nzCv", - "bic w20, w26, w27", - "rmif x20, #15, #nzcV", - "mov x4, x27", - "bfxil x4, x26, #0, #16" + "mov x21, x4", + "mov x27, x21", + "cset w22, hs", + "adc w23, w21, w20", + "uxth w20, w23", + "cmp w20, #0x1 (1)", + "cset x23, lo", + "cmp w20, #0x1 (1)", + "cset x24, ls", + "cmp x22, #0x1 (1)", + "csel x25, x24, x23, eq", + "cmn wzr, w20, lsl #16", + "rmif x25, #63, #nzCv", + "bic w22, w20, w21", + "rmif x22, #15, #nzcV", + "mov x26, x20", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x4, x22" ] }, "adc eax, 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x15", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "adcs w26, w27, w20", - "mov x4, x26" + "mov x21, x4", + "mov x27, x21", + "adcs w22, w21, w20", + "mov x26, x22", + "mov x4, x22" ] }, "adc rax, 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x15", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "adcs x26, x27, x20", - "mov x4, x26" + "mov x21, x4", + "mov x27, x21", + "adcs x22, x21, x20", + "mov x26, x22", + "mov x4, x22" ] }, "adc al, -1": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 21, "Comment": "0x14", "ExpectedArm64ASM": [ "mov w20, #0xff", - "mvn w27, w4", - "cset w21, hs", - "adc w20, w4, w20", - "uxtb w26, w20", - "cmp w26, #0xff (255)", - "cset x20, lo", - "cmp w26, #0xff (255)", - "cset x22, ls", - "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #24", - "rmif x20, #63, #nzCv", - "bic w20, w4, w26", - "rmif x20, #7, #nzcV", - "bfxil x4, x26, #0, #8" + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "cset w22, hs", + "adc w23, w21, w20", + "uxtb w20, w23", + "cmp w20, #0xff (255)", + "cset x23, lo", + "cmp w20, #0xff (255)", + "cset x24, ls", + "cmp x22, #0x1 (1)", + "csel x25, x24, x23, eq", + "cmn wzr, w20, lsl #24", + "rmif x25, #63, #nzCv", + "bic w22, w21, w20", + "rmif x22, #7, #nzcV", + "mov x26, x20", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "adc ax, -1": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 21, "Comment": "0x15", "ExpectedArm64ASM": [ "mov w20, #0xffff", - "mvn w27, w4", - "cset w21, hs", - "adc w22, w4, w20", - "uxth w26, w22", - "cmp w26, w20", - "cset x22, lo", - "cmp w26, w20", - "cset x20, ls", - "cmp x21, #0x1 (1)", - "csel x20, x20, x22, eq", - "cmn wzr, w26, lsl #16", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "cset w22, hs", + "adc w23, w21, w20", + "uxth w24, w23", + "cmp w24, w20", + "cset x23, lo", + "cmp w24, w20", + "cset x25, ls", + "cmp x22, #0x1 (1)", + "csel x20, x25, x23, eq", + "cmn wzr, w24, lsl #16", "rmif x20, #63, #nzCv", - "bic w20, w4, w26", + "bic w20, w21, w24", "rmif x20, #15, #nzcV", - "bfxil x4, x26, #0, #16" + "mov x26, x24", + "mov x20, x21", + "bfxil x20, x24, #0, #16", + "mov x4, x20" ] }, "adc eax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "0x15", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "mvn w27, w4", - "adcs w26, w4, w20", - "mov x4, x26" + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "adcs w22, w21, w20", + "mov x26, x22", + "mov x4, x22" ] }, "adc rax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "0x15", "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", - "mvn w27, w4", - "adcs x26, x4, x20", - "mov x4, x26" + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "adcs x22, x21, x20", + "mov x26, x22", + "mov x4, x22" ] }, "sbb bl, cl": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 24, "Comment": "0x18", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "cset w20, hs", - "add w21, w5, w20", - "sub w21, w7, w21", - "uxtb w26, w21", - "cmp x26, x7", - "cset x21, hi", - "cmp x26, x7", - "cset x22, hs", - "cmp x20, #0x1 (1)", - "csel x20, x22, x21, eq", - "cmn wzr, w26, lsl #24", - "rmif x20, #63, #nzCv", - "eor w20, w7, w5", - "eor w21, w26, w7", - "and w20, w21, w20", - "rmif x20, #7, #nzcV", - "bfxil x7, x26, #0, #8" + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "cset w22, hs", + "add w23, w20, w22", + "sub w24, w21, w23", + "uxtb w23, w24", + "cmp x23, x21", + "cset x24, hi", + "cmp x23, x21", + "cset x25, hs", + "cmp x22, #0x1 (1)", + "csel x30, x25, x24, eq", + "cmn wzr, w23, lsl #24", + "rmif x30, #63, #nzCv", + "eor w22, w21, w20", + "eor w20, w23, w21", + "and w24, w20, w22", + "rmif x24, #7, #nzcV", + "mov x26, x23", + "mov x20, x21", + "bfxil x20, x23, #0, #8", + "mov x7, x20" ] }, "sbb bx, cx": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 24, "Comment": "0x19", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "cset w20, hs", - "add w21, w5, w20", - "sub w21, w7, w21", - "uxth w26, w21", - "cmp x26, x7", - "cset x21, hi", - "cmp x26, x7", - "cset x22, hs", - "cmp x20, #0x1 (1)", - "csel x20, x22, x21, eq", - "cmn wzr, w26, lsl #16", - "rmif x20, #63, #nzCv", - "eor w20, w7, w5", - "eor w21, w26, w7", - "and w20, w21, w20", - "rmif x20, #15, #nzcV", - "bfxil x7, x26, #0, #16" + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "cset w22, hs", + "add w23, w20, w22", + "sub w24, w21, w23", + "uxth w23, w24", + "cmp x23, x21", + "cset x24, hi", + "cmp x23, x21", + "cset x25, hs", + "cmp x22, #0x1 (1)", + "csel x30, x25, x24, eq", + "cmn wzr, w23, lsl #16", + "rmif x30, #63, #nzCv", + "eor w22, w21, w20", + "eor w20, w23, w21", + "and w24, w20, w22", + "rmif x24, #15, #nzcV", + "mov x26, x23", + "mov x20, x21", + "bfxil x20, x23, #0, #16", + "mov x7, x20" ] }, "sbb ebx, ecx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": "0x19", "ExpectedArm64ASM": [ - "eor w27, w7, w5", + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", "cfinv", - "sbcs w26, w7, w5", + "sbcs w22, w21, w20", "cfinv", - "mov x7, x26" + "mov x26, x22", + "mov x7, x22" ] }, "sbb rbx, rcx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": "0x19", "ExpectedArm64ASM": [ - "eor w27, w7, w5", + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", "cfinv", - "sbcs x26, x7, x5", + "sbcs x22, x21, x20", "cfinv", - "mov x7, x26" + "mov x26, x22", + "mov x7, x22" ] }, "db 0x1A, 0xcb": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 24, "Comment": [ "0x1A", "sbb bl, cl but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "cset w20, hs", - "add w21, w7, w20", - "sub w21, w5, w21", - "uxtb w26, w21", - "cmp x26, x5", - "cset x21, hi", - "cmp x26, x5", - "cset x22, hs", - "cmp x20, #0x1 (1)", - "csel x20, x22, x21, eq", - "cmn wzr, w26, lsl #24", - "rmif x20, #63, #nzCv", - "eor w20, w5, w7", - "eor w21, w26, w5", - "and w20, w21, w20", - "rmif x20, #7, #nzcV", - "bfxil x5, x26, #0, #8" + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "cset w22, hs", + "add w23, w20, w22", + "sub w24, w21, w23", + "uxtb w23, w24", + "cmp x23, x21", + "cset x24, hi", + "cmp x23, x21", + "cset x25, hs", + "cmp x22, #0x1 (1)", + "csel x30, x25, x24, eq", + "cmn wzr, w23, lsl #24", + "rmif x30, #63, #nzCv", + "eor w22, w21, w20", + "eor w20, w23, w21", + "and w24, w20, w22", + "rmif x24, #7, #nzcV", + "mov x26, x23", + "mov x20, x21", + "bfxil x20, x23, #0, #8", + "mov x5, x20" ] }, "db 0x66, 0x1B, 0xcb": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 24, "Comment": [ "0x1B", "sbb bx, cx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "cset w20, hs", - "add w21, w7, w20", - "sub w21, w5, w21", - "uxth w26, w21", - "cmp x26, x5", - "cset x21, hi", - "cmp x26, x5", - "cset x22, hs", - "cmp x20, #0x1 (1)", - "csel x20, x22, x21, eq", - "cmn wzr, w26, lsl #16", - "rmif x20, #63, #nzCv", - "eor w20, w5, w7", - "eor w21, w26, w5", - "and w20, w21, w20", - "rmif x20, #15, #nzcV", - "bfxil x5, x26, #0, #16" + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "cset w22, hs", + "add w23, w20, w22", + "sub w24, w21, w23", + "uxth w23, w24", + "cmp x23, x21", + "cset x24, hi", + "cmp x23, x21", + "cset x25, hs", + "cmp x22, #0x1 (1)", + "csel x30, x25, x24, eq", + "cmn wzr, w23, lsl #16", + "rmif x30, #63, #nzCv", + "eor w22, w21, w20", + "eor w20, w23, w21", + "and w24, w20, w22", + "rmif x24, #15, #nzcV", + "mov x26, x23", + "mov x20, x21", + "bfxil x20, x23, #0, #16", + "mov x5, x20" ] }, "db 0x1B, 0xcb": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": [ "0x1B", "sbb ebx, ecx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", "cfinv", - "sbcs w26, w5, w7", + "sbcs w22, w21, w20", "cfinv", - "mov x5, x26" + "mov x26, x22", + "mov x5, x22" ] }, "db 0x48, 0x1B, 0xcb": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": [ "0x1B", "sbb rbx, rcx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", "cfinv", - "sbcs x26, x5, x7", + "sbcs x22, x21, x20", "cfinv", - "mov x5, x26" + "mov x26, x22", + "mov x5, x22" ] }, "sbb al, 1": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 21, "Comment": "0x1C", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "cset w21, hs", - "add w20, w20, w21", - "sub w20, w27, w20", - "uxtb w26, w20", - "cmp x26, x27", + "mov x21, x4", + "mov x27, x21", + "cset w22, hs", + "add w23, w20, w22", + "sub w20, w21, w23", + "uxtb w23, w20", + "cmp x23, x21", "cset x20, hi", - "cmp x26, x27", - "cset x22, hs", - "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #24", - "rmif x20, #63, #nzCv", - "bic w20, w27, w26", + "cmp x23, x21", + "cset x24, hs", + "cmp x22, #0x1 (1)", + "csel x25, x24, x20, eq", + "cmn wzr, w23, lsl #24", + "rmif x25, #63, #nzCv", + "bic w20, w21, w23", "rmif x20, #7, #nzcV", - "mov x4, x27", - "bfxil x4, x26, #0, #8" + "mov x26, x23", + "mov x20, x21", + "bfxil x20, x23, #0, #8", + "mov x4, x20" ] }, "sbb ax, 1": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 21, "Comment": "0x1D", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "cset w21, hs", - "add w20, w20, w21", - "sub w20, w27, w20", - "uxth w26, w20", - "cmp x26, x27", + "mov x21, x4", + "mov x27, x21", + "cset w22, hs", + "add w23, w20, w22", + "sub w20, w21, w23", + "uxth w23, w20", + "cmp x23, x21", "cset x20, hi", - "cmp x26, x27", - "cset x22, hs", - "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #16", - "rmif x20, #63, #nzCv", - "bic w20, w27, w26", + "cmp x23, x21", + "cset x24, hs", + "cmp x22, #0x1 (1)", + "csel x25, x24, x20, eq", + "cmn wzr, w23, lsl #16", + "rmif x25, #63, #nzCv", + "bic w20, w21, w23", "rmif x20, #15, #nzcV", - "mov x4, x27", - "bfxil x4, x26, #0, #16" + "mov x26, x23", + "mov x20, x21", + "bfxil x20, x23, #0, #16", + "mov x4, x20" ] }, "sbb eax, 1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "0x1D", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", + "mov x21, x4", + "mov x27, x21", "cfinv", - "sbcs w26, w27, w20", + "sbcs w22, w21, w20", "cfinv", - "mov x4, x26" + "mov x26, x22", + "mov x4, x22" ] }, "sbb rax, 1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "0x1D", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", + "mov x21, x4", + "mov x27, x21", "cfinv", - "sbcs x26, x27, x20", + "sbcs x22, x21, x20", "cfinv", - "mov x4, x26" + "mov x26, x22", + "mov x4, x22" ] }, "sbb al, -1": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 22, "Comment": "0x1C", "ExpectedArm64ASM": [ "mov w20, #0xff", - "mvn w27, w4", - "cset w21, hs", - "add w20, w20, w21", - "sub w20, w4, w20", - "uxtb w26, w20", - "cmp x26, x4", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "cset w22, hs", + "add w23, w20, w22", + "sub w20, w21, w23", + "uxtb w23, w20", + "cmp x23, x21", "cset x20, hi", - "cmp x26, x4", - "cset x22, hs", - "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #24", - "rmif x20, #63, #nzCv", - "bic w20, w26, w4", + "cmp x23, x21", + "cset x24, hs", + "cmp x22, #0x1 (1)", + "csel x25, x24, x20, eq", + "cmn wzr, w23, lsl #24", + "rmif x25, #63, #nzCv", + "bic w20, w23, w21", "rmif x20, #7, #nzcV", - "bfxil x4, x26, #0, #8" + "mov x26, x23", + "mov x20, x21", + "bfxil x20, x23, #0, #8", + "mov x4, x20" ] }, "sbb ax, -1": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 22, "Comment": "0x1D", "ExpectedArm64ASM": [ "mov w20, #0xffff", - "mvn w27, w4", - "cset w21, hs", - "add w20, w20, w21", - "sub w20, w4, w20", - "uxth w26, w20", - "cmp x26, x4", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "cset w22, hs", + "add w23, w20, w22", + "sub w20, w21, w23", + "uxth w23, w20", + "cmp x23, x21", "cset x20, hi", - "cmp x26, x4", - "cset x22, hs", - "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #16", - "rmif x20, #63, #nzCv", - "bic w20, w26, w4", + "cmp x23, x21", + "cset x24, hs", + "cmp x22, #0x1 (1)", + "csel x25, x24, x20, eq", + "cmn wzr, w23, lsl #16", + "rmif x25, #63, #nzCv", + "bic w20, w23, w21", "rmif x20, #15, #nzcV", - "bfxil x4, x26, #0, #16" + "mov x26, x23", + "mov x20, x21", + "bfxil x20, x23, #0, #16", + "mov x4, x20" ] }, "sbb eax, -1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": "0x1D", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "mvn w27, w4", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", "cfinv", - "sbcs w26, w4, w20", + "sbcs w22, w21, w20", "cfinv", - "mov x4, x26" + "mov x26, x22", + "mov x4, x22" ] }, "sbb rax, -1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": "0x1D", "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", - "mvn w27, w4", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", "cfinv", - "sbcs x26, x4, x20", + "sbcs x22, x21, x20", "cfinv", - "mov x4, x26" + "mov x26, x22", + "mov x4, x22" ] }, "and bl, cl": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 8, "Comment": "0x20", "ExpectedArm64ASM": [ - "and w26, w7, w5", - "cmn wzr, w26, lsl #24", - "bfxil x7, x26, #0, #8" + "mov x20, x5", + "mov x21, x7", + "and w22, w21, w20", + "cmn wzr, w22, lsl #24", + "mov x26, x22", + "mov x20, x21", + "bfxil x20, x22, #0, #8", + "mov x7, x20" ] }, "and bx, cx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 8, "Comment": "0x21", "ExpectedArm64ASM": [ - "and w26, w7, w5", - "cmn wzr, w26, lsl #16", - "bfxil x7, x26, #0, #16" + "mov x20, x5", + "mov x21, x7", + "and w22, w21, w20", + "cmn wzr, w22, lsl #16", + "mov x26, x22", + "mov x20, x21", + "bfxil x20, x22, #0, #16", + "mov x7, x20" ] }, "and ebx, ecx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x21", "ExpectedArm64ASM": [ - "ands w26, w7, w5", - "mov x7, x26" + "mov x20, x5", + "mov x21, x7", + "ands w22, w21, w20", + "mov x26, x22", + "mov x7, x22" ] }, "and rbx, rcx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x21", "ExpectedArm64ASM": [ - "ands x26, x7, x5", - "mov x7, x26" + "mov x20, x5", + "mov x21, x7", + "ands x22, x21, x20", + "mov x26, x22", + "mov x7, x22" ] }, "db 0x22, 0xcb": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 8, "Comment": [ "0x22", "and bl, cl but modrm.rm as source" ], "ExpectedArm64ASM": [ - "and w26, w5, w7", - "cmn wzr, w26, lsl #24", - "bfxil x5, x26, #0, #8" + "mov x20, x7", + "mov x21, x5", + "and w22, w21, w20", + "cmn wzr, w22, lsl #24", + "mov x26, x22", + "mov x20, x21", + "bfxil x20, x22, #0, #8", + "mov x5, x20" ] }, "db 0x66, 0x23, 0xcb": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 8, "Comment": [ "0x23", "and bx, cx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "and w26, w5, w7", - "cmn wzr, w26, lsl #16", - "bfxil x5, x26, #0, #16" + "mov x20, x7", + "mov x21, x5", + "and w22, w21, w20", + "cmn wzr, w22, lsl #16", + "mov x26, x22", + "mov x20, x21", + "bfxil x20, x22, #0, #16", + "mov x5, x20" ] }, "db 0x23, 0xcb": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "0x23", "and ebx, ecx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "ands w26, w5, w7", - "mov x5, x26" + "mov x20, x7", + "mov x21, x5", + "ands w22, w21, w20", + "mov x26, x22", + "mov x5, x22" ] }, "db 0x48, 0x23, 0xcb": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "0x23", "and rbx, rcx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "ands x26, x5, x7", - "mov x5, x26" + "mov x20, x7", + "mov x21, x5", + "ands x22, x21, x20", + "mov x26, x22", + "mov x5, x22" ] }, "and al, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x24", "ExpectedArm64ASM": [ - "and w26, w4, #0x1", - "cmn wzr, w26, lsl #24", - "bfxil x4, x26, #0, #8" + "mov x20, x4", + "and w21, w20, #0x1", + "cmn wzr, w21, lsl #24", + "mov x26, x21", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22" ] }, "and ax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x25", "ExpectedArm64ASM": [ - "and w26, w4, #0x1", - "cmn wzr, w26, lsl #16", - "bfxil x4, x26, #0, #16" + "mov x20, x4", + "and w21, w20, #0x1", + "cmn wzr, w21, lsl #16", + "mov x26, x21", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22" ] }, "and eax, 1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0x25", "ExpectedArm64ASM": [ - "ands w26, w4, #0x1", - "mov x4, x26" + "mov x20, x4", + "ands w21, w20, #0x1", + "mov x26, x21", + "mov x4, x21" ] }, "and rax, 1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0x25", "ExpectedArm64ASM": [ - "ands x26, x4, #0x1", - "mov x4, x26" + "mov x20, x4", + "ands x21, x20, #0x1", + "mov x26, x21", + "mov x4, x21" ] }, "and al, -1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x24", "ExpectedArm64ASM": [ - "and w26, w4, #0xff", - "cmn wzr, w26, lsl #24", - "bfxil x4, x26, #0, #8" + "mov x20, x4", + "and w21, w20, #0xff", + "cmn wzr, w21, lsl #24", + "mov x26, x21", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22" ] }, "and ax, -1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x25", "ExpectedArm64ASM": [ - "and w26, w4, #0xffff", - "cmn wzr, w26, lsl #16", - "bfxil x4, x26, #0, #16" + "mov x20, x4", + "and w21, w20, #0xffff", + "cmn wzr, w21, lsl #16", + "mov x26, x21", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22" ] }, "and eax, -1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x25", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "ands w26, w4, w20", - "mov x4, x26" + "mov x21, x4", + "ands w22, w21, w20", + "mov x26, x22", + "mov x4, x22" ] }, "and rax, -1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x25", "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", - "ands x26, x4, x20", - "mov x4, x26" + "mov x21, x4", + "ands x22, x21, x20", + "mov x26, x22", + "mov x4, x22" ] }, "sub bl, cl": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 12, "Comment": "0x28", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "lsl w0, w7, #24", - "cmp w0, w5, lsl #24", - "sub w26, w7, w5", + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "lsl w0, w21, #24", + "cmp w0, w20, lsl #24", + "sub w22, w21, w20", + "mov x26, x22", "cfinv", - "bfxil x7, x26, #0, #8" + "mov x20, x21", + "bfxil x20, x22, #0, #8", + "mov x7, x20" ] }, "sub bx, cx": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 12, "Comment": "0x29", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "lsl w0, w7, #16", - "cmp w0, w5, lsl #16", - "sub w26, w7, w5", + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "lsl w0, w21, #16", + "cmp w0, w20, lsl #16", + "sub w22, w21, w20", + "mov x26, x22", "cfinv", - "bfxil x7, x26, #0, #16" + "mov x20, x21", + "bfxil x20, x22, #0, #16", + "mov x7, x20" ] }, "sub ebx, ecx": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": "0x29", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "subs w26, w7, w5", + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "subs w22, w21, w20", + "mov x26, x22", "cfinv", - "mov x7, x26" + "mov x7, x22" ] }, "sub rbx, rcx": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": "0x29", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "subs x26, x7, x5", + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "subs x22, x21, x20", + "mov x26, x22", "cfinv", - "mov x7, x26" + "mov x7, x22" ] }, "db 0x2A, 0xcb": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 12, "Comment": [ "0x2A", "sub bl, cl but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "lsl w0, w5, #24", - "cmp w0, w7, lsl #24", - "sub w26, w5, w7", + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "lsl w0, w21, #24", + "cmp w0, w20, lsl #24", + "sub w22, w21, w20", + "mov x26, x22", "cfinv", - "bfxil x5, x26, #0, #8" + "mov x20, x21", + "bfxil x20, x22, #0, #8", + "mov x5, x20" ] }, "db 0x66, 0x2B, 0xcb": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 12, "Comment": [ "0x2B", "sub bx, cx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "lsl w0, w5, #16", - "cmp w0, w7, lsl #16", - "sub w26, w5, w7", + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "lsl w0, w21, #16", + "cmp w0, w20, lsl #16", + "sub w22, w21, w20", + "mov x26, x22", "cfinv", - "bfxil x5, x26, #0, #16" + "mov x20, x21", + "bfxil x20, x22, #0, #16", + "mov x5, x20" ] }, "db 0x2B, 0xcb": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": [ "0x2B", "sub ebx, ecx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "subs w26, w5, w7", + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "subs w22, w21, w20", + "mov x26, x22", "cfinv", - "mov x5, x26" + "mov x5, x22" ] }, "db 0x48, 0x2B, 0xcb": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": [ "0x2B", "sub rbx, rcx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "subs x26, x5, x7", + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "subs x22, x21, x20", + "mov x26, x22", "cfinv", - "mov x5, x26" + "mov x5, x22" ] }, "sub al, 1": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 11, "Comment": "0x2C", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "lsl w0, w27, #24", + "mov x21, x4", + "mov x27, x21", + "lsl w0, w21, #24", "cmp w0, w20, lsl #24", - "sub w26, w27, #0x1 (1)", + "sub w20, w21, #0x1 (1)", + "mov x26, x20", "cfinv", - "mov x4, x27", - "bfxil x4, x26, #0, #8" + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "sub ax, 1": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 11, "Comment": "0x2D", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "lsl w0, w27, #16", + "mov x21, x4", + "mov x27, x21", + "lsl w0, w21, #16", "cmp w0, w20, lsl #16", - "sub w26, w27, #0x1 (1)", + "sub w20, w21, #0x1 (1)", + "mov x26, x20", "cfinv", - "mov x4, x27", - "bfxil x4, x26, #0, #16" + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x4, x22" ] }, "sub eax, 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x2D", "ExpectedArm64ASM": [ - "mov x27, x4", - "subs w26, w27, #0x1 (1)", + "mov x20, x4", + "mov x27, x20", + "subs w21, w20, #0x1 (1)", + "mov x26, x21", "cfinv", - "mov x4, x26" + "mov x4, x21" ] }, "sub rax, 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x2D", "ExpectedArm64ASM": [ - "mov x27, x4", - "subs x26, x27, #0x1 (1)", + "mov x20, x4", + "mov x27, x20", + "subs x21, x20, #0x1 (1)", + "mov x26, x21", "cfinv", - "mov x4, x26" + "mov x4, x21" ] }, "sub al, -1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 12, "Comment": "0x2C", "ExpectedArm64ASM": [ "mov w20, #0xff", - "mvn w27, w4", - "lsl w0, w4, #24", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "lsl w0, w21, #24", "cmp w0, w20, lsl #24", - "sub w26, w4, #0xff (255)", + "sub w20, w21, #0xff (255)", + "mov x26, x20", "cfinv", - "bfxil x4, x26, #0, #8" + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "sub ax, -1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 12, "Comment": "0x2D", "ExpectedArm64ASM": [ "mov w20, #0xffff", - "mvn w27, w4", - "lsl w0, w4, #16", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "lsl w0, w21, #16", "cmp w0, w20, lsl #16", - "sub w26, w4, w20", + "sub w22, w21, w20", + "mov x26, x22", "cfinv", - "bfxil x4, x26, #0, #16" + "mov x20, x21", + "bfxil x20, x22, #0, #16", + "mov x4, x20" ] }, "sub eax, -1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "0x2D", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "mvn w27, w4", - "subs w26, w4, w20", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "subs w22, w21, w20", + "mov x26, x22", "cfinv", - "mov x4, x26" + "mov x4, x22" ] }, "sub rax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "0x2D", "ExpectedArm64ASM": [ - "mvn w27, w4", - "adds x26, x4, #0x1 (1)", + "mov x20, x4", + "mvn w21, w20", + "mov x27, x21", + "adds x21, x20, #0x1 (1)", + "mov x26, x21", "cfinv", - "mov x4, x26" + "mov x4, x21" ] }, "xor bl, cl": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 8, "Comment": "0x30", "ExpectedArm64ASM": [ - "eor w26, w7, w5", - "bfxil x7, x26, #0, #8", - "cmn wzr, w26, lsl #24" + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x20, x21", + "bfxil x20, x22, #0, #8", + "mov x7, x20", + "mov x26, x22", + "cmn wzr, w22, lsl #24" ] }, "xor bx, cx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 8, "Comment": "0x31", "ExpectedArm64ASM": [ - "eor w26, w7, w5", - "bfxil x7, x26, #0, #16", - "cmn wzr, w26, lsl #16" + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x20, x21", + "bfxil x20, x22, #0, #16", + "mov x7, x20", + "mov x26, x22", + "cmn wzr, w22, lsl #16" ] }, "xor ebx, ecx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x31", "ExpectedArm64ASM": [ - "eor w7, w7, w5", - "mov x26, x7", - "tst w7, w7" + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x7, x22", + "mov x26, x22", + "tst w22, w22" ] }, "xor rbx, rcx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x31", "ExpectedArm64ASM": [ - "eor x7, x7, x5", - "mov x26, x7", - "tst x7, x7" + "mov x20, x5", + "mov x21, x7", + "eor x22, x21, x20", + "mov x7, x22", + "mov x26, x22", + "tst x22, x22" ] }, "db 0x32, 0xcb": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 8, "Comment": [ "0x32", "xor bl, cl but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w26, w5, w7", - "bfxil x5, x26, #0, #8", - "cmn wzr, w26, lsl #24" + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x20, x21", + "bfxil x20, x22, #0, #8", + "mov x5, x20", + "mov x26, x22", + "cmn wzr, w22, lsl #24" ] }, "db 0x66, 0x33, 0xcb": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 8, "Comment": [ "0x33", "xor bx, cx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w26, w5, w7", - "bfxil x5, x26, #0, #16", - "cmn wzr, w26, lsl #16" + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x20, x21", + "bfxil x20, x22, #0, #16", + "mov x5, x20", + "mov x26, x22", + "cmn wzr, w22, lsl #16" ] }, "db 0x33, 0xcb": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "0x33", "xor ebx, ecx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w5, w5, w7", - "mov x26, x5", - "tst w5, w5" + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x5, x22", + "mov x26, x22", + "tst w22, w22" ] }, "db 0x48, 0x33, 0xcb": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "0x33", "xor rbx, rcx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor x5, x5, x7", - "mov x26, x5", - "tst x5, x5" + "mov x20, x7", + "mov x21, x5", + "eor x22, x21, x20", + "mov x5, x22", + "mov x26, x22", + "tst x22, x22" ] }, "xor al, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x34", "ExpectedArm64ASM": [ - "eor w26, w4, #0x1", - "bfxil x4, x26, #0, #8", - "cmn wzr, w26, lsl #24" + "mov x20, x4", + "eor w21, w20, #0x1", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22", + "mov x26, x21", + "cmn wzr, w21, lsl #24" ] }, "xor ax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x35", "ExpectedArm64ASM": [ - "eor w26, w4, #0x1", - "bfxil x4, x26, #0, #16", - "cmn wzr, w26, lsl #16" + "mov x20, x4", + "eor w21, w20, #0x1", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22", + "mov x26, x21", + "cmn wzr, w21, lsl #16" ] }, "xor eax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x35", "ExpectedArm64ASM": [ - "eor w4, w4, #0x1", - "mov x26, x4", - "tst w4, w4" + "mov x20, x4", + "eor w21, w20, #0x1", + "mov x4, x21", + "mov x26, x21", + "tst w21, w21" ] }, "xor rax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x35", "ExpectedArm64ASM": [ - "eor x4, x4, #0x1", - "mov x26, x4", - "tst x4, x4" + "mov x20, x4", + "eor x21, x20, #0x1", + "mov x4, x21", + "mov x26, x21", + "tst x21, x21" ] }, "cmp bl, cl": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": "0x38", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "lsl w0, w7, #24", - "cmp w0, w5, lsl #24", - "sub w26, w7, w5", + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "lsl w0, w21, #24", + "cmp w0, w20, lsl #24", + "sub w22, w21, w20", + "mov x26, x22", "cfinv" ] }, "xor al, -1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x34", "ExpectedArm64ASM": [ - "eor w26, w4, #0xff", - "bfxil x4, x26, #0, #8", - "cmn wzr, w26, lsl #24" + "mov x20, x4", + "eor w21, w20, #0xff", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22", + "mov x26, x21", + "cmn wzr, w21, lsl #24" ] }, "xor ax, -1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x35", "ExpectedArm64ASM": [ - "eor w26, w4, #0xffff", - "bfxil x4, x26, #0, #16", - "cmn wzr, w26, lsl #16" + "mov x20, x4", + "eor w21, w20, #0xffff", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22", + "mov x26, x21", + "cmn wzr, w21, lsl #16" ] }, "xor eax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x35", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "eor w4, w4, w20", - "mov x26, x4", - "tst w4, w4" + "mov x21, x4", + "eor w22, w21, w20", + "mov x4, x22", + "mov x26, x22", + "tst w22, w22" ] }, "xor rax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x35", "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", - "eor x4, x4, x20", - "mov x26, x4", - "tst x4, x4" + "mov x21, x4", + "eor x22, x21, x20", + "mov x4, x22", + "mov x26, x22", + "tst x22, x22" ] }, "cmp bx, cx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": "0x39", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "lsl w0, w7, #16", - "cmp w0, w5, lsl #16", - "sub w26, w7, w5", + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "lsl w0, w21, #16", + "cmp w0, w20, lsl #16", + "sub w22, w21, w20", + "mov x26, x22", "cfinv" ] }, "cmp ebx, ecx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x39", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "subs w26, w7, w5", + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "subs w22, w21, w20", + "mov x26, x22", "cfinv" ] }, "cmp rbx, rcx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x39", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "subs x26, x7, x5", + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "subs x22, x21, x20", + "mov x26, x22", "cfinv" ] }, "db 0x3A, 0xcb": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": [ "0x3A", "cmp bl, cl but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "lsl w0, w5, #24", - "cmp w0, w7, lsl #24", - "sub w26, w5, w7", + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "lsl w0, w21, #24", + "cmp w0, w20, lsl #24", + "sub w22, w21, w20", + "mov x26, x22", "cfinv" ] }, "db 0x66, 0x3B, 0xcb": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": [ "0x3B", "cmp bx, cx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "lsl w0, w5, #16", - "cmp w0, w7, lsl #16", - "sub w26, w5, w7", + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "lsl w0, w21, #16", + "cmp w0, w20, lsl #16", + "sub w22, w21, w20", + "mov x26, x22", "cfinv" ] }, "db 0x3B, 0xcb": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0x3B", "cmp ebx, ecx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "subs w26, w5, w7", + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "subs w22, w21, w20", + "mov x26, x22", "cfinv" ] }, "db 0x48, 0x3B, 0xcb": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0x3B", "cmp rbx, rcx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "subs x26, x5, x7", + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "subs x22, x21, x20", + "mov x26, x22", "cfinv" ] }, "cmp al, 1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "0x3C", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "lsl w0, w27, #24", + "mov x21, x4", + "mov x27, x21", + "lsl w0, w21, #24", "cmp w0, w20, lsl #24", - "sub w26, w27, #0x1 (1)", + "sub w20, w21, #0x1 (1)", + "mov x26, x20", "cfinv" ] }, "cmp ax, 1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "0x3D", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "lsl w0, w27, #16", + "mov x21, x4", + "mov x27, x21", + "lsl w0, w21, #16", "cmp w0, w20, lsl #16", - "sub w26, w27, #0x1 (1)", + "sub w20, w21, #0x1 (1)", + "mov x26, x20", "cfinv" ] }, "cmp eax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x3D", "ExpectedArm64ASM": [ - "mov x27, x4", - "subs w26, w27, #0x1 (1)", + "mov x20, x4", + "mov x27, x20", + "subs w21, w20, #0x1 (1)", + "mov x26, x21", "cfinv" ] }, "cmp rax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x3D", "ExpectedArm64ASM": [ - "mov x27, x4", - "subs x26, x27, #0x1 (1)", + "mov x20, x4", + "mov x27, x20", + "subs x21, x20, #0x1 (1)", + "mov x26, x21", "cfinv" ] }, "cmp al, -1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": "0x3C", "ExpectedArm64ASM": [ "mov w20, #0xff", - "mvn w27, w4", - "lsl w0, w4, #24", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "lsl w0, w21, #24", "cmp w0, w20, lsl #24", - "sub w26, w4, #0xff (255)", + "sub w20, w21, #0xff (255)", + "mov x26, x20", "cfinv" ] }, "cmp ax, -1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": "0x3D", "ExpectedArm64ASM": [ "mov w20, #0xffff", - "mvn w27, w4", - "lsl w0, w4, #16", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "lsl w0, w21, #16", "cmp w0, w20, lsl #16", - "sub w26, w4, w20", + "sub w22, w21, w20", + "mov x26, x22", "cfinv" ] }, "cmp eax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "0x3D", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "mvn w27, w4", - "subs w26, w4, w20", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "subs w22, w21, w20", + "mov x26, x22", "cfinv" ] }, "cmp rax, -1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x3D", "ExpectedArm64ASM": [ - "mvn w27, w4", - "adds x26, x4, #0x1 (1)", + "mov x20, x4", + "mvn w21, w20", + "mov x27, x21", + "adds x21, x20, #0x1 (1)", + "mov x26, x21", "cfinv" ] }, "imul ax, bx, 257": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 12, "Comment": "0x69", "ExpectedArm64ASM": [ - "sxth x20, w7", - "mov w21, #0x101", - "mul x20, x20, x21", - "sbfx x21, x20, #16, #16", - "bfxil x4, x20, #0, #16", - "sbfx x20, x20, #15, #1", - "cmp x21, x20", + "mov x20, x7", + "sxth x21, w20", + "mov w20, #0x101", + "mul x22, x21, x20", + "sbfx x20, x22, #16, #16", + "mov x21, x4", + "mov x23, x21", + "bfxil x23, x22, #0, #16", + "mov x4, x23", + "sbfx x21, x22, #15, #1", + "cmp x20, x21", "ccmn xzr, #0, #nzCV, eq" ] }, "imul eax, ebx, 257": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "0x69", "ExpectedArm64ASM": [ - "mov w20, #0x101", - "smull x21, w7, w20", - "asr x21, x21, #32", - "mul w4, w7, w20", - "sbfx x20, x4, #31, #1", - "cmp x21, x20", + "mov x20, x7", + "mov w21, #0x101", + "smull x22, w20, w21", + "asr x23, x22, #32", + "mul w22, w20, w21", + "mov x4, x22", + "sbfx x20, x22, #31, #1", + "cmp x23, x20", "ccmn xzr, #0, #nzCV, eq" ] }, "imul rax, rbx, 257": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "0x69", "ExpectedArm64ASM": [ - "mov w20, #0x101", - "smulh x21, x7, x20", - "mul x4, x7, x20", - "asr x20, x4, #63", - "cmp x21, x20", + "mov x20, x7", + "mov w21, #0x101", + "smulh x22, x20, x21", + "mul x23, x20, x21", + "mov x4, x23", + "asr x20, x23, #63", + "cmp x22, x20", "ccmn xzr, #0, #nzCV, eq" ] }, "imul ax, bx, 3": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 12, "Comment": "0x6b", "ExpectedArm64ASM": [ - "sxth x20, w7", - "mov w21, #0x3", - "mul x20, x20, x21", - "sbfx x21, x20, #16, #16", - "bfxil x4, x20, #0, #16", - "sbfx x20, x20, #15, #1", - "cmp x21, x20", + "mov x20, x7", + "sxth x21, w20", + "mov w20, #0x3", + "mul x22, x21, x20", + "sbfx x20, x22, #16, #16", + "mov x21, x4", + "mov x23, x21", + "bfxil x23, x22, #0, #16", + "mov x4, x23", + "sbfx x21, x22, #15, #1", + "cmp x20, x21", "ccmn xzr, #0, #nzCV, eq" ] }, "imul eax, ebx, 3": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "0x6b", "ExpectedArm64ASM": [ - "mov w20, #0x3", - "smull x21, w7, w20", - "asr x21, x21, #32", - "mul w4, w7, w20", - "sbfx x20, x4, #31, #1", - "cmp x21, x20", + "mov x20, x7", + "mov w21, #0x3", + "smull x22, w20, w21", + "asr x23, x22, #32", + "mul w22, w20, w21", + "mov x4, x22", + "sbfx x20, x22, #31, #1", + "cmp x23, x20", "ccmn xzr, #0, #nzCV, eq" ] }, "imul rax, rbx, 3": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "0x6b", "ExpectedArm64ASM": [ - "mov w20, #0x3", - "smulh x21, x7, x20", - "mul x4, x7, x20", - "asr x20, x4, #63", - "cmp x21, x20", + "mov x20, x7", + "mov w21, #0x3", + "smulh x22, x20, x21", + "mul x23, x20, x21", + "mov x4, x23", + "asr x20, x23, #63", + "cmp x22, x20", "ccmn xzr, #0, #nzCV, eq" ] }, "test al, bl": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x84", "ExpectedArm64ASM": [ - "and w26, w4, w7", - "cmn wzr, w26, lsl #24" + "mov x20, x7", + "mov x21, x4", + "and w22, w21, w20", + "cmn wzr, w22, lsl #24", + "mov x26, x22" ] }, "test ax, bx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x84", "ExpectedArm64ASM": [ - "and w26, w4, w7", - "cmn wzr, w26, lsl #16" + "mov x20, x7", + "mov x21, x4", + "and w22, w21, w20", + "cmn wzr, w22, lsl #16", + "mov x26, x22" ] }, "test eax, ebx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x84", "ExpectedArm64ASM": [ - "ands w26, w4, w7" + "mov x20, x7", + "mov x21, x4", + "ands w22, w21, w20", + "mov x26, x22" ] }, "test rax, rbx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x84", "ExpectedArm64ASM": [ - "ands x26, x4, x7" + "mov x20, x7", + "mov x21, x4", + "ands x22, x21, x20", + "mov x26, x22" ] }, "pushf": { - "ExpectedInstructionCount": 39, + "ExpectedInstructionCount": 44, "Comment": "0x9c", "ExpectedArm64ASM": [ "cset w20, hs", - "eor w21, w27, w26", - "ubfx w21, w21, #4, #1", - "orr x20, x20, x21, lsl #4", - "ldrb w21, [x28, #712]", - "orr x20, x20, x21, lsl #8", - "ldrb w21, [x28, #713]", - "orr x20, x20, x21, lsl #9", - "ldrsb x21, [x28, #714]", - "lsr x21, x21, #63", - "orr x20, x20, x21, lsl #10", + "mov x21, x27", + "mov x22, x26", + "eor w23, w21, w22", + "ubfx w21, w23, #4, #1", + "orr x23, x20, x21, lsl #4", + "ldrb w20, [x28, #712]", + "orr x21, x23, x20, lsl #8", + "ldrb w20, [x28, #713]", + "orr x23, x21, x20, lsl #9", + "ldrsb x20, [x28, #714]", + "lsr x21, x20, #63", + "orr x20, x23, x21, lsl #10", "cset w21, vs", - "orr x20, x20, x21, lsl #11", - "ldrb w21, [x28, #716]", - "orr x20, x20, x21, lsl #12", - "ldrb w21, [x28, #718]", - "orr x20, x20, x21, lsl #14", - "ldrb w21, [x28, #720]", - "orr x20, x20, x21, lsl #16", - "ldrb w21, [x28, #721]", - "orr x20, x20, x21, lsl #17", - "ldrb w21, [x28, #722]", - "orr x20, x20, x21, lsl #18", - "ldrb w21, [x28, #723]", - "orr x20, x20, x21, lsl #19", - "ldrb w21, [x28, #724]", - "orr x20, x20, x21, lsl #20", - "ldrb w21, [x28, #725]", - "orr x20, x20, x21, lsl #21", - "eor w21, w26, w26, lsr #4", - "eor w21, w21, w21, lsr #2", - "eor w21, w21, w21, lsr #1", - "orr x21, x21, #0xfffffffffffffffe", - "orn x20, x20, x21, ror #62", + "orr x23, x20, x21, lsl #11", + "ldrb w20, [x28, #716]", + "orr x21, x23, x20, lsl #12", + "ldrb w20, [x28, #718]", + "orr x23, x21, x20, lsl #14", + "ldrb w20, [x28, #720]", + "orr x21, x23, x20, lsl #16", + "ldrb w20, [x28, #721]", + "orr x23, x21, x20, lsl #17", + "ldrb w20, [x28, #722]", + "orr x21, x23, x20, lsl #18", + "ldrb w20, [x28, #723]", + "orr x23, x21, x20, lsl #19", + "ldrb w20, [x28, #724]", + "orr x21, x23, x20, lsl #20", + "ldrb w20, [x28, #725]", + "orr x23, x21, x20, lsl #21", + "eor w20, w22, w22, lsr #4", + "eor w21, w20, w20, lsr #2", + "eor w20, w21, w21, lsr #1", + "orr x21, x20, #0xfffffffffffffffe", + "orn x20, x23, x21, ror #62", "mrs x21, nzcv", - "and x21, x21, #0xc0000000", - "orr x20, x20, x21, lsr #24", - "orr x20, x20, #0x2", - "str x20, [x8, #-8]!" + "and x22, x21, #0xc0000000", + "orr x21, x20, x22, lsr #24", + "orr x20, x21, #0x2", + "mov x21, x8", + "mov x22, x21", + "str x20, [x22, #-8]!", + "mov x8, x22" ] }, "pushfq": { - "ExpectedInstructionCount": 39, + "ExpectedInstructionCount": 44, "Comment": "0x9c", "ExpectedArm64ASM": [ "cset w20, hs", - "eor w21, w27, w26", - "ubfx w21, w21, #4, #1", - "orr x20, x20, x21, lsl #4", - "ldrb w21, [x28, #712]", - "orr x20, x20, x21, lsl #8", - "ldrb w21, [x28, #713]", - "orr x20, x20, x21, lsl #9", - "ldrsb x21, [x28, #714]", - "lsr x21, x21, #63", - "orr x20, x20, x21, lsl #10", + "mov x21, x27", + "mov x22, x26", + "eor w23, w21, w22", + "ubfx w21, w23, #4, #1", + "orr x23, x20, x21, lsl #4", + "ldrb w20, [x28, #712]", + "orr x21, x23, x20, lsl #8", + "ldrb w20, [x28, #713]", + "orr x23, x21, x20, lsl #9", + "ldrsb x20, [x28, #714]", + "lsr x21, x20, #63", + "orr x20, x23, x21, lsl #10", "cset w21, vs", - "orr x20, x20, x21, lsl #11", - "ldrb w21, [x28, #716]", - "orr x20, x20, x21, lsl #12", - "ldrb w21, [x28, #718]", - "orr x20, x20, x21, lsl #14", - "ldrb w21, [x28, #720]", - "orr x20, x20, x21, lsl #16", - "ldrb w21, [x28, #721]", - "orr x20, x20, x21, lsl #17", - "ldrb w21, [x28, #722]", - "orr x20, x20, x21, lsl #18", - "ldrb w21, [x28, #723]", - "orr x20, x20, x21, lsl #19", - "ldrb w21, [x28, #724]", - "orr x20, x20, x21, lsl #20", - "ldrb w21, [x28, #725]", - "orr x20, x20, x21, lsl #21", - "eor w21, w26, w26, lsr #4", - "eor w21, w21, w21, lsr #2", - "eor w21, w21, w21, lsr #1", - "orr x21, x21, #0xfffffffffffffffe", - "orn x20, x20, x21, ror #62", + "orr x23, x20, x21, lsl #11", + "ldrb w20, [x28, #716]", + "orr x21, x23, x20, lsl #12", + "ldrb w20, [x28, #718]", + "orr x23, x21, x20, lsl #14", + "ldrb w20, [x28, #720]", + "orr x21, x23, x20, lsl #16", + "ldrb w20, [x28, #721]", + "orr x23, x21, x20, lsl #17", + "ldrb w20, [x28, #722]", + "orr x21, x23, x20, lsl #18", + "ldrb w20, [x28, #723]", + "orr x23, x21, x20, lsl #19", + "ldrb w20, [x28, #724]", + "orr x21, x23, x20, lsl #20", + "ldrb w20, [x28, #725]", + "orr x23, x21, x20, lsl #21", + "eor w20, w22, w22, lsr #4", + "eor w21, w20, w20, lsr #2", + "eor w20, w21, w21, lsr #1", + "orr x21, x20, #0xfffffffffffffffe", + "orn x20, x23, x21, ror #62", "mrs x21, nzcv", - "and x21, x21, #0xc0000000", - "orr x20, x20, x21, lsr #24", - "orr x20, x20, #0x2", - "str x20, [x8, #-8]!" + "and x22, x21, #0xc0000000", + "orr x21, x20, x22, lsr #24", + "orr x20, x21, #0x2", + "mov x21, x8", + "mov x22, x21", + "str x20, [x22, #-8]!", + "mov x8, x22" ] }, "popf": { - "ExpectedInstructionCount": 34, + "ExpectedInstructionCount": 38, "Comment": "0x9d", "ExpectedArm64ASM": [ - "ldr x20, [x8]", - "add x8, x8, #0x8 (8)", - "mov w21, #0x202", - "orr x27, x20, x21", - "rmif x27, #63, #nzCv", - "ubfx w20, w27, #2, #1", + "mov x20, x8", + "ldr x21, [x20]", + "add x22, x20, #0x8 (8)", + "mov x8, x22", + "mov w20, #0x202", + "orr x22, x21, x20", + "rmif x22, #63, #nzCv", + "ubfx w20, w22, #2, #1", "mov w21, #0x1", - "eor w26, w20, #0x1", - "rmif x27, #4, #nZcv", - "rmif x27, #4, #Nzcv", - "ubfx w20, w27, #8, #1", + "eor w23, w20, #0x1", + "mov x26, x23", + "mov x27, x22", + "rmif x22, #4, #nZcv", + "rmif x22, #4, #Nzcv", + "ubfx w20, w22, #8, #1", "strb w20, [x28, #712]", - "ubfx w20, w27, #9, #1", + "ubfx w20, w22, #9, #1", "strb w20, [x28, #713]", - "ubfx w20, w27, #10, #1", - "sub x20, x21, x20, lsl #1", - "strb w20, [x28, #714]", - "rmif x27, #11, #nzcV", - "ubfx w20, w27, #12, #1", + "ubfx w20, w22, #10, #1", + "sub x23, x21, x20, lsl #1", + "strb w23, [x28, #714]", + "rmif x22, #11, #nzcV", + "ubfx w20, w22, #12, #1", "strb w20, [x28, #716]", - "ubfx w20, w27, #14, #1", + "ubfx w20, w22, #14, #1", "strb w20, [x28, #718]", - "ubfx w20, w27, #16, #1", + "ubfx w20, w22, #16, #1", "strb w20, [x28, #720]", - "ubfx w20, w27, #17, #1", + "ubfx w20, w22, #17, #1", "strb w20, [x28, #721]", - "ubfx w20, w27, #18, #1", + "ubfx w20, w22, #18, #1", "strb w20, [x28, #722]", - "ubfx w20, w27, #19, #1", + "ubfx w20, w22, #19, #1", "strb w20, [x28, #723]", - "ubfx w20, w27, #20, #1", + "ubfx w20, w22, #20, #1", "strb w20, [x28, #724]", - "ubfx w20, w27, #21, #1", + "ubfx w20, w22, #21, #1", "strb w20, [x28, #725]" ] }, "sahf": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 12, "Comment": "0x9e", "ExpectedArm64ASM": [ - "ubfx w20, w4, #8, #8", - "mov w21, #0x28", - "bic x20, x20, x21", - "orr x27, x20, #0x2", - "rmif x27, #63, #nzCv", - "ubfx w20, w27, #2, #1", - "eor w26, w20, #0x1", - "rmif x27, #4, #nZcv", - "rmif x27, #4, #Nzcv" + "mov x20, x4", + "ubfx w21, w20, #8, #8", + "mov w20, #0x28", + "bic x22, x21, x20", + "orr x20, x22, #0x2", + "rmif x20, #63, #nzCv", + "ubfx w21, w20, #2, #1", + "eor w22, w21, #0x1", + "mov x26, x22", + "mov x27, x20", + "rmif x20, #4, #nZcv", + "rmif x20, #4, #Nzcv" ] }, "lahf": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 19, "Comment": "0x9f", "ExpectedArm64ASM": [ "cset w20, hs", - "eor w21, w27, w26", - "ubfx w21, w21, #4, #1", - "orr x20, x20, x21, lsl #4", - "eor w21, w26, w26, lsr #4", - "eor w21, w21, w21, lsr #2", - "eor w21, w21, w21, lsr #1", - "orr x21, x21, #0xfffffffffffffffe", - "orn x20, x20, x21, ror #62", + "mov x21, x27", + "mov x22, x26", + "eor w23, w21, w22", + "ubfx w21, w23, #4, #1", + "orr x23, x20, x21, lsl #4", + "eor w20, w22, w22, lsr #4", + "eor w21, w20, w20, lsr #2", + "eor w20, w21, w21, lsr #1", + "orr x21, x20, #0xfffffffffffffffe", + "orn x20, x23, x21, ror #62", "mrs x21, nzcv", - "and x21, x21, #0xc0000000", - "orr x20, x20, x21, lsr #24", - "orr x20, x20, #0x2", - "bfi x4, x20, #8, #8" + "and x22, x21, #0xc0000000", + "orr x21, x20, x22, lsr #24", + "orr x20, x21, #0x2", + "mov x21, x4", + "mov x22, x21", + "bfi x22, x20, #8, #8", + "mov x4, x22" ] }, "cmpsb": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 16, "Comment": [ "0xa6" ], "ExpectedArm64ASM": [ - "ldrb w20, [x11]", - "ldrb w21, [x10]", - "ldrsb x22, [x28, #714]", - "add x11, x11, x22", - "add x10, x10, x22", - "eor w27, w21, w20", - "lsl w0, w21, #24", - "cmp w0, w20, lsl #24", - "sub w26, w21, w20", + "mov x20, x10", + "mov x21, x11", + "ldrb w22, [x21]", + "ldrb w23, [x20]", + "ldrsb x24, [x28, #714]", + "add x25, x21, x24", + "mov x11, x25", + "add x21, x20, x24", + "mov x10, x21", + "eor w20, w23, w22", + "mov x27, x20", + "lsl w0, w23, #24", + "cmp w0, w22, lsl #24", + "sub w20, w23, w22", + "mov x26, x20", "cfinv" ] }, "cmpsw": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 17, "Comment": [ "0xa7" ], "ExpectedArm64ASM": [ - "ldrh w20, [x11]", - "ldrh w21, [x10]", - "ldrsb x22, [x28, #714]", - "lsl x22, x22, #1", - "add x11, x11, x22", - "add x10, x10, x22", - "eor w27, w21, w20", - "lsl w0, w21, #16", - "cmp w0, w20, lsl #16", - "sub w26, w21, w20", + "mov x20, x10", + "mov x21, x11", + "ldrh w22, [x21]", + "ldrh w23, [x20]", + "ldrsb x24, [x28, #714]", + "lsl x25, x24, #1", + "add x24, x21, x25", + "mov x11, x24", + "add x21, x20, x25", + "mov x10, x21", + "eor w20, w23, w22", + "mov x27, x20", + "lsl w0, w23, #16", + "cmp w0, w22, lsl #16", + "sub w20, w23, w22", + "mov x26, x20", "cfinv" ] }, "cmpsd": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 15, "Comment": [ "0xa7" ], "ExpectedArm64ASM": [ - "ldr w20, [x11]", - "ldr w21, [x10]", - "ldrsb x22, [x28, #714]", - "lsl x22, x22, #2", - "add x11, x11, x22", - "add x10, x10, x22", - "eor w27, w21, w20", - "subs w26, w21, w20", + "mov x20, x10", + "mov x21, x11", + "ldr w22, [x21]", + "ldr w23, [x20]", + "ldrsb x24, [x28, #714]", + "lsl x25, x24, #2", + "add x24, x21, x25", + "mov x11, x24", + "add x21, x20, x25", + "mov x10, x21", + "eor w20, w23, w22", + "mov x27, x20", + "subs w20, w23, w22", + "mov x26, x20", "cfinv" ] }, "cmpsq": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 15, "Comment": [ "0xa7" ], "ExpectedArm64ASM": [ - "ldr x20, [x11]", - "ldr x21, [x10]", - "ldrsb x22, [x28, #714]", - "lsl x22, x22, #3", - "add x11, x11, x22", - "add x10, x10, x22", - "eor w27, w21, w20", - "subs x26, x21, x20", + "mov x20, x10", + "mov x21, x11", + "ldr x22, [x21]", + "ldr x23, [x20]", + "ldrsb x24, [x28, #714]", + "lsl x25, x24, #3", + "add x24, x21, x25", + "mov x11, x24", + "add x21, x20, x25", + "mov x10, x21", + "eor w20, w23, w22", + "mov x27, x20", + "subs x20, x23, x22", + "mov x26, x20", "cfinv" ] }, "repz cmpsb": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 46, "Comment": "0xa6", "ExpectedArm64ASM": [ - "cbz x5, #+0x68", + "mov x20, x5", + "cbz x20, #+0xb4", "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x24", - "ldrb w26, [x11]", - "ldrb w27, [x10]", - "subs x5, x5, #0x1 (1)", - "add x11, x11, #0x1 (1)", - "add x10, x10, #0x1 (1)", - "ccmp x27, x26, #nzcv, ne", - "b.eq #-0x18", - "b #+0x20", - "ldrb w26, [x11]", - "ldrb w27, [x10]", - "subs x5, x5, #0x1 (1)", - "sub x11, x11, #0x1 (1)", - "sub x10, x10, #0x1 (1)", - "ccmp x27, x26, #nzcv, ne", - "b.eq #-0x18", - "mov x20, x27", - "eor w27, w20, w26", - "lsl w0, w20, #24", - "cmp w0, w26, lsl #24", - "sub w26, w20, w26", + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x44", + "mov x20, x10", + "mov x21, x11", + "ldrb w22, [x21]", + "ldrb w23, [x20]", + "mov x26, x22", + "mov x27, x23", + "mov x24, x5", + "subs x25, x24, #0x1 (1)", + "mov x5, x25", + "add x24, x21, #0x1 (1)", + "mov x11, x24", + "add x21, x20, #0x1 (1)", + "mov x10, x21", + "ccmp x23, x22, #nzcv, ne", + "b.eq #-0x38", + "b #+0x40", + "mov x20, x10", + "mov x21, x11", + "ldrb w22, [x21]", + "ldrb w23, [x20]", + "mov x26, x22", + "mov x27, x23", + "mov x24, x5", + "subs x25, x24, #0x1 (1)", + "mov x5, x25", + "sub x24, x21, #0x1 (1)", + "mov x11, x24", + "sub x21, x20, #0x1 (1)", + "mov x10, x21", + "ccmp x23, x22, #nzcv, ne", + "b.eq #-0x38", + "mov x20, x26", + "mov x21, x27", + "eor w22, w21, w20", + "mov x27, x22", + "lsl w0, w21, #24", + "cmp w0, w20, lsl #24", + "sub w22, w21, w20", + "mov x26, x22", "cfinv" ] }, "repz cmpsw": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 46, "Comment": "0xa7", "ExpectedArm64ASM": [ - "cbz x5, #+0x68", + "mov x20, x5", + "cbz x20, #+0xb4", "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x24", - "ldrh w26, [x11]", - "ldrh w27, [x10]", - "subs x5, x5, #0x1 (1)", - "add x11, x11, #0x2 (2)", - "add x10, x10, #0x2 (2)", - "ccmp x27, x26, #nzcv, ne", - "b.eq #-0x18", - "b #+0x20", - "ldrh w26, [x11]", - "ldrh w27, [x10]", - "subs x5, x5, #0x1 (1)", - "sub x11, x11, #0x2 (2)", - "sub x10, x10, #0x2 (2)", - "ccmp x27, x26, #nzcv, ne", - "b.eq #-0x18", - "mov x20, x27", - "eor w27, w20, w26", - "lsl w0, w20, #16", - "cmp w0, w26, lsl #16", - "sub w26, w20, w26", + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x44", + "mov x20, x10", + "mov x21, x11", + "ldrh w22, [x21]", + "ldrh w23, [x20]", + "mov x26, x22", + "mov x27, x23", + "mov x24, x5", + "subs x25, x24, #0x1 (1)", + "mov x5, x25", + "add x24, x21, #0x2 (2)", + "mov x11, x24", + "add x21, x20, #0x2 (2)", + "mov x10, x21", + "ccmp x23, x22, #nzcv, ne", + "b.eq #-0x38", + "b #+0x40", + "mov x20, x10", + "mov x21, x11", + "ldrh w22, [x21]", + "ldrh w23, [x20]", + "mov x26, x22", + "mov x27, x23", + "mov x24, x5", + "subs x25, x24, #0x1 (1)", + "mov x5, x25", + "sub x24, x21, #0x2 (2)", + "mov x11, x24", + "sub x21, x20, #0x2 (2)", + "mov x10, x21", + "ccmp x23, x22, #nzcv, ne", + "b.eq #-0x38", + "mov x20, x26", + "mov x21, x27", + "eor w22, w21, w20", + "mov x27, x22", + "lsl w0, w21, #16", + "cmp w0, w20, lsl #16", + "sub w22, w21, w20", + "mov x26, x22", "cfinv" ] }, "repz cmpsd": { - "ExpectedInstructionCount": 24, + "ExpectedInstructionCount": 44, "Comment": "0xa7", "ExpectedArm64ASM": [ - "cbz x5, #+0x60", + "mov x20, x5", + "cbz x20, #+0xac", "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x24", - "ldr w26, [x11]", - "ldr w27, [x10]", - "subs x5, x5, #0x1 (1)", - "add x11, x11, #0x4 (4)", - "add x10, x10, #0x4 (4)", - "ccmp x27, x26, #nzcv, ne", - "b.eq #-0x18", - "b #+0x20", - "ldr w26, [x11]", - "ldr w27, [x10]", - "subs x5, x5, #0x1 (1)", - "sub x11, x11, #0x4 (4)", - "sub x10, x10, #0x4 (4)", - "ccmp x27, x26, #nzcv, ne", - "b.eq #-0x18", - "mov x20, x27", - "eor w27, w20, w26", - "subs w26, w20, w26", + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x44", + "mov x20, x10", + "mov x21, x11", + "ldr w22, [x21]", + "ldr w23, [x20]", + "mov x26, x22", + "mov x27, x23", + "mov x24, x5", + "subs x25, x24, #0x1 (1)", + "mov x5, x25", + "add x24, x21, #0x4 (4)", + "mov x11, x24", + "add x21, x20, #0x4 (4)", + "mov x10, x21", + "ccmp x23, x22, #nzcv, ne", + "b.eq #-0x38", + "b #+0x40", + "mov x20, x10", + "mov x21, x11", + "ldr w22, [x21]", + "ldr w23, [x20]", + "mov x26, x22", + "mov x27, x23", + "mov x24, x5", + "subs x25, x24, #0x1 (1)", + "mov x5, x25", + "sub x24, x21, #0x4 (4)", + "mov x11, x24", + "sub x21, x20, #0x4 (4)", + "mov x10, x21", + "ccmp x23, x22, #nzcv, ne", + "b.eq #-0x38", + "mov x20, x26", + "mov x21, x27", + "eor w22, w21, w20", + "mov x27, x22", + "subs w22, w21, w20", + "mov x26, x22", "cfinv" ] }, "repz cmpsq": { - "ExpectedInstructionCount": 24, + "ExpectedInstructionCount": 44, "Comment": "0xa7", "ExpectedArm64ASM": [ - "cbz x5, #+0x60", + "mov x20, x5", + "cbz x20, #+0xac", "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x24", - "ldr x26, [x11]", - "ldr x27, [x10]", - "subs x5, x5, #0x1 (1)", - "add x11, x11, #0x8 (8)", - "add x10, x10, #0x8 (8)", - "ccmp x27, x26, #nzcv, ne", - "b.eq #-0x18", - "b #+0x20", - "ldr x26, [x11]", - "ldr x27, [x10]", - "subs x5, x5, #0x1 (1)", - "sub x11, x11, #0x8 (8)", - "sub x10, x10, #0x8 (8)", - "ccmp x27, x26, #nzcv, ne", - "b.eq #-0x18", - "mov x20, x27", - "eor w27, w20, w26", - "subs x26, x20, x26", + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x44", + "mov x20, x10", + "mov x21, x11", + "ldr x22, [x21]", + "ldr x23, [x20]", + "mov x26, x22", + "mov x27, x23", + "mov x24, x5", + "subs x25, x24, #0x1 (1)", + "mov x5, x25", + "add x24, x21, #0x8 (8)", + "mov x11, x24", + "add x21, x20, #0x8 (8)", + "mov x10, x21", + "ccmp x23, x22, #nzcv, ne", + "b.eq #-0x38", + "b #+0x40", + "mov x20, x10", + "mov x21, x11", + "ldr x22, [x21]", + "ldr x23, [x20]", + "mov x26, x22", + "mov x27, x23", + "mov x24, x5", + "subs x25, x24, #0x1 (1)", + "mov x5, x25", + "sub x24, x21, #0x8 (8)", + "mov x11, x24", + "sub x21, x20, #0x8 (8)", + "mov x10, x21", + "ccmp x23, x22, #nzcv, ne", + "b.eq #-0x38", + "mov x20, x26", + "mov x21, x27", + "eor w22, w21, w20", + "mov x27, x22", + "subs x22, x21, x20", + "mov x26, x22", "cfinv" ] }, "repnz cmpsb": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 46, "Comment": "0xa6", "ExpectedArm64ASM": [ - "cbz x5, #+0x68", + "mov x20, x5", + "cbz x20, #+0xb4", "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x24", - "ldrb w26, [x11]", - "ldrb w27, [x10]", - "subs x5, x5, #0x1 (1)", - "add x11, x11, #0x1 (1)", - "add x10, x10, #0x1 (1)", - "ccmp x27, x26, #nZcv, ne", - "b.ne #-0x18", - "b #+0x20", - "ldrb w26, [x11]", - "ldrb w27, [x10]", - "subs x5, x5, #0x1 (1)", - "sub x11, x11, #0x1 (1)", - "sub x10, x10, #0x1 (1)", - "ccmp x27, x26, #nZcv, ne", - "b.ne #-0x18", - "mov x20, x27", - "eor w27, w20, w26", - "lsl w0, w20, #24", - "cmp w0, w26, lsl #24", - "sub w26, w20, w26", + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x44", + "mov x20, x10", + "mov x21, x11", + "ldrb w22, [x21]", + "ldrb w23, [x20]", + "mov x26, x22", + "mov x27, x23", + "mov x24, x5", + "subs x25, x24, #0x1 (1)", + "mov x5, x25", + "add x24, x21, #0x1 (1)", + "mov x11, x24", + "add x21, x20, #0x1 (1)", + "mov x10, x21", + "ccmp x23, x22, #nZcv, ne", + "b.ne #-0x38", + "b #+0x40", + "mov x20, x10", + "mov x21, x11", + "ldrb w22, [x21]", + "ldrb w23, [x20]", + "mov x26, x22", + "mov x27, x23", + "mov x24, x5", + "subs x25, x24, #0x1 (1)", + "mov x5, x25", + "sub x24, x21, #0x1 (1)", + "mov x11, x24", + "sub x21, x20, #0x1 (1)", + "mov x10, x21", + "ccmp x23, x22, #nZcv, ne", + "b.ne #-0x38", + "mov x20, x26", + "mov x21, x27", + "eor w22, w21, w20", + "mov x27, x22", + "lsl w0, w21, #24", + "cmp w0, w20, lsl #24", + "sub w22, w21, w20", + "mov x26, x22", "cfinv" ] }, "repnz cmpsw": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 46, "Comment": "0xa7", "ExpectedArm64ASM": [ - "cbz x5, #+0x68", + "mov x20, x5", + "cbz x20, #+0xb4", "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x24", - "ldrh w26, [x11]", - "ldrh w27, [x10]", - "subs x5, x5, #0x1 (1)", - "add x11, x11, #0x2 (2)", - "add x10, x10, #0x2 (2)", - "ccmp x27, x26, #nZcv, ne", - "b.ne #-0x18", - "b #+0x20", - "ldrh w26, [x11]", - "ldrh w27, [x10]", - "subs x5, x5, #0x1 (1)", - "sub x11, x11, #0x2 (2)", - "sub x10, x10, #0x2 (2)", - "ccmp x27, x26, #nZcv, ne", - "b.ne #-0x18", - "mov x20, x27", - "eor w27, w20, w26", - "lsl w0, w20, #16", - "cmp w0, w26, lsl #16", - "sub w26, w20, w26", + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x44", + "mov x20, x10", + "mov x21, x11", + "ldrh w22, [x21]", + "ldrh w23, [x20]", + "mov x26, x22", + "mov x27, x23", + "mov x24, x5", + "subs x25, x24, #0x1 (1)", + "mov x5, x25", + "add x24, x21, #0x2 (2)", + "mov x11, x24", + "add x21, x20, #0x2 (2)", + "mov x10, x21", + "ccmp x23, x22, #nZcv, ne", + "b.ne #-0x38", + "b #+0x40", + "mov x20, x10", + "mov x21, x11", + "ldrh w22, [x21]", + "ldrh w23, [x20]", + "mov x26, x22", + "mov x27, x23", + "mov x24, x5", + "subs x25, x24, #0x1 (1)", + "mov x5, x25", + "sub x24, x21, #0x2 (2)", + "mov x11, x24", + "sub x21, x20, #0x2 (2)", + "mov x10, x21", + "ccmp x23, x22, #nZcv, ne", + "b.ne #-0x38", + "mov x20, x26", + "mov x21, x27", + "eor w22, w21, w20", + "mov x27, x22", + "lsl w0, w21, #16", + "cmp w0, w20, lsl #16", + "sub w22, w21, w20", + "mov x26, x22", "cfinv" ] }, "repnz cmpsd": { - "ExpectedInstructionCount": 24, + "ExpectedInstructionCount": 44, "Comment": "0xa7", "ExpectedArm64ASM": [ - "cbz x5, #+0x60", + "mov x20, x5", + "cbz x20, #+0xac", "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x24", - "ldr w26, [x11]", - "ldr w27, [x10]", - "subs x5, x5, #0x1 (1)", - "add x11, x11, #0x4 (4)", - "add x10, x10, #0x4 (4)", - "ccmp x27, x26, #nZcv, ne", - "b.ne #-0x18", - "b #+0x20", - "ldr w26, [x11]", - "ldr w27, [x10]", - "subs x5, x5, #0x1 (1)", - "sub x11, x11, #0x4 (4)", - "sub x10, x10, #0x4 (4)", - "ccmp x27, x26, #nZcv, ne", - "b.ne #-0x18", - "mov x20, x27", - "eor w27, w20, w26", - "subs w26, w20, w26", + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x44", + "mov x20, x10", + "mov x21, x11", + "ldr w22, [x21]", + "ldr w23, [x20]", + "mov x26, x22", + "mov x27, x23", + "mov x24, x5", + "subs x25, x24, #0x1 (1)", + "mov x5, x25", + "add x24, x21, #0x4 (4)", + "mov x11, x24", + "add x21, x20, #0x4 (4)", + "mov x10, x21", + "ccmp x23, x22, #nZcv, ne", + "b.ne #-0x38", + "b #+0x40", + "mov x20, x10", + "mov x21, x11", + "ldr w22, [x21]", + "ldr w23, [x20]", + "mov x26, x22", + "mov x27, x23", + "mov x24, x5", + "subs x25, x24, #0x1 (1)", + "mov x5, x25", + "sub x24, x21, #0x4 (4)", + "mov x11, x24", + "sub x21, x20, #0x4 (4)", + "mov x10, x21", + "ccmp x23, x22, #nZcv, ne", + "b.ne #-0x38", + "mov x20, x26", + "mov x21, x27", + "eor w22, w21, w20", + "mov x27, x22", + "subs w22, w21, w20", + "mov x26, x22", "cfinv" ] }, "repnz cmpsq": { - "ExpectedInstructionCount": 24, + "ExpectedInstructionCount": 44, "Comment": "0xa7", "ExpectedArm64ASM": [ - "cbz x5, #+0x60", + "mov x20, x5", + "cbz x20, #+0xac", "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x24", - "ldr x26, [x11]", - "ldr x27, [x10]", - "subs x5, x5, #0x1 (1)", - "add x11, x11, #0x8 (8)", - "add x10, x10, #0x8 (8)", - "ccmp x27, x26, #nZcv, ne", - "b.ne #-0x18", - "b #+0x20", - "ldr x26, [x11]", - "ldr x27, [x10]", - "subs x5, x5, #0x1 (1)", - "sub x11, x11, #0x8 (8)", - "sub x10, x10, #0x8 (8)", - "ccmp x27, x26, #nZcv, ne", - "b.ne #-0x18", - "mov x20, x27", - "eor w27, w20, w26", - "subs x26, x20, x26", + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x44", + "mov x20, x10", + "mov x21, x11", + "ldr x22, [x21]", + "ldr x23, [x20]", + "mov x26, x22", + "mov x27, x23", + "mov x24, x5", + "subs x25, x24, #0x1 (1)", + "mov x5, x25", + "add x24, x21, #0x8 (8)", + "mov x11, x24", + "add x21, x20, #0x8 (8)", + "mov x10, x21", + "ccmp x23, x22, #nZcv, ne", + "b.ne #-0x38", + "b #+0x40", + "mov x20, x10", + "mov x21, x11", + "ldr x22, [x21]", + "ldr x23, [x20]", + "mov x26, x22", + "mov x27, x23", + "mov x24, x5", + "subs x25, x24, #0x1 (1)", + "mov x5, x25", + "sub x24, x21, #0x8 (8)", + "mov x11, x24", + "sub x21, x20, #0x8 (8)", + "mov x10, x21", + "ccmp x23, x22, #nZcv, ne", + "b.ne #-0x38", + "mov x20, x26", + "mov x21, x27", + "eor w22, w21, w20", + "mov x27, x22", + "subs x22, x21, x20", + "mov x26, x22", "cfinv" ] }, "test al, 1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0xa8", "ExpectedArm64ASM": [ - "and w26, w4, #0x1", - "cmn wzr, w26, lsl #24" + "mov x20, x4", + "and w21, w20, #0x1", + "cmn wzr, w21, lsl #24", + "mov x26, x21" ] }, "test ax, 1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0xa9", "ExpectedArm64ASM": [ - "and w26, w4, #0x1", - "cmn wzr, w26, lsl #16" + "mov x20, x4", + "and w21, w20, #0x1", + "cmn wzr, w21, lsl #16", + "mov x26, x21" ] }, "test eax, 1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0xa9", "ExpectedArm64ASM": [ - "ands w26, w4, #0x1" + "mov x20, x4", + "ands w21, w20, #0x1", + "mov x26, x21" ] }, "test rax, 1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0xa9", "ExpectedArm64ASM": [ - "ands x26, x4, #0x1" + "mov x20, x4", + "ands x21, x20, #0x1", + "mov x26, x21" ] }, "test al, -1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0xa8", "ExpectedArm64ASM": [ - "mov x26, x4", - "cmn wzr, w26, lsl #24" + "mov x20, x4", + "mov x21, x20", + "cmn wzr, w21, lsl #24", + "mov x26, x21" ] }, "test ax, -1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0xa9", "ExpectedArm64ASM": [ - "mov x26, x4", - "cmn wzr, w26, lsl #16" + "mov x20, x4", + "mov x21, x20", + "cmn wzr, w21, lsl #16", + "mov x26, x21" ] }, "test eax, -1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0xa9", "ExpectedArm64ASM": [ - "ands w26, w4, w4" + "mov x20, x4", + "ands w21, w20, w20", + "mov x26, x21" ] }, "test rax, -1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0xa9", "ExpectedArm64ASM": [ - "ands x26, x4, x4" + "mov x20, x4", + "ands x21, x20, x20", + "mov x26, x21" ] }, "scasb": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 13, "Comment": "0xae", "ExpectedArm64ASM": [ - "ldrb w20, [x11]", - "ldrsb x21, [x28, #714]", - "add x11, x11, x21", - "eor w27, w4, w20", - "lsl w0, w4, #24", - "cmp w0, w20, lsl #24", - "sub w26, w4, w20", + "mov x20, x11", + "mov x21, x4", + "ldrb w22, [x20]", + "ldrsb x23, [x28, #714]", + "add x24, x20, x23", + "mov x11, x24", + "eor w20, w21, w22", + "mov x27, x20", + "lsl w0, w21, #24", + "cmp w0, w22, lsl #24", + "sub w20, w21, w22", + "mov x26, x20", "cfinv" ] }, "scasw": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 13, "Comment": "0xaf", "ExpectedArm64ASM": [ - "ldrh w20, [x11]", - "ldrsb x21, [x28, #714]", - "add x11, x11, x21, lsl #1", - "eor w27, w4, w20", - "lsl w0, w4, #16", - "cmp w0, w20, lsl #16", - "sub w26, w4, w20", + "mov x20, x11", + "mov x21, x4", + "ldrh w22, [x20]", + "ldrsb x23, [x28, #714]", + "add x24, x20, x23, lsl #1", + "mov x11, x24", + "eor w20, w21, w22", + "mov x27, x20", + "lsl w0, w21, #16", + "cmp w0, w22, lsl #16", + "sub w20, w21, w22", + "mov x26, x20", "cfinv" ] }, "scasd": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 11, "Comment": "0xaf", "ExpectedArm64ASM": [ - "ldr w20, [x11]", - "ldrsb x21, [x28, #714]", - "add x11, x11, x21, lsl #2", - "eor w27, w4, w20", - "subs w26, w4, w20", + "mov x20, x11", + "mov x21, x4", + "ldr w22, [x20]", + "ldrsb x23, [x28, #714]", + "add x24, x20, x23, lsl #2", + "mov x11, x24", + "eor w20, w21, w22", + "mov x27, x20", + "subs w20, w21, w22", + "mov x26, x20", "cfinv" ] }, "scasq": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 11, "Comment": "0xaf", "ExpectedArm64ASM": [ - "ldr x20, [x11]", - "ldrsb x21, [x28, #714]", - "add x11, x11, x21, lsl #3", - "eor w27, w4, w20", - "subs x26, x4, x20", + "mov x20, x11", + "mov x21, x4", + "ldr x22, [x20]", + "ldrsb x23, [x28, #714]", + "add x24, x20, x23, lsl #3", + "mov x11, x24", + "eor w20, w21, w22", + "mov x27, x20", + "subs x20, x21, x22", + "mov x26, x20", "cfinv" ] }, "repz scasb": { - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 41, "Comment": "0xae", "ExpectedArm64ASM": [ "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x30", - "cbz x5, #+0x28", - "ldrb w20, [x11]", - "eor w27, w4, w20", - "lsl w0, w4, #24", - "cmp w0, w20, lsl #24", - "sub w26, w4, w20", + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x50", + "mov x20, x5", + "cbz x20, #+0x44", + "mov x20, x11", + "mov x21, x4", + "ldrb w22, [x20]", + "eor w23, w21, w22", + "mov x27, x23", + "lsl w0, w21, #24", + "cmp w0, w22, lsl #24", + "sub w23, w21, w22", + "mov x26, x23", "cfinv", - "sub x5, x5, #0x1 (1)", - "add x11, x11, #0x1 (1)", - "b.eq #-0x24", - "b #+0x2c", - "cbz x5, #+0x28", - "ldrb w20, [x11]", - "eor w27, w4, w20", - "lsl w0, w4, #24", - "cmp w0, w20, lsl #24", - "sub w26, w4, w20", + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "add x21, x20, #0x1 (1)", + "mov x11, x21", + "b.eq #-0x44", + "b #+0x4c", + "mov x20, x5", + "cbz x20, #+0x44", + "mov x20, x11", + "mov x21, x4", + "ldrb w22, [x20]", + "eor w23, w21, w22", + "mov x27, x23", + "lsl w0, w21, #24", + "cmp w0, w22, lsl #24", + "sub w23, w21, w22", + "mov x26, x23", "cfinv", - "sub x5, x5, #0x1 (1)", - "sub x11, x11, #0x1 (1)", - "b.eq #-0x24" + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "sub x21, x20, #0x1 (1)", + "mov x11, x21", + "b.eq #-0x44" ] }, "repz scasw": { - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 41, "Comment": "0xaf", "ExpectedArm64ASM": [ "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x30", - "cbz x5, #+0x28", - "ldrh w20, [x11]", - "eor w27, w4, w20", - "lsl w0, w4, #16", - "cmp w0, w20, lsl #16", - "sub w26, w4, w20", + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x50", + "mov x20, x5", + "cbz x20, #+0x44", + "mov x20, x11", + "mov x21, x4", + "ldrh w22, [x20]", + "eor w23, w21, w22", + "mov x27, x23", + "lsl w0, w21, #16", + "cmp w0, w22, lsl #16", + "sub w23, w21, w22", + "mov x26, x23", "cfinv", - "sub x5, x5, #0x1 (1)", - "add x11, x11, #0x2 (2)", - "b.eq #-0x24", - "b #+0x2c", - "cbz x5, #+0x28", - "ldrh w20, [x11]", - "eor w27, w4, w20", - "lsl w0, w4, #16", - "cmp w0, w20, lsl #16", - "sub w26, w4, w20", + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "add x21, x20, #0x2 (2)", + "mov x11, x21", + "b.eq #-0x44", + "b #+0x4c", + "mov x20, x5", + "cbz x20, #+0x44", + "mov x20, x11", + "mov x21, x4", + "ldrh w22, [x20]", + "eor w23, w21, w22", + "mov x27, x23", + "lsl w0, w21, #16", + "cmp w0, w22, lsl #16", + "sub w23, w21, w22", + "mov x26, x23", "cfinv", - "sub x5, x5, #0x1 (1)", - "sub x11, x11, #0x2 (2)", - "b.eq #-0x24" + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "sub x21, x20, #0x2 (2)", + "mov x11, x21", + "b.eq #-0x44" ] }, "repz scasd": { - "ExpectedInstructionCount": 21, + "ExpectedInstructionCount": 37, "Comment": "0xaf", "ExpectedArm64ASM": [ "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x28", - "cbz x5, #+0x20", - "ldr w20, [x11]", - "eor w27, w4, w20", - "subs w26, w4, w20", + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x48", + "mov x20, x5", + "cbz x20, #+0x3c", + "mov x20, x11", + "mov x21, x4", + "ldr w22, [x20]", + "eor w23, w21, w22", + "mov x27, x23", + "subs w23, w21, w22", + "mov x26, x23", "cfinv", - "sub x5, x5, #0x1 (1)", - "add x11, x11, #0x4 (4)", - "b.eq #-0x1c", - "b #+0x24", - "cbz x5, #+0x20", - "ldr w20, [x11]", - "eor w27, w4, w20", - "subs w26, w4, w20", + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "add x21, x20, #0x4 (4)", + "mov x11, x21", + "b.eq #-0x3c", + "b #+0x44", + "mov x20, x5", + "cbz x20, #+0x3c", + "mov x20, x11", + "mov x21, x4", + "ldr w22, [x20]", + "eor w23, w21, w22", + "mov x27, x23", + "subs w23, w21, w22", + "mov x26, x23", "cfinv", - "sub x5, x5, #0x1 (1)", - "sub x11, x11, #0x4 (4)", - "b.eq #-0x1c" + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "sub x21, x20, #0x4 (4)", + "mov x11, x21", + "b.eq #-0x3c" ] }, "repz scasq": { - "ExpectedInstructionCount": 21, + "ExpectedInstructionCount": 37, "Comment": "0xaf", "ExpectedArm64ASM": [ "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x28", - "cbz x5, #+0x20", - "ldr x20, [x11]", - "eor w27, w4, w20", - "subs x26, x4, x20", + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x48", + "mov x20, x5", + "cbz x20, #+0x3c", + "mov x20, x11", + "mov x21, x4", + "ldr x22, [x20]", + "eor w23, w21, w22", + "mov x27, x23", + "subs x23, x21, x22", + "mov x26, x23", "cfinv", - "sub x5, x5, #0x1 (1)", - "add x11, x11, #0x8 (8)", - "b.eq #-0x1c", - "b #+0x24", - "cbz x5, #+0x20", - "ldr x20, [x11]", - "eor w27, w4, w20", - "subs x26, x4, x20", + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "add x21, x20, #0x8 (8)", + "mov x11, x21", + "b.eq #-0x3c", + "b #+0x44", + "mov x20, x5", + "cbz x20, #+0x3c", + "mov x20, x11", + "mov x21, x4", + "ldr x22, [x20]", + "eor w23, w21, w22", + "mov x27, x23", + "subs x23, x21, x22", + "mov x26, x23", "cfinv", - "sub x5, x5, #0x1 (1)", - "sub x11, x11, #0x8 (8)", - "b.eq #-0x1c" + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "sub x21, x20, #0x8 (8)", + "mov x11, x21", + "b.eq #-0x3c" ] }, "repnz scasb": { - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 41, "Comment": "0xae", "ExpectedArm64ASM": [ "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x30", - "cbz x5, #+0x28", - "ldrb w20, [x11]", - "eor w27, w4, w20", - "lsl w0, w4, #24", - "cmp w0, w20, lsl #24", - "sub w26, w4, w20", + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x50", + "mov x20, x5", + "cbz x20, #+0x44", + "mov x20, x11", + "mov x21, x4", + "ldrb w22, [x20]", + "eor w23, w21, w22", + "mov x27, x23", + "lsl w0, w21, #24", + "cmp w0, w22, lsl #24", + "sub w23, w21, w22", + "mov x26, x23", "cfinv", - "sub x5, x5, #0x1 (1)", - "add x11, x11, #0x1 (1)", - "b.ne #-0x24", - "b #+0x2c", - "cbz x5, #+0x28", - "ldrb w20, [x11]", - "eor w27, w4, w20", - "lsl w0, w4, #24", - "cmp w0, w20, lsl #24", - "sub w26, w4, w20", + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "add x21, x20, #0x1 (1)", + "mov x11, x21", + "b.ne #-0x44", + "b #+0x4c", + "mov x20, x5", + "cbz x20, #+0x44", + "mov x20, x11", + "mov x21, x4", + "ldrb w22, [x20]", + "eor w23, w21, w22", + "mov x27, x23", + "lsl w0, w21, #24", + "cmp w0, w22, lsl #24", + "sub w23, w21, w22", + "mov x26, x23", "cfinv", - "sub x5, x5, #0x1 (1)", - "sub x11, x11, #0x1 (1)", - "b.ne #-0x24" + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "sub x21, x20, #0x1 (1)", + "mov x11, x21", + "b.ne #-0x44" ] }, "repnz scasw": { - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 41, "Comment": "0xaf", "ExpectedArm64ASM": [ "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x30", - "cbz x5, #+0x28", - "ldrh w20, [x11]", - "eor w27, w4, w20", - "lsl w0, w4, #16", - "cmp w0, w20, lsl #16", - "sub w26, w4, w20", + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x50", + "mov x20, x5", + "cbz x20, #+0x44", + "mov x20, x11", + "mov x21, x4", + "ldrh w22, [x20]", + "eor w23, w21, w22", + "mov x27, x23", + "lsl w0, w21, #16", + "cmp w0, w22, lsl #16", + "sub w23, w21, w22", + "mov x26, x23", "cfinv", - "sub x5, x5, #0x1 (1)", - "add x11, x11, #0x2 (2)", - "b.ne #-0x24", - "b #+0x2c", - "cbz x5, #+0x28", - "ldrh w20, [x11]", - "eor w27, w4, w20", - "lsl w0, w4, #16", - "cmp w0, w20, lsl #16", - "sub w26, w4, w20", + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "add x21, x20, #0x2 (2)", + "mov x11, x21", + "b.ne #-0x44", + "b #+0x4c", + "mov x20, x5", + "cbz x20, #+0x44", + "mov x20, x11", + "mov x21, x4", + "ldrh w22, [x20]", + "eor w23, w21, w22", + "mov x27, x23", + "lsl w0, w21, #16", + "cmp w0, w22, lsl #16", + "sub w23, w21, w22", + "mov x26, x23", "cfinv", - "sub x5, x5, #0x1 (1)", - "sub x11, x11, #0x2 (2)", - "b.ne #-0x24" + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "sub x21, x20, #0x2 (2)", + "mov x11, x21", + "b.ne #-0x44" ] }, "repnz scasd": { - "ExpectedInstructionCount": 21, + "ExpectedInstructionCount": 37, "Comment": "0xaf", "ExpectedArm64ASM": [ "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x28", - "cbz x5, #+0x20", - "ldr w20, [x11]", - "eor w27, w4, w20", - "subs w26, w4, w20", + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x48", + "mov x20, x5", + "cbz x20, #+0x3c", + "mov x20, x11", + "mov x21, x4", + "ldr w22, [x20]", + "eor w23, w21, w22", + "mov x27, x23", + "subs w23, w21, w22", + "mov x26, x23", "cfinv", - "sub x5, x5, #0x1 (1)", - "add x11, x11, #0x4 (4)", - "b.ne #-0x1c", - "b #+0x24", - "cbz x5, #+0x20", - "ldr w20, [x11]", - "eor w27, w4, w20", - "subs w26, w4, w20", + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "add x21, x20, #0x4 (4)", + "mov x11, x21", + "b.ne #-0x3c", + "b #+0x44", + "mov x20, x5", + "cbz x20, #+0x3c", + "mov x20, x11", + "mov x21, x4", + "ldr w22, [x20]", + "eor w23, w21, w22", + "mov x27, x23", + "subs w23, w21, w22", + "mov x26, x23", "cfinv", - "sub x5, x5, #0x1 (1)", - "sub x11, x11, #0x4 (4)", - "b.ne #-0x1c" + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "sub x21, x20, #0x4 (4)", + "mov x11, x21", + "b.ne #-0x3c" ] }, "repnz scasq": { - "ExpectedInstructionCount": 21, + "ExpectedInstructionCount": 37, "Comment": "0xaf", "ExpectedArm64ASM": [ "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x28", - "cbz x5, #+0x20", - "ldr x20, [x11]", - "eor w27, w4, w20", - "subs x26, x4, x20", + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x48", + "mov x20, x5", + "cbz x20, #+0x3c", + "mov x20, x11", + "mov x21, x4", + "ldr x22, [x20]", + "eor w23, w21, w22", + "mov x27, x23", + "subs x23, x21, x22", + "mov x26, x23", "cfinv", - "sub x5, x5, #0x1 (1)", - "add x11, x11, #0x8 (8)", - "b.ne #-0x1c", - "b #+0x24", - "cbz x5, #+0x20", - "ldr x20, [x11]", - "eor w27, w4, w20", - "subs x26, x4, x20", + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "add x21, x20, #0x8 (8)", + "mov x11, x21", + "b.ne #-0x3c", + "b #+0x44", + "mov x20, x5", + "cbz x20, #+0x3c", + "mov x20, x11", + "mov x21, x4", + "ldr x22, [x20]", + "eor w23, w21, w22", + "mov x27, x23", + "subs x23, x21, x22", + "mov x26, x23", "cfinv", - "sub x5, x5, #0x1 (1)", - "sub x11, x11, #0x8 (8)", - "b.ne #-0x1c" + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "sub x21, x20, #0x8 (8)", + "mov x11, x21", + "b.ne #-0x3c" ] }, "cmc": { diff --git a/unittests/InstructionCountCI/FlagM/PrimaryGroup.json b/unittests/InstructionCountCI/FlagM/PrimaryGroup.json index 6c1b652617..5fb1cf6c48 100644 --- a/unittests/InstructionCountCI/FlagM/PrimaryGroup.json +++ b/unittests/InstructionCountCI/FlagM/PrimaryGroup.json @@ -12,2426 +12,3074 @@ }, "Instructions": { "add al, 1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "GROUP1 0x80 /0", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "lsl w0, w27, #24", + "mov x21, x4", + "mov x27, x21", + "lsl w0, w21, #24", "cmn w0, w20, lsl #24", - "add w26, w27, #0x1 (1)", - "mov x4, x27", - "bfxil x4, x26, #0, #8" + "add w20, w21, #0x1 (1)", + "mov x26, x20", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "or al, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x80 /1", "ExpectedArm64ASM": [ - "orr w26, w4, #0x1", - "bfxil x4, x26, #0, #8", - "cmn wzr, w26, lsl #24" + "mov x20, x4", + "orr w21, w20, #0x1", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22", + "mov x26, x21", + "cmn wzr, w21, lsl #24" ] }, "adc al, 1": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 20, "Comment": "GROUP1 0x80 /2", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "cset w21, hs", - "adc w20, w27, w20", - "uxtb w26, w20", - "cmp w26, #0x1 (1)", - "cset x20, lo", - "cmp w26, #0x1 (1)", - "cset x22, ls", - "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #24", - "rmif x20, #63, #nzCv", - "bic w20, w26, w27", - "rmif x20, #7, #nzcV", - "mov x4, x27", - "bfxil x4, x26, #0, #8" + "mov x21, x4", + "mov x27, x21", + "cset w22, hs", + "adc w23, w21, w20", + "uxtb w20, w23", + "cmp w20, #0x1 (1)", + "cset x23, lo", + "cmp w20, #0x1 (1)", + "cset x24, ls", + "cmp x22, #0x1 (1)", + "csel x25, x24, x23, eq", + "cmn wzr, w20, lsl #24", + "rmif x25, #63, #nzCv", + "bic w22, w20, w21", + "rmif x22, #7, #nzcV", + "mov x26, x20", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "sbb al, 1": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 21, "Comment": "GROUP1 0x80 /3", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "cset w21, hs", - "add w20, w20, w21", - "sub w20, w27, w20", - "uxtb w26, w20", - "cmp x26, x27", + "mov x21, x4", + "mov x27, x21", + "cset w22, hs", + "add w23, w20, w22", + "sub w20, w21, w23", + "uxtb w23, w20", + "cmp x23, x21", "cset x20, hi", - "cmp x26, x27", - "cset x22, hs", - "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #24", - "rmif x20, #63, #nzCv", - "bic w20, w27, w26", + "cmp x23, x21", + "cset x24, hs", + "cmp x22, #0x1 (1)", + "csel x25, x24, x20, eq", + "cmn wzr, w23, lsl #24", + "rmif x25, #63, #nzCv", + "bic w20, w21, w23", "rmif x20, #7, #nzcV", - "mov x4, x27", - "bfxil x4, x26, #0, #8" + "mov x26, x23", + "mov x20, x21", + "bfxil x20, x23, #0, #8", + "mov x4, x20" ] }, "and al, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x80 /4", "ExpectedArm64ASM": [ - "and w26, w4, #0x1", - "cmn wzr, w26, lsl #24", - "bfxil x4, x26, #0, #8" + "mov x20, x4", + "and w21, w20, #0x1", + "cmn wzr, w21, lsl #24", + "mov x26, x21", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22" ] }, "sub al, 1": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 11, "Comment": "GROUP1 0x80 /5", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "lsl w0, w27, #24", + "mov x21, x4", + "mov x27, x21", + "lsl w0, w21, #24", "cmp w0, w20, lsl #24", - "sub w26, w27, #0x1 (1)", + "sub w20, w21, #0x1 (1)", + "mov x26, x20", "cfinv", - "mov x4, x27", - "bfxil x4, x26, #0, #8" + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "xor al, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x80 /6", "ExpectedArm64ASM": [ - "eor w26, w4, #0x1", - "bfxil x4, x26, #0, #8", - "cmn wzr, w26, lsl #24" + "mov x20, x4", + "eor w21, w20, #0x1", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22", + "mov x26, x21", + "cmn wzr, w21, lsl #24" ] }, "cmp al, 1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "GROUP1 0x80 /7", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "lsl w0, w27, #24", + "mov x21, x4", + "mov x27, x21", + "lsl w0, w21, #24", "cmp w0, w20, lsl #24", - "sub w26, w27, #0x1 (1)", + "sub w20, w21, #0x1 (1)", + "mov x26, x20", "cfinv" ] }, "add al, -1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 11, "Comment": "GROUP1 0x80 /0", "ExpectedArm64ASM": [ "mov w20, #0xff", - "mvn w27, w4", - "lsl w0, w4, #24", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "lsl w0, w21, #24", "cmn w0, w20, lsl #24", - "add w26, w4, #0xff (255)", - "bfxil x4, x26, #0, #8" + "add w20, w21, #0xff (255)", + "mov x26, x20", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "or al, -1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x80 /1", "ExpectedArm64ASM": [ - "orr w26, w4, #0xff", - "bfxil x4, x26, #0, #8", - "cmn wzr, w26, lsl #24" + "mov x20, x4", + "orr w21, w20, #0xff", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22", + "mov x26, x21", + "cmn wzr, w21, lsl #24" ] }, "adc al, -1": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 21, "Comment": "GROUP1 0x80 /2", "ExpectedArm64ASM": [ "mov w20, #0xff", - "mvn w27, w4", - "cset w21, hs", - "adc w20, w4, w20", - "uxtb w26, w20", - "cmp w26, #0xff (255)", - "cset x20, lo", - "cmp w26, #0xff (255)", - "cset x22, ls", - "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #24", - "rmif x20, #63, #nzCv", - "bic w20, w4, w26", - "rmif x20, #7, #nzcV", - "bfxil x4, x26, #0, #8" + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "cset w22, hs", + "adc w23, w21, w20", + "uxtb w20, w23", + "cmp w20, #0xff (255)", + "cset x23, lo", + "cmp w20, #0xff (255)", + "cset x24, ls", + "cmp x22, #0x1 (1)", + "csel x25, x24, x23, eq", + "cmn wzr, w20, lsl #24", + "rmif x25, #63, #nzCv", + "bic w22, w21, w20", + "rmif x22, #7, #nzcV", + "mov x26, x20", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "sbb al, -1": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 22, "Comment": "GROUP1 0x80 /3", "ExpectedArm64ASM": [ "mov w20, #0xff", - "mvn w27, w4", - "cset w21, hs", - "add w20, w20, w21", - "sub w20, w4, w20", - "uxtb w26, w20", - "cmp x26, x4", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "cset w22, hs", + "add w23, w20, w22", + "sub w20, w21, w23", + "uxtb w23, w20", + "cmp x23, x21", "cset x20, hi", - "cmp x26, x4", - "cset x22, hs", - "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #24", - "rmif x20, #63, #nzCv", - "bic w20, w26, w4", + "cmp x23, x21", + "cset x24, hs", + "cmp x22, #0x1 (1)", + "csel x25, x24, x20, eq", + "cmn wzr, w23, lsl #24", + "rmif x25, #63, #nzCv", + "bic w20, w23, w21", "rmif x20, #7, #nzcV", - "bfxil x4, x26, #0, #8" + "mov x26, x23", + "mov x20, x21", + "bfxil x20, x23, #0, #8", + "mov x4, x20" ] }, "and al, -1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x80 /4", "ExpectedArm64ASM": [ - "and w26, w4, #0xff", - "cmn wzr, w26, lsl #24", - "bfxil x4, x26, #0, #8" + "mov x20, x4", + "and w21, w20, #0xff", + "cmn wzr, w21, lsl #24", + "mov x26, x21", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22" ] }, "sub al, -1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 12, "Comment": "GROUP1 0x80 /5", "ExpectedArm64ASM": [ "mov w20, #0xff", - "mvn w27, w4", - "lsl w0, w4, #24", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "lsl w0, w21, #24", "cmp w0, w20, lsl #24", - "sub w26, w4, #0xff (255)", + "sub w20, w21, #0xff (255)", + "mov x26, x20", "cfinv", - "bfxil x4, x26, #0, #8" + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "xor al, -1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x80 /6", "ExpectedArm64ASM": [ - "eor w26, w4, #0xff", - "bfxil x4, x26, #0, #8", - "cmn wzr, w26, lsl #24" + "mov x20, x4", + "eor w21, w20, #0xff", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22", + "mov x26, x21", + "cmn wzr, w21, lsl #24" ] }, "cmp al, -1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": "GROUP1 0x80 /7", "ExpectedArm64ASM": [ "mov w20, #0xff", - "mvn w27, w4", - "lsl w0, w4, #24", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "lsl w0, w21, #24", "cmp w0, w20, lsl #24", - "sub w26, w4, #0xff (255)", + "sub w20, w21, #0xff (255)", + "mov x26, x20", "cfinv" ] }, "add ax, 256": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "GROUP1 0x81 /0", "ExpectedArm64ASM": [ "mov w20, #0x100", - "mov x27, x4", - "lsl w0, w27, #16", + "mov x21, x4", + "mov x27, x21", + "lsl w0, w21, #16", "cmn w0, w20, lsl #16", - "add w26, w27, #0x100 (256)", - "mov x4, x27", - "bfxil x4, x26, #0, #16" + "add w20, w21, #0x100 (256)", + "mov x26, x20", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x4, x22" ] }, "add eax, 256": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x81 /0", "ExpectedArm64ASM": [ - "mov x27, x4", - "adds w26, w27, #0x100 (256)", - "mov x4, x26" + "mov x20, x4", + "mov x27, x20", + "adds w21, w20, #0x100 (256)", + "mov x26, x21", + "mov x4, x21" ] }, "add rax, 256": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x81 /0", "ExpectedArm64ASM": [ - "mov x27, x4", - "adds x26, x27, #0x100 (256)", - "mov x4, x26" + "mov x20, x4", + "mov x27, x20", + "adds x21, x20, #0x100 (256)", + "mov x26, x21", + "mov x4, x21" ] }, "or eax, 256": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x81 /1", "ExpectedArm64ASM": [ - "orr w4, w4, #0x100", - "mov x26, x4", - "tst w4, w4" + "mov x20, x4", + "orr w21, w20, #0x100", + "mov x4, x21", + "mov x26, x21", + "tst w21, w21" ] }, "or rax, 256": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x81 /1", "ExpectedArm64ASM": [ - "orr x4, x4, #0x100", - "mov x26, x4", - "tst x4, x4" + "mov x20, x4", + "orr x21, x20, #0x100", + "mov x4, x21", + "mov x26, x21", + "tst x21, x21" ] }, "adc eax, 256": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /2", "ExpectedArm64ASM": [ "mov w20, #0x100", - "mov x27, x4", - "adcs w26, w27, w20", - "mov x4, x26" + "mov x21, x4", + "mov x27, x21", + "adcs w22, w21, w20", + "mov x26, x22", + "mov x4, x22" ] }, "adc rax, 256": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /2", "ExpectedArm64ASM": [ "mov w20, #0x100", - "mov x27, x4", - "adcs x26, x27, x20", - "mov x4, x26" + "mov x21, x4", + "mov x27, x21", + "adcs x22, x21, x20", + "mov x26, x22", + "mov x4, x22" ] }, "sbb eax, 256": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "GROUP1 0x81 /3", "ExpectedArm64ASM": [ "mov w20, #0x100", - "mov x27, x4", + "mov x21, x4", + "mov x27, x21", "cfinv", - "sbcs w26, w27, w20", + "sbcs w22, w21, w20", "cfinv", - "mov x4, x26" + "mov x26, x22", + "mov x4, x22" ] }, "sbb rax, 256": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "GROUP1 0x81 /3", "ExpectedArm64ASM": [ "mov w20, #0x100", - "mov x27, x4", + "mov x21, x4", + "mov x27, x21", "cfinv", - "sbcs x26, x27, x20", + "sbcs x22, x21, x20", "cfinv", - "mov x4, x26" + "mov x26, x22", + "mov x4, x22" ] }, "and eax, 256": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP1 0x81 /4", "ExpectedArm64ASM": [ - "ands w26, w4, #0x100", - "mov x4, x26" + "mov x20, x4", + "ands w21, w20, #0x100", + "mov x26, x21", + "mov x4, x21" ] }, "and rax, 256": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP1 0x81 /4", "ExpectedArm64ASM": [ - "ands x26, x4, #0x100", - "mov x4, x26" + "mov x20, x4", + "ands x21, x20, #0x100", + "mov x26, x21", + "mov x4, x21" ] }, "sub eax, 256": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /5", "ExpectedArm64ASM": [ - "mov x27, x4", - "subs w26, w27, #0x100 (256)", + "mov x20, x4", + "mov x27, x20", + "subs w21, w20, #0x100 (256)", + "mov x26, x21", "cfinv", - "mov x4, x26" + "mov x4, x21" ] }, "sub rax, 256": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /5", "ExpectedArm64ASM": [ - "mov x27, x4", - "subs x26, x27, #0x100 (256)", + "mov x20, x4", + "mov x27, x20", + "subs x21, x20, #0x100 (256)", + "mov x26, x21", "cfinv", - "mov x4, x26" + "mov x4, x21" ] }, "xor eax, 256": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x81 /6", "ExpectedArm64ASM": [ - "eor w4, w4, #0x100", - "mov x26, x4", - "tst w4, w4" + "mov x20, x4", + "eor w21, w20, #0x100", + "mov x4, x21", + "mov x26, x21", + "tst w21, w21" ] }, "xor rax, 256": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x81 /6", "ExpectedArm64ASM": [ - "eor x4, x4, #0x100", - "mov x26, x4", - "tst x4, x4" + "mov x20, x4", + "eor x21, x20, #0x100", + "mov x4, x21", + "mov x26, x21", + "tst x21, x21" ] }, "cmp eax, 256": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x81 /7", "ExpectedArm64ASM": [ - "mov x27, x4", - "subs w26, w27, #0x100 (256)", + "mov x20, x4", + "mov x27, x20", + "subs w21, w20, #0x100 (256)", + "mov x26, x21", "cfinv" ] }, "cmp rax, 256": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x81 /7", "ExpectedArm64ASM": [ - "mov x27, x4", - "subs x26, x27, #0x100 (256)", + "mov x20, x4", + "mov x27, x20", + "subs x21, x20, #0x100 (256)", + "mov x26, x21", "cfinv" ] }, "add ax, -256": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "GROUP1 0x81 /0", "ExpectedArm64ASM": [ "mov w20, #0xff00", - "mov x27, x4", - "lsl w0, w27, #16", + "mov x21, x4", + "mov x27, x21", + "lsl w0, w21, #16", "cmn w0, w20, lsl #16", - "add w26, w27, w20", - "mov x4, x27", - "bfxil x4, x26, #0, #16" + "add w22, w21, w20", + "mov x26, x22", + "mov x20, x21", + "bfxil x20, x22, #0, #16", + "mov x4, x20" ] }, "add eax, -256": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /0", "ExpectedArm64ASM": [ "mov w20, #0xffffff00", - "mov x27, x4", - "adds w26, w27, w20", - "mov x4, x26" + "mov x21, x4", + "mov x27, x21", + "adds w22, w21, w20", + "mov x26, x22", + "mov x4, x22" ] }, "add rax, -256": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x81 /0", "ExpectedArm64ASM": [ - "mov x27, x4", - "subs x26, x27, #0x100 (256)", - "mov x4, x26" + "mov x20, x4", + "mov x27, x20", + "subs x21, x20, #0x100 (256)", + "mov x26, x21", + "mov x4, x21" ] }, "or eax, -256": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x81 /1", "ExpectedArm64ASM": [ - "orr w4, w4, #0xffffff00", - "mov x26, x4", - "tst w4, w4" + "mov x20, x4", + "orr w21, w20, #0xffffff00", + "mov x4, x21", + "mov x26, x21", + "tst w21, w21" ] }, "or rax, -256": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x81 /1", "ExpectedArm64ASM": [ - "orr x4, x4, #0xffffffffffffff00", - "mov x26, x4", - "tst x4, x4" + "mov x20, x4", + "orr x21, x20, #0xffffffffffffff00", + "mov x4, x21", + "mov x26, x21", + "tst x21, x21" ] }, "adc eax, -256": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /2", "ExpectedArm64ASM": [ "mov w20, #0xffffff00", - "mov x27, x4", - "adcs w26, w27, w20", - "mov x4, x26" + "mov x21, x4", + "mov x27, x21", + "adcs w22, w21, w20", + "mov x26, x22", + "mov x4, x22" ] }, "adc rax, -256": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /2", "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffff00", - "mov x27, x4", - "adcs x26, x27, x20", - "mov x4, x26" + "mov x21, x4", + "mov x27, x21", + "adcs x22, x21, x20", + "mov x26, x22", + "mov x4, x22" ] }, "sbb eax, -256": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "GROUP1 0x81 /3", "ExpectedArm64ASM": [ "mov w20, #0xffffff00", - "mov x27, x4", + "mov x21, x4", + "mov x27, x21", "cfinv", - "sbcs w26, w27, w20", + "sbcs w22, w21, w20", "cfinv", - "mov x4, x26" + "mov x26, x22", + "mov x4, x22" ] }, "sbb rax, -256": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "GROUP1 0x81 /3", "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffff00", - "mov x27, x4", + "mov x21, x4", + "mov x27, x21", "cfinv", - "sbcs x26, x27, x20", + "sbcs x22, x21, x20", "cfinv", - "mov x4, x26" + "mov x26, x22", + "mov x4, x22" ] }, "and eax, -256": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP1 0x81 /4", "ExpectedArm64ASM": [ - "ands w26, w4, #0xffffff00", - "mov x4, x26" + "mov x20, x4", + "ands w21, w20, #0xffffff00", + "mov x26, x21", + "mov x4, x21" ] }, "and rax, -256": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP1 0x81 /4", "ExpectedArm64ASM": [ - "ands x26, x4, #0xffffffffffffff00", - "mov x4, x26" + "mov x20, x4", + "ands x21, x20, #0xffffffffffffff00", + "mov x26, x21", + "mov x4, x21" ] }, "sub eax, -256": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x81 /5", "ExpectedArm64ASM": [ "mov w20, #0xffffff00", - "mov x27, x4", - "subs w26, w27, w20", + "mov x21, x4", + "mov x27, x21", + "subs w22, w21, w20", + "mov x26, x22", "cfinv", - "mov x4, x26" + "mov x4, x22" ] }, "sub rax, -256": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /5", "ExpectedArm64ASM": [ - "mov x27, x4", - "adds x26, x27, #0x100 (256)", + "mov x20, x4", + "mov x27, x20", + "adds x21, x20, #0x100 (256)", + "mov x26, x21", "cfinv", - "mov x4, x26" + "mov x4, x21" ] }, "xor eax, -256": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x81 /6", "ExpectedArm64ASM": [ - "eor w4, w4, #0xffffff00", - "mov x26, x4", - "tst w4, w4" + "mov x20, x4", + "eor w21, w20, #0xffffff00", + "mov x4, x21", + "mov x26, x21", + "tst w21, w21" ] }, "xor rax, -256": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x81 /6", "ExpectedArm64ASM": [ - "eor x4, x4, #0xffffffffffffff00", - "mov x26, x4", - "tst x4, x4" + "mov x20, x4", + "eor x21, x20, #0xffffffffffffff00", + "mov x4, x21", + "mov x26, x21", + "tst x21, x21" ] }, "cmp eax, -256": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /7", "ExpectedArm64ASM": [ "mov w20, #0xffffff00", - "mov x27, x4", - "subs w26, w27, w20", + "mov x21, x4", + "mov x27, x21", + "subs w22, w21, w20", + "mov x26, x22", "cfinv" ] }, "cmp rax, -256": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x81 /7", "ExpectedArm64ASM": [ - "mov x27, x4", - "adds x26, x27, #0x100 (256)", + "mov x20, x4", + "mov x27, x20", + "adds x21, x20, #0x100 (256)", + "mov x26, x21", "cfinv" ] }, "add ax, 1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "GROUP1 0x83 /0", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "lsl w0, w27, #16", + "mov x21, x4", + "mov x27, x21", + "lsl w0, w21, #16", "cmn w0, w20, lsl #16", - "add w26, w27, #0x1 (1)", - "mov x4, x27", - "bfxil x4, x26, #0, #16" + "add w20, w21, #0x1 (1)", + "mov x26, x20", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x4, x22" ] }, "add eax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x83 /0", "ExpectedArm64ASM": [ - "mov x27, x4", - "adds w26, w27, #0x1 (1)", - "mov x4, x26" + "mov x20, x4", + "mov x27, x20", + "adds w21, w20, #0x1 (1)", + "mov x26, x21", + "mov x4, x21" ] }, "add rax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x83 /0", "ExpectedArm64ASM": [ - "mov x27, x4", - "adds x26, x27, #0x1 (1)", - "mov x4, x26" + "mov x20, x4", + "mov x27, x20", + "adds x21, x20, #0x1 (1)", + "mov x26, x21", + "mov x4, x21" ] }, "or eax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x83 /1", "ExpectedArm64ASM": [ - "orr w4, w4, #0x1", - "mov x26, x4", - "tst w4, w4" + "mov x20, x4", + "orr w21, w20, #0x1", + "mov x4, x21", + "mov x26, x21", + "tst w21, w21" ] }, "or rax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x83 /1", "ExpectedArm64ASM": [ - "orr x4, x4, #0x1", - "mov x26, x4", - "tst x4, x4" + "mov x20, x4", + "orr x21, x20, #0x1", + "mov x4, x21", + "mov x26, x21", + "tst x21, x21" ] }, "adc eax, 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /2", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "adcs w26, w27, w20", - "mov x4, x26" + "mov x21, x4", + "mov x27, x21", + "adcs w22, w21, w20", + "mov x26, x22", + "mov x4, x22" ] }, "adc rax, 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /2", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "adcs x26, x27, x20", - "mov x4, x26" + "mov x21, x4", + "mov x27, x21", + "adcs x22, x21, x20", + "mov x26, x22", + "mov x4, x22" ] }, "sbb eax, 1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "GROUP1 0x83 /3", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", + "mov x21, x4", + "mov x27, x21", "cfinv", - "sbcs w26, w27, w20", + "sbcs w22, w21, w20", "cfinv", - "mov x4, x26" + "mov x26, x22", + "mov x4, x22" ] }, "sbb rax, 1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "GROUP1 0x83 /3", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", + "mov x21, x4", + "mov x27, x21", "cfinv", - "sbcs x26, x27, x20", + "sbcs x22, x21, x20", "cfinv", - "mov x4, x26" + "mov x26, x22", + "mov x4, x22" ] }, "and eax, 1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP1 0x83 /4", "ExpectedArm64ASM": [ - "ands w26, w4, #0x1", - "mov x4, x26" + "mov x20, x4", + "ands w21, w20, #0x1", + "mov x26, x21", + "mov x4, x21" ] }, "and rax, 1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP1 0x83 /4", "ExpectedArm64ASM": [ - "ands x26, x4, #0x1", - "mov x4, x26" + "mov x20, x4", + "ands x21, x20, #0x1", + "mov x26, x21", + "mov x4, x21" ] }, "sub eax, 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /5", "ExpectedArm64ASM": [ - "mov x27, x4", - "subs w26, w27, #0x1 (1)", + "mov x20, x4", + "mov x27, x20", + "subs w21, w20, #0x1 (1)", + "mov x26, x21", "cfinv", - "mov x4, x26" + "mov x4, x21" ] }, "sub rax, 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /5", "ExpectedArm64ASM": [ - "mov x27, x4", - "subs x26, x27, #0x1 (1)", + "mov x20, x4", + "mov x27, x20", + "subs x21, x20, #0x1 (1)", + "mov x26, x21", "cfinv", - "mov x4, x26" + "mov x4, x21" ] }, "xor eax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x83 /6", "ExpectedArm64ASM": [ - "eor w4, w4, #0x1", - "mov x26, x4", - "tst w4, w4" + "mov x20, x4", + "eor w21, w20, #0x1", + "mov x4, x21", + "mov x26, x21", + "tst w21, w21" ] }, "xor rax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x83 /6", "ExpectedArm64ASM": [ - "eor x4, x4, #0x1", - "mov x26, x4", - "tst x4, x4" + "mov x20, x4", + "eor x21, x20, #0x1", + "mov x4, x21", + "mov x26, x21", + "tst x21, x21" ] }, "cmp eax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x83 /7", "ExpectedArm64ASM": [ - "mov x27, x4", - "subs w26, w27, #0x1 (1)", + "mov x20, x4", + "mov x27, x20", + "subs w21, w20, #0x1 (1)", + "mov x26, x21", "cfinv" ] }, "cmp rax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x83 /7", "ExpectedArm64ASM": [ - "mov x27, x4", - "subs x26, x27, #0x1 (1)", + "mov x20, x4", + "mov x27, x20", + "subs x21, x20, #0x1 (1)", + "mov x26, x21", "cfinv" ] }, "add ax, -1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 11, "Comment": "GROUP1 0x83 /0", "ExpectedArm64ASM": [ "mov w20, #0xffff", - "mvn w27, w4", - "lsl w0, w4, #16", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "lsl w0, w21, #16", "cmn w0, w20, lsl #16", - "add w26, w4, w20", - "bfxil x4, x26, #0, #16" + "add w22, w21, w20", + "mov x26, x22", + "mov x20, x21", + "bfxil x20, x22, #0, #16", + "mov x4, x20" ] }, "add eax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x83 /0", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "mvn w27, w4", - "adds w26, w4, w20", - "mov x4, x26" + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "adds w22, w21, w20", + "mov x26, x22", + "mov x4, x22" ] }, "add rax, -1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /0", "ExpectedArm64ASM": [ - "mvn w27, w4", - "subs x26, x4, #0x1 (1)", - "mov x4, x26" + "mov x20, x4", + "mvn w21, w20", + "mov x27, x21", + "subs x21, x20, #0x1 (1)", + "mov x26, x21", + "mov x4, x21" ] }, "or eax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /-1", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "orr w4, w4, w20", - "mov x26, x4", - "tst w4, w4" + "mov x21, x4", + "orr w22, w21, w20", + "mov x4, x22", + "mov x26, x22", + "tst w22, w22" ] }, "or rax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /-1", "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", - "orr x4, x4, x20", - "mov x26, x4", - "tst x4, x4" + "mov x21, x4", + "orr x22, x21, x20", + "mov x4, x22", + "mov x26, x22", + "tst x22, x22" ] }, "adc eax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x83 /2", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "mvn w27, w4", - "adcs w26, w4, w20", - "mov x4, x26" + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "adcs w22, w21, w20", + "mov x26, x22", + "mov x4, x22" ] }, "adc rax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x83 /2", "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", - "mvn w27, w4", - "adcs x26, x4, x20", - "mov x4, x26" + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "adcs x22, x21, x20", + "mov x26, x22", + "mov x4, x22" ] }, "sbb eax, -1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": "GROUP1 0x83 /3", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "mvn w27, w4", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", "cfinv", - "sbcs w26, w4, w20", + "sbcs w22, w21, w20", "cfinv", - "mov x4, x26" + "mov x26, x22", + "mov x4, x22" ] }, "sbb rax, -1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": "GROUP1 0x83 /3", "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", - "mvn w27, w4", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", "cfinv", - "sbcs x26, x4, x20", + "sbcs x22, x21, x20", "cfinv", - "mov x4, x26" + "mov x26, x22", + "mov x4, x22" ] }, "and eax, -1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x83 /4", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "ands w26, w4, w20", - "mov x4, x26" + "mov x21, x4", + "ands w22, w21, w20", + "mov x26, x22", + "mov x4, x22" ] }, "and rax, -1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x83 /4", "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", - "ands x26, x4, x20", - "mov x4, x26" + "mov x21, x4", + "ands x22, x21, x20", + "mov x26, x22", + "mov x4, x22" ] }, "sub eax, -1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "GROUP1 0x83 /5", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "mvn w27, w4", - "subs w26, w4, w20", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "subs w22, w21, w20", + "mov x26, x22", "cfinv", - "mov x4, x26" + "mov x4, x22" ] }, "sub rax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x83 /5", "ExpectedArm64ASM": [ - "mvn w27, w4", - "adds x26, x4, #0x1 (1)", + "mov x20, x4", + "mvn w21, w20", + "mov x27, x21", + "adds x21, x20, #0x1 (1)", + "mov x26, x21", "cfinv", - "mov x4, x26" + "mov x4, x21" ] }, "xor eax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /6", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "eor w4, w4, w20", - "mov x26, x4", - "tst w4, w4" + "mov x21, x4", + "eor w22, w21, w20", + "mov x4, x22", + "mov x26, x22", + "tst w22, w22" ] }, "xor rax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /6", "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", - "eor x4, x4, x20", - "mov x26, x4", - "tst x4, x4" + "mov x21, x4", + "eor x22, x21, x20", + "mov x4, x22", + "mov x26, x22", + "tst x22, x22" ] }, "cmp eax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x83 /7", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "mvn w27, w4", - "subs w26, w4, w20", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "subs w22, w21, w20", + "mov x26, x22", "cfinv" ] }, "cmp rax, -1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /7", "ExpectedArm64ASM": [ - "mvn w27, w4", - "adds x26, x4, #0x1 (1)", + "mov x20, x4", + "mvn w21, w20", + "mov x27, x21", + "adds x21, x20, #0x1 (1)", + "mov x26, x21", "cfinv" ] }, "rol al, 2": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": "GROUP2 0xC0 /0", "ExpectedArm64ASM": [ - "mov w20, w4", - "bfi w20, w4, #8, #8", - "bfi w20, w20, #16, #16", - "ror w20, w20, #30", - "bfxil x4, x20, #0, #8", - "rmif x20, #63, #nzCv" + "mov x20, x4", + "mov w21, w20", + "bfi w21, w20, #8, #8", + "mov w22, w21", + "bfi w22, w21, #16, #16", + "ror w21, w22, #30", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22", + "rmif x21, #63, #nzCv" ] }, "ror al, 2": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "GROUP2 0xC0 /1", "ExpectedArm64ASM": [ - "mov w20, w4", - "bfi w20, w4, #8, #8", - "ror w20, w20, #2", - "bfxil x4, x20, #0, #8", - "rmif x20, #6, #nzCv" + "mov x20, x4", + "mov w21, w20", + "bfi w21, w20, #8, #8", + "ror w22, w21, #2", + "mov x21, x20", + "bfxil x21, x22, #0, #8", + "mov x4, x21", + "rmif x22, #6, #nzCv" ] }, "rcl al, 2": { - "ExpectedInstructionCount": 20, + "ExpectedInstructionCount": 32, "Comment": "GROUP2 0xC0 /2", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "mov w21, #0x0", - "cset w22, hs", - "bfi x21, x20, #55, #8", - "bfi x21, x22, #63, #1", - "bfi x21, x20, #46, #8", - "bfi x21, x22, #54, #1", - "bfi x21, x20, #37, #8", - "bfi x21, x22, #45, #1", - "bfi x21, x20, #28, #8", - "bfi x21, x22, #36, #1", - "bfi x21, x20, #19, #8", - "bfi x21, x22, #27, #1", - "mov x0, x21", - "bfxil x0, x20, #0, #8", - "mov x20, x0", - "ror x21, x20, #62", - "bfxil x4, x21, #0, #8", - "ror x20, x20, #61", - "rmif x20, #63, #nzCv" + "mov x20, x4", + "uxtb w21, w20", + "mov w22, #0x0", + "cset w23, hs", + "mov x24, x22", + "bfi x24, x21, #55, #8", + "mov x22, x24", + "bfi x22, x23, #63, #1", + "mov x24, x22", + "bfi x24, x21, #46, #8", + "mov x22, x24", + "bfi x22, x23, #54, #1", + "mov x24, x22", + "bfi x24, x21, #37, #8", + "mov x22, x24", + "bfi x22, x23, #45, #1", + "mov x24, x22", + "bfi x24, x21, #28, #8", + "mov x22, x24", + "bfi x22, x23, #36, #1", + "mov x24, x22", + "bfi x24, x21, #19, #8", + "mov x22, x24", + "bfi x22, x23, #27, #1", + "mov x23, x22", + "bfxil x23, x21, #0, #8", + "ror x21, x23, #62", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22", + "ror x20, x23, #61", + "rmif x20, #63, #nzCv" ] }, "rcr al, 2": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 16, "Comment": "GROUP2 0xC0 /3", "ExpectedArm64ASM": [ "cset w20, hs", - "uxtb w21, w4", - "mov x0, x21", - "bfi x0, x20, #8, #1", - "mov x20, x0", - "bfi x20, x20, #9, #9", - "bfi x20, x20, #18, #18", - "bfi x20, x20, #36, #9", - "lsr w21, w20, #2", - "bfxil x4, x21, #0, #8", + "mov x21, x4", + "uxtb w22, w21", + "mov x23, x22", + "bfi x23, x20, #8, #1", + "mov x20, x23", + "bfi x20, x23, #9, #9", + "mov x22, x20", + "bfi x22, x20, #18, #18", + "mov x20, x22", + "bfi x20, x22, #36, #9", + "lsr w22, w20, #2", + "mov x23, x21", + "bfxil x23, x22, #0, #8", + "mov x4, x23", "rmif x20, #0, #nzCv" ] }, "shl al, 2": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": "GROUP2 0xC0 /4", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "lsl w26, w20, #2", - "bfxil x4, x26, #0, #8", - "cmn wzr, w26, lsl #24", - "rmif x20, #5, #nzCv" + "mov x20, x4", + "uxtb w21, w20", + "lsl w22, w21, #2", + "mov x23, x20", + "bfxil x23, x22, #0, #8", + "mov x4, x23", + "cmn wzr, w22, lsl #24", + "rmif x21, #5, #nzCv", + "mov x26, x22" ] }, "shr al, 2": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": "GROUP2 0xC0 /5", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "lsr w26, w20, #2", - "bfxil x4, x26, #0, #8", - "cmn wzr, w26, lsl #24", - "rmif x20, #0, #nzCv" + "mov x20, x4", + "uxtb w21, w20", + "lsr w22, w21, #2", + "mov x23, x20", + "bfxil x23, x22, #0, #8", + "mov x4, x23", + "cmn wzr, w22, lsl #24", + "rmif x21, #0, #nzCv", + "mov x26, x22" ] }, "sar al, 2": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": "GROUP2 0xC0 /7", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "sxtb x20, w20", - "asr x26, x20, #2", - "bfxil x4, x26, #0, #8", - "cmn wzr, w26, lsl #24", - "rmif x20, #0, #nzCv" + "mov x20, x4", + "uxtb w21, w20", + "sxtb x22, w21", + "asr x21, x22, #2", + "mov x23, x20", + "bfxil x23, x21, #0, #8", + "mov x4, x23", + "cmn wzr, w21, lsl #24", + "rmif x22, #0, #nzCv", + "mov x26, x21" ] }, "rol ax, 2": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "GROUP2 0xC1 /0", "ExpectedArm64ASM": [ - "mov w20, w4", - "bfi w20, w4, #16, #16", - "ror w20, w20, #30", - "bfxil x4, x20, #0, #16", - "rmif x20, #63, #nzCv" + "mov x20, x4", + "mov w21, w20", + "bfi w21, w20, #16, #16", + "ror w22, w21, #30", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21", + "rmif x22, #63, #nzCv" ] }, "rol eax, 2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP2 0xC1 /0", "ExpectedArm64ASM": [ - "ror w4, w4, #30", - "rmif x4, #63, #nzCv" + "mov x20, x4", + "ror w21, w20, #30", + "mov x4, x21", + "rmif x21, #63, #nzCv" ] }, "rol rax, 2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP2 0xC1 /0", "ExpectedArm64ASM": [ - "ror x4, x4, #62", - "rmif x4, #63, #nzCv" + "mov x20, x4", + "ror x21, x20, #62", + "mov x4, x21", + "rmif x21, #63, #nzCv" ] }, "ror ax, 2": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "GROUP2 0xC1 /1", "ExpectedArm64ASM": [ - "mov w20, w4", - "bfi w20, w4, #16, #16", - "ror w20, w20, #2", - "bfxil x4, x20, #0, #16", - "rmif x20, #14, #nzCv" + "mov x20, x4", + "mov w21, w20", + "bfi w21, w20, #16, #16", + "ror w22, w21, #2", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21", + "rmif x22, #14, #nzCv" ] }, "ror eax, 2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP2 0xC1 /1", "ExpectedArm64ASM": [ - "ror w4, w4, #2", - "rmif x4, #30, #nzCv" + "mov x20, x4", + "ror w21, w20, #2", + "mov x4, x21", + "rmif x21, #30, #nzCv" ] }, "ror rax, 2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP2 0xC1 /1", "ExpectedArm64ASM": [ - "ror x4, x4, #2", - "rmif x4, #62, #nzCv" + "mov x20, x4", + "ror x21, x20, #2", + "mov x4, x21", + "rmif x21, #62, #nzCv" ] }, "rcl ax, 2": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 24, "Comment": "GROUP2 0xC1 /2", "ExpectedArm64ASM": [ - "uxth w20, w4", - "mov w21, #0x0", - "cset w22, hs", - "bfi x21, x20, #47, #16", - "bfi x21, x22, #63, #1", - "bfi x21, x20, #30, #16", - "bfi x21, x22, #46, #1", - "bfi x21, x20, #13, #16", - "bfi x21, x22, #29, #1", - "mov x0, x21", - "bfxil x0, x20, #0, #16", - "mov x20, x0", - "ror x21, x20, #62", - "bfxil x4, x21, #0, #16", - "ror x20, x20, #61", + "mov x20, x4", + "uxth w21, w20", + "mov w22, #0x0", + "cset w23, hs", + "mov x24, x22", + "bfi x24, x21, #47, #16", + "mov x22, x24", + "bfi x22, x23, #63, #1", + "mov x24, x22", + "bfi x24, x21, #30, #16", + "mov x22, x24", + "bfi x22, x23, #46, #1", + "mov x24, x22", + "bfi x24, x21, #13, #16", + "mov x22, x24", + "bfi x22, x23, #29, #1", + "mov x23, x22", + "bfxil x23, x21, #0, #16", + "ror x21, x23, #62", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22", + "ror x20, x23, #61", "rmif x20, #63, #nzCv" ] }, "rcl eax, 2": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "GROUP2 0xC1 /2", "ExpectedArm64ASM": [ - "lsl w20, w4, #2", - "cset w21, hs", - "orr w20, w20, w4, lsr #31", - "rmif x4, #29, #nzCv", - "orr w4, w20, w21, lsl #1" + "mov x20, x4", + "lsl w21, w20, #2", + "cset w22, hs", + "orr w23, w21, w20, lsr #31", + "rmif x20, #29, #nzCv", + "orr w20, w23, w22, lsl #1", + "mov x4, x20" ] }, "rcl rax, 2": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "GROUP2 0xC1 /2", "ExpectedArm64ASM": [ - "lsl x20, x4, #2", - "cset w21, hs", - "orr x20, x20, x4, lsr #63", - "rmif x4, #61, #nzCv", - "orr x4, x20, x21, lsl #1" + "mov x20, x4", + "lsl x21, x20, #2", + "cset w22, hs", + "orr x23, x21, x20, lsr #63", + "rmif x20, #61, #nzCv", + "orr x20, x23, x22, lsl #1", + "mov x4, x20" ] }, "rcr ax, 2": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 14, "Comment": "GROUP2 0xC1 /3", "ExpectedArm64ASM": [ "cset w20, hs", - "uxth w21, w4", - "mov x0, x21", - "bfi x0, x20, #16, #1", - "mov x20, x0", - "bfi x20, x20, #17, #17", - "bfi x20, x20, #34, #17", - "lsr w21, w20, #2", - "bfxil x4, x21, #0, #16", - "rmif x20, #0, #nzCv" + "mov x21, x4", + "uxth w22, w21", + "mov x23, x22", + "bfi x23, x20, #16, #1", + "mov x20, x23", + "bfi x20, x23, #17, #17", + "mov x22, x20", + "bfi x22, x20, #34, #17", + "lsr w20, w22, #2", + "mov x23, x21", + "bfxil x23, x20, #0, #16", + "mov x4, x23", + "rmif x22, #0, #nzCv" ] }, "rcr eax, 2": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "GROUP2 0xC1 /3", "ExpectedArm64ASM": [ - "lsr w20, w4, #2", - "cset w21, hs", - "orr w20, w20, w4, lsl #31", - "rmif x4, #0, #nzCv", - "orr w4, w20, w21, lsl #30" + "mov x20, x4", + "lsr w21, w20, #2", + "cset w22, hs", + "orr w23, w21, w20, lsl #31", + "rmif x20, #0, #nzCv", + "orr w20, w23, w22, lsl #30", + "mov x4, x20" ] }, "rcr rax, 2": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "GROUP2 0xC1 /3", "ExpectedArm64ASM": [ - "lsr x20, x4, #2", - "cset w21, hs", - "orr x20, x20, x4, lsl #63", - "rmif x4, #0, #nzCv", - "orr x4, x20, x21, lsl #62" + "mov x20, x4", + "lsr x21, x20, #2", + "cset w22, hs", + "orr x23, x21, x20, lsl #63", + "rmif x20, #0, #nzCv", + "orr x20, x23, x22, lsl #62", + "mov x4, x20" ] }, "shl ax, 2": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": "GROUP2 0xC1 /4", "ExpectedArm64ASM": [ - "uxth w20, w4", - "lsl w26, w20, #2", - "bfxil x4, x26, #0, #16", - "cmn wzr, w26, lsl #16", - "rmif x20, #13, #nzCv" + "mov x20, x4", + "uxth w21, w20", + "lsl w22, w21, #2", + "mov x23, x20", + "bfxil x23, x22, #0, #16", + "mov x4, x23", + "cmn wzr, w22, lsl #16", + "rmif x21, #13, #nzCv", + "mov x26, x22" ] }, "shl eax, 2": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "GROUP2 0xC1 /4", "ExpectedArm64ASM": [ - "mov w20, w4", - "lsl w4, w20, #2", - "tst w4, w4", - "rmif x20, #29, #nzCv", - "mov x26, x4" + "mov x20, x4", + "mov w21, w20", + "lsl w20, w21, #2", + "mov x4, x20", + "tst w20, w20", + "rmif x21, #29, #nzCv", + "mov x26, x20" ] }, "shl rax, 2": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": "GROUP2 0xC1 /4", "ExpectedArm64ASM": [ "mov x20, x4", - "lsl x4, x20, #2", - "tst x4, x4", + "lsl x21, x20, #2", + "mov x4, x21", + "tst x21, x21", "rmif x20, #61, #nzCv", - "mov x26, x4" + "mov x26, x21" ] }, "shr ax, 2": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": "GROUP2 0xC1 /5", "ExpectedArm64ASM": [ - "uxth w20, w4", - "lsr w26, w20, #2", - "bfxil x4, x26, #0, #16", - "cmn wzr, w26, lsl #16", - "rmif x20, #0, #nzCv" + "mov x20, x4", + "uxth w21, w20", + "lsr w22, w21, #2", + "mov x23, x20", + "bfxil x23, x22, #0, #16", + "mov x4, x23", + "cmn wzr, w22, lsl #16", + "rmif x21, #0, #nzCv", + "mov x26, x22" ] }, "shr eax, 2": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "GROUP2 0xC1 /5", "ExpectedArm64ASM": [ - "mov w20, w4", - "lsr w4, w20, #2", - "tst w4, w4", - "rmif x20, #0, #nzCv", - "mov x26, x4" + "mov x20, x4", + "mov w21, w20", + "lsr w20, w21, #2", + "mov x4, x20", + "tst w20, w20", + "rmif x21, #0, #nzCv", + "mov x26, x20" ] }, "shr rax, 2": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": "GROUP2 0xC1 /5", "ExpectedArm64ASM": [ "mov x20, x4", - "lsr x4, x20, #2", - "tst x4, x4", + "lsr x21, x20, #2", + "mov x4, x21", + "tst x21, x21", "rmif x20, #0, #nzCv", - "mov x26, x4" + "mov x26, x21" ] }, "sar ax, 2": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": "GROUP2 0xC1 /7", "ExpectedArm64ASM": [ - "uxth w20, w4", - "sxth x20, w20", - "asr x26, x20, #2", - "bfxil x4, x26, #0, #16", - "cmn wzr, w26, lsl #16", - "rmif x20, #0, #nzCv" + "mov x20, x4", + "uxth w21, w20", + "sxth x22, w21", + "asr x21, x22, #2", + "mov x23, x20", + "bfxil x23, x21, #0, #16", + "mov x4, x23", + "cmn wzr, w21, lsl #16", + "rmif x22, #0, #nzCv", + "mov x26, x21" ] }, "sar eax, 2": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "GROUP2 0xC1 /7", "ExpectedArm64ASM": [ - "mov w20, w4", - "asr w4, w20, #2", - "tst w4, w4", - "rmif x20, #0, #nzCv", - "mov x26, x4" + "mov x20, x4", + "mov w21, w20", + "asr w20, w21, #2", + "mov x4, x20", + "tst w20, w20", + "rmif x21, #0, #nzCv", + "mov x26, x20" ] }, "sar rax, 2": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": "GROUP2 0xC1 /7", "ExpectedArm64ASM": [ "mov x20, x4", - "asr x4, x20, #2", - "tst x4, x4", + "asr x21, x20, #2", + "mov x4, x21", + "tst x21, x21", "rmif x20, #0, #nzCv", - "mov x26, x4" + "mov x26, x21" ] }, "rol al, 1": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 12, "Comment": "GROUP2 0xd0 /0", "ExpectedArm64ASM": [ - "mov w20, w4", - "bfi w20, w4, #8, #8", - "bfi w20, w20, #16, #16", - "ror w20, w20, #31", - "bfxil x4, x20, #0, #8", - "rmif x20, #63, #nzCv", - "eor w20, w20, w20, lsr #7", + "mov x20, x4", + "mov w21, w20", + "bfi w21, w20, #8, #8", + "mov w22, w21", + "bfi w22, w21, #16, #16", + "ror w21, w22, #31", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22", + "rmif x21, #63, #nzCv", + "eor w20, w21, w21, lsr #7", "rmif x20, #0, #nzcV" ] }, "ror al, 1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "GROUP2 0xd0 /1", "ExpectedArm64ASM": [ - "mov w20, w4", - "bfi w20, w4, #8, #8", - "ror w20, w20, #1", - "bfxil x4, x20, #0, #8", - "rmif x20, #6, #nzCv", - "eor w20, w20, w20, lsr #1", + "mov x20, x4", + "mov w21, w20", + "bfi w21, w20, #8, #8", + "ror w22, w21, #1", + "mov x21, x20", + "bfxil x21, x22, #0, #8", + "mov x4, x21", + "rmif x22, #6, #nzCv", + "eor w20, w22, w22, lsr #1", "rmif x20, #6, #nzcV" ] }, "rcl al, 1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "GROUP2 0xd0 /2", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "cset w21, hs", - "orr w21, w21, w20, lsl #1", - "bfxil x4, x21, #0, #8", - "rmif x20, #6, #nzCv", - "eor w20, w21, w20", + "mov x20, x4", + "uxtb w21, w20", + "cset w22, hs", + "orr w23, w22, w21, lsl #1", + "mov x22, x20", + "bfxil x22, x23, #0, #8", + "mov x4, x22", + "rmif x21, #6, #nzCv", + "eor w20, w23, w21", "rmif x20, #7, #nzcV" ] }, "rcr al, 1": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 12, "Comment": "GROUP2 0xd0 /3", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "cset w21, hs", - "rmif x20, #63, #nzCv", - "ubfx w20, w20, #1, #7", - "bfi w20, w21, #7, #1", - "bfxil x4, x20, #0, #8", - "eor w20, w20, w20, lsr #1", + "mov x20, x4", + "uxtb w21, w20", + "cset w22, hs", + "rmif x21, #63, #nzCv", + "ubfx w23, w21, #1, #7", + "mov w21, w23", + "bfi w21, w22, #7, #1", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22", + "eor w20, w21, w21, lsr #1", "rmif x20, #6, #nzcV" ] }, "shl al, 1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "GROUP2 0xd0 /4", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "lsl w26, w20, #1", - "bfxil x4, x26, #0, #8", - "cmn wzr, w26, lsl #24", - "rmif x20, #6, #nzCv", - "eor w20, w26, w20", + "mov x20, x4", + "uxtb w21, w20", + "lsl w22, w21, #1", + "mov x23, x20", + "bfxil x23, x22, #0, #8", + "mov x4, x23", + "cmn wzr, w22, lsl #24", + "rmif x21, #6, #nzCv", + "mov x26, x22", + "eor w20, w22, w21", "rmif x20, #7, #nzcV" ] }, "shr al, 1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": "GROUP2 0xd0 /5", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "lsr w26, w20, #1", - "bfxil x4, x26, #0, #8", - "cmn wzr, w26, lsl #24", - "rmif x20, #63, #nzCv", - "rmif x20, #7, #nzcV" + "mov x20, x4", + "uxtb w21, w20", + "lsr w22, w21, #1", + "mov x23, x20", + "bfxil x23, x22, #0, #8", + "mov x4, x23", + "cmn wzr, w22, lsl #24", + "rmif x21, #63, #nzCv", + "mov x26, x22", + "rmif x21, #7, #nzcV" ] }, "sar al, 1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": "GROUP2 0xd0 /7", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "sxtb x20, w20", - "asr x26, x20, #1", - "bfxil x4, x26, #0, #8", - "cmn wzr, w26, lsl #24", - "rmif x20, #63, #nzCv" + "mov x20, x4", + "uxtb w21, w20", + "sxtb x22, w21", + "asr x21, x22, #1", + "mov x23, x20", + "bfxil x23, x21, #0, #8", + "mov x4, x23", + "cmn wzr, w21, lsl #24", + "rmif x22, #63, #nzCv", + "mov x26, x21" ] }, "rol ax, 1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "GROUP2 0xd1 /0", "ExpectedArm64ASM": [ - "mov w20, w4", - "bfi w20, w4, #16, #16", - "ror w20, w20, #31", - "bfxil x4, x20, #0, #16", - "rmif x20, #63, #nzCv", - "eor w20, w20, w20, lsr #15", + "mov x20, x4", + "mov w21, w20", + "bfi w21, w20, #16, #16", + "ror w22, w21, #31", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21", + "rmif x22, #63, #nzCv", + "eor w20, w22, w22, lsr #15", "rmif x20, #0, #nzcV" ] }, "rol eax, 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP2 0xd1 /0", "ExpectedArm64ASM": [ - "ror w4, w4, #31", - "rmif x4, #63, #nzCv", - "eor w20, w4, w4, lsr #31", + "mov x20, x4", + "ror w21, w20, #31", + "mov x4, x21", + "rmif x21, #63, #nzCv", + "eor w20, w21, w21, lsr #31", "rmif x20, #0, #nzcV" ] }, "rol rax, 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP2 0xd1 /0", "ExpectedArm64ASM": [ - "ror x4, x4, #63", - "rmif x4, #63, #nzCv", - "eor x20, x4, x4, lsr #63", + "mov x20, x4", + "ror x21, x20, #63", + "mov x4, x21", + "rmif x21, #63, #nzCv", + "eor x20, x21, x21, lsr #63", "rmif x20, #0, #nzcV" ] }, "ror ax, 1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "GROUP2 0xd1 /1", "ExpectedArm64ASM": [ - "mov w20, w4", - "bfi w20, w4, #16, #16", - "ror w20, w20, #1", - "bfxil x4, x20, #0, #16", - "rmif x20, #14, #nzCv", - "eor w20, w20, w20, lsr #1", + "mov x20, x4", + "mov w21, w20", + "bfi w21, w20, #16, #16", + "ror w22, w21, #1", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21", + "rmif x22, #14, #nzCv", + "eor w20, w22, w22, lsr #1", "rmif x20, #14, #nzcV" ] }, "ror eax, 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP2 0xd1 /1", "ExpectedArm64ASM": [ - "ror w4, w4, #1", - "rmif x4, #30, #nzCv", - "eor w20, w4, w4, lsr #1", + "mov x20, x4", + "ror w21, w20, #1", + "mov x4, x21", + "rmif x21, #30, #nzCv", + "eor w20, w21, w21, lsr #1", "rmif x20, #30, #nzcV" ] }, "ror rax, 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP2 0xd1 /1", "ExpectedArm64ASM": [ - "ror x4, x4, #1", - "rmif x4, #62, #nzCv", - "eor x20, x4, x4, lsr #1", + "mov x20, x4", + "ror x21, x20, #1", + "mov x4, x21", + "rmif x21, #62, #nzCv", + "eor x20, x21, x21, lsr #1", "rmif x20, #62, #nzcV" ] }, "rcl ax, 1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "GROUP2 0xd1 /2", "ExpectedArm64ASM": [ - "uxth w20, w4", - "cset w21, hs", - "orr w21, w21, w20, lsl #1", - "bfxil x4, x21, #0, #16", - "rmif x20, #14, #nzCv", - "eor w20, w21, w20", + "mov x20, x4", + "uxth w21, w20", + "cset w22, hs", + "orr w23, w22, w21, lsl #1", + "mov x22, x20", + "bfxil x22, x23, #0, #16", + "mov x4, x22", + "rmif x21, #14, #nzCv", + "eor w20, w23, w21", "rmif x20, #15, #nzcV" ] }, "rcl eax, 1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "GROUP2 0xd1 /2", "ExpectedArm64ASM": [ - "mov w20, w4", - "cset w21, hs", - "orr w4, w21, w20, lsl #1", - "rmif x20, #30, #nzCv", - "eor w20, w4, w20", + "mov x20, x4", + "mov w21, w20", + "cset w20, hs", + "orr w22, w20, w21, lsl #1", + "mov x4, x22", + "rmif x21, #30, #nzCv", + "eor w20, w22, w21", "rmif x20, #31, #nzcV" ] }, "rcl rax, 1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 7, "Comment": "GROUP2 0xd1 /2", "ExpectedArm64ASM": [ "mov x20, x4", "cset w21, hs", - "orr x4, x21, x20, lsl #1", + "orr x22, x21, x20, lsl #1", + "mov x4, x22", "rmif x20, #62, #nzCv", - "eor x20, x4, x20", - "rmif x20, #63, #nzcV" + "eor x21, x22, x20", + "rmif x21, #63, #nzcV" ] }, "rcr ax, 1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "GROUP2 0xd1 /3", "ExpectedArm64ASM": [ - "cset w20, hs", - "rmif x4, #63, #nzCv", - "ubfx w21, w4, #1, #15", - "orr w20, w21, w20, lsl #15", - "bfxil x4, x20, #0, #16", - "eor x20, x20, x20, lsr #1", + "mov x20, x4", + "cset w21, hs", + "rmif x20, #63, #nzCv", + "ubfx w22, w20, #1, #15", + "orr w23, w22, w21, lsl #15", + "mov x21, x20", + "bfxil x21, x23, #0, #16", + "mov x4, x21", + "eor x20, x23, x23, lsr #1", "rmif x20, #14, #nzcV" ] }, "rcr eax, 1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "GROUP2 0xd1 /3", "ExpectedArm64ASM": [ - "cset w20, hs", - "rmif x4, #63, #nzCv", - "extr w4, w20, w4, #1", - "eor x20, x4, x4, lsr #1", + "mov x20, x4", + "cset w21, hs", + "rmif x20, #63, #nzCv", + "extr w22, w21, w20, #1", + "mov x4, x22", + "eor x20, x22, x22, lsr #1", "rmif x20, #30, #nzcV" ] }, "rcr rax, 1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "GROUP2 0xd1 /3", "ExpectedArm64ASM": [ - "cset w20, hs", - "rmif x4, #63, #nzCv", - "extr x4, x20, x4, #1", - "eor x20, x4, x4, lsr #1", + "mov x20, x4", + "cset w21, hs", + "rmif x20, #63, #nzCv", + "extr x22, x21, x20, #1", + "mov x4, x22", + "eor x20, x22, x22, lsr #1", "rmif x20, #62, #nzcV" ] }, "shl ax, 1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "GROUP2 0xd1 /4", "ExpectedArm64ASM": [ - "uxth w20, w4", - "lsl w26, w20, #1", - "bfxil x4, x26, #0, #16", - "cmn wzr, w26, lsl #16", - "rmif x20, #14, #nzCv", - "eor w20, w26, w20", + "mov x20, x4", + "uxth w21, w20", + "lsl w22, w21, #1", + "mov x23, x20", + "bfxil x23, x22, #0, #16", + "mov x4, x23", + "cmn wzr, w22, lsl #16", + "rmif x21, #14, #nzCv", + "mov x26, x22", + "eor w20, w22, w21", "rmif x20, #15, #nzcV" ] }, "shl eax, 1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "GROUP2 0xd1 /4", "ExpectedArm64ASM": [ - "mov w20, w4", - "lsl w4, w20, #1", - "tst w4, w4", - "rmif x20, #30, #nzCv", - "mov x26, x4", - "eor w20, w4, w20", - "rmif x20, #31, #nzcV" + "mov x20, x4", + "mov w21, w20", + "lsl w20, w21, #1", + "mov x4, x20", + "tst w20, w20", + "rmif x21, #30, #nzCv", + "mov x26, x20", + "eor w22, w20, w21", + "rmif x22, #31, #nzcV" ] }, "shl rax, 1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 8, "Comment": "GROUP2 0xd1 /4", "ExpectedArm64ASM": [ "mov x20, x4", - "lsl x4, x20, #1", - "tst x4, x4", + "lsl x21, x20, #1", + "mov x4, x21", + "tst x21, x21", "rmif x20, #62, #nzCv", - "mov x26, x4", - "eor x20, x4, x20", - "rmif x20, #63, #nzcV" + "mov x26, x21", + "eor x22, x21, x20", + "rmif x22, #63, #nzcV" ] }, "shr ax, 1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": "GROUP2 0xd1 /5", "ExpectedArm64ASM": [ - "uxth w20, w4", - "lsr w26, w20, #1", - "bfxil x4, x26, #0, #16", - "cmn wzr, w26, lsl #16", - "rmif x20, #63, #nzCv", - "rmif x20, #15, #nzcV" + "mov x20, x4", + "uxth w21, w20", + "lsr w22, w21, #1", + "mov x23, x20", + "bfxil x23, x22, #0, #16", + "mov x4, x23", + "cmn wzr, w22, lsl #16", + "rmif x21, #63, #nzCv", + "mov x26, x22", + "rmif x21, #15, #nzcV" ] }, "shr eax, 1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "GROUP2 0xd1 /5", "ExpectedArm64ASM": [ - "mov w20, w4", - "lsr w4, w20, #1", - "tst w4, w4", - "rmif x20, #63, #nzCv", - "mov x26, x4", - "rmif x20, #31, #nzcV" + "mov x20, x4", + "mov w21, w20", + "lsr w20, w21, #1", + "mov x4, x20", + "tst w20, w20", + "rmif x21, #63, #nzCv", + "mov x26, x20", + "rmif x21, #31, #nzcV" ] }, "shr rax, 1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 7, "Comment": "GROUP2 0xd1 /5", "ExpectedArm64ASM": [ "mov x20, x4", - "lsr x4, x20, #1", - "tst x4, x4", + "lsr x21, x20, #1", + "mov x4, x21", + "tst x21, x21", "rmif x20, #63, #nzCv", - "mov x26, x4", + "mov x26, x21", "rmif x20, #63, #nzcV" ] }, "sar ax, 1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": "GROUP2 0xd1 /7", "ExpectedArm64ASM": [ - "uxth w20, w4", - "sxth x20, w20", - "asr x26, x20, #1", - "bfxil x4, x26, #0, #16", - "cmn wzr, w26, lsl #16", - "rmif x20, #63, #nzCv" + "mov x20, x4", + "uxth w21, w20", + "sxth x22, w21", + "asr x21, x22, #1", + "mov x23, x20", + "bfxil x23, x21, #0, #16", + "mov x4, x23", + "cmn wzr, w21, lsl #16", + "rmif x22, #63, #nzCv", + "mov x26, x21" ] }, "sar eax, 1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "GROUP2 0xd1 /7", "ExpectedArm64ASM": [ - "mov w20, w4", - "asr w4, w20, #1", - "tst w4, w4", - "rmif x20, #63, #nzCv", - "mov x26, x4" + "mov x20, x4", + "mov w21, w20", + "asr w20, w21, #1", + "mov x4, x20", + "tst w20, w20", + "rmif x21, #63, #nzCv", + "mov x26, x20" ] }, "sar rax, 1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": "GROUP2 0xd1 /7", "ExpectedArm64ASM": [ "mov x20, x4", - "asr x4, x20, #1", - "tst x4, x4", + "asr x21, x20, #1", + "mov x4, x21", + "tst x21, x21", "rmif x20, #63, #nzCv", - "mov x26, x4" + "mov x26, x21" ] }, "rol al, cl": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 17, "Comment": "GROUP2 0xd2 /0", "ExpectedArm64ASM": [ - "and x20, x5, #0x1f", - "cbz x20, #+0x28", - "mov w20, w4", - "bfi w20, w4, #8, #8", - "bfi w20, w20, #16, #16", - "neg w21, w5", - "ror w20, w20, w21", - "bfxil x4, x20, #0, #8", + "mov x20, x5", + "and x21, x20, #0x1f", + "cbz x21, #+0x3c", + "mov x20, x5", + "mov x21, x4", + "mov w22, w21", + "bfi w22, w21, #8, #8", + "mov w23, w22", + "bfi w23, w22, #16, #16", + "neg w22, w20", + "ror w20, w23, w22", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22", "rmif x20, #63, #nzCv", - "eor w20, w20, w20, lsr #7", - "rmif x20, #0, #nzcV" + "eor w21, w20, w20, lsr #7", + "rmif x21, #0, #nzcV" ] }, "ror al, cl": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 16, "Comment": "GROUP2 0xd2 /1", "ExpectedArm64ASM": [ - "and x20, x5, #0x1f", - "cbz x20, #+0x24", - "mov w20, w4", - "bfi w20, w4, #8, #8", - "bfi w20, w20, #16, #16", - "ror w20, w20, w5", - "bfxil x4, x20, #0, #8", - "rmif x20, #6, #nzCv", - "eor w20, w20, w20, lsr #1", + "mov x20, x5", + "and x21, x20, #0x1f", + "cbz x21, #+0x38", + "mov x20, x5", + "mov x21, x4", + "mov w22, w21", + "bfi w22, w21, #8, #8", + "mov w23, w22", + "bfi w23, w22, #16, #16", + "ror w22, w23, w20", + "mov x20, x21", + "bfxil x20, x22, #0, #8", + "mov x4, x20", + "rmif x22, #6, #nzCv", + "eor w20, w22, w22, lsr #1", "rmif x20, #6, #nzcV" ] }, "rcl al, cl": { - "ExpectedInstructionCount": 28, + "ExpectedInstructionCount": 42, "Comment": "GROUP2 0xd2 /2", "ExpectedArm64ASM": [ - "and w20, w5, #0x1f", - "cbz x20, #+0x6c", - "and w20, w5, #0x1f", - "uxtb w21, w4", - "mov w22, #0x0", - "cset w23, hs", - "bfi x22, x21, #55, #8", - "bfi x22, x23, #63, #1", - "bfi x22, x21, #46, #8", - "bfi x22, x23, #54, #1", - "bfi x22, x21, #37, #8", - "bfi x22, x23, #45, #1", - "bfi x22, x21, #28, #8", - "bfi x22, x23, #36, #1", - "bfi x22, x21, #19, #8", - "bfi x22, x23, #27, #1", - "mov x0, x22", - "bfxil x0, x21, #0, #8", - "mov x21, x0", - "neg w22, w20", - "ror x22, x21, x22", - "bfxil x4, x22, #0, #8", - "mov w23, #0x3f", - "sub x20, x23, x20", - "ror x20, x21, x20", + "mov x20, x5", + "and w21, w20, #0x1f", + "cbz x21, #+0xa0", + "mov x20, x5", + "and w21, w20, #0x1f", + "mov x20, x4", + "uxtb w22, w20", + "mov w23, #0x0", + "cset w24, hs", + "mov x25, x23", + "bfi x25, x22, #55, #8", + "mov x23, x25", + "bfi x23, x24, #63, #1", + "mov x25, x23", + "bfi x25, x22, #46, #8", + "mov x23, x25", + "bfi x23, x24, #54, #1", + "mov x25, x23", + "bfi x25, x22, #37, #8", + "mov x23, x25", + "bfi x23, x24, #45, #1", + "mov x25, x23", + "bfi x25, x22, #28, #8", + "mov x23, x25", + "bfi x23, x24, #36, #1", + "mov x25, x23", + "bfi x25, x22, #19, #8", + "mov x23, x25", + "bfi x23, x24, #27, #1", + "mov x24, x23", + "bfxil x24, x22, #0, #8", + "neg w22, w21", + "ror x23, x24, x22", + "mov x22, x20", + "bfxil x22, x23, #0, #8", + "mov x4, x22", + "mov w20, #0x3f", + "sub x22, x20, x21", + "ror x20, x24, x22", "rmif x20, #63, #nzCv", - "eor x20, x20, x22, lsr #7", - "rmif x20, #0, #nzcV" + "eor x21, x20, x23, lsr #7", + "rmif x21, #0, #nzcV" ] }, "rcr al, cl": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 24, "Comment": "GROUP2 0xd2 /3", "ExpectedArm64ASM": [ - "and w20, w5, #0x1f", - "cbz x20, #+0x40", - "cset w20, hs", - "uxtb w21, w4", - "mov x0, x21", - "bfi x0, x20, #8, #1", - "mov x20, x0", - "bfi x20, x20, #9, #9", - "bfi x20, x20, #18, #18", - "bfi x20, x20, #36, #9", - "lsr w21, w20, w5", - "bfxil x4, x21, #0, #8", - "sub w22, w5, #0x1 (1)", - "lsr w20, w20, w22", + "mov x20, x5", + "and w21, w20, #0x1f", + "cbz x21, #+0x58", + "mov x20, x5", + "cset w21, hs", + "mov x22, x4", + "uxtb w23, w22", + "mov x24, x23", + "bfi x24, x21, #8, #1", + "mov x21, x24", + "bfi x21, x24, #9, #9", + "mov x23, x21", + "bfi x23, x21, #18, #18", + "mov x21, x23", + "bfi x21, x23, #36, #9", + "lsr w23, w21, w20", + "mov x24, x22", + "bfxil x24, x23, #0, #8", + "mov x4, x24", + "sub w22, w20, #0x1 (1)", + "lsr w20, w21, w22", "rmif x20, #63, #nzCv", - "eor w20, w21, w21, lsr #1", + "eor w20, w23, w23, lsr #1", "rmif x20, #6, #nzcV" ] }, "shl al, cl": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 20, "Comment": "GROUP2 0xd2 /4", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "uxtb w21, w5", - "lsl w22, w20, w21", - "bfxil x4, x22, #0, #8", - "cbz w21, #+0x24", + "mov x20, x4", + "uxtb w21, w20", + "mov x22, x5", + "uxtb w23, w22", + "lsl w22, w21, w23", + "mov x24, x20", + "bfxil x24, x22, #0, #8", + "mov x4, x24", + "mov x20, x26", + "mov x24, x20", + "cbz w23, #+0x24", "cmn wzr, w22, lsl #24", - "mov x26, x22", + "mov x24, x22", "mov w0, #0x8", - "sub w0, w0, w21", - "lsr w0, w20, w0", - "eor w2, w20, w22", + "sub w0, w0, w23", + "lsr w0, w21, w0", + "eor w2, w21, w22", "rmif x0, #63, #nzCv", - "rmif x2, #7, #nzcV" + "rmif x2, #7, #nzcV", + "mov x26, x24" ] }, "shr al, cl": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 19, "Comment": "GROUP2 0xd2 /5", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "uxtb w21, w5", - "lsr w22, w20, w21", - "bfxil x4, x22, #0, #8", - "cbz w21, #+0x20", + "mov x20, x4", + "uxtb w21, w20", + "mov x22, x5", + "uxtb w23, w22", + "lsr w22, w21, w23", + "mov x24, x20", + "bfxil x24, x22, #0, #8", + "mov x4, x24", + "mov x20, x26", + "mov x24, x20", + "cbz w23, #+0x20", "cmn wzr, w22, lsl #24", - "mov x26, x22", - "sub x0, x21, #0x1 (1)", - "lsr w0, w20, w0", - "eor w2, w20, w22", + "mov x24, x22", + "sub x0, x23, #0x1 (1)", + "lsr w0, w21, w0", + "eor w2, w21, w22", "rmif x0, #63, #nzCv", - "rmif x2, #7, #nzcV" + "rmif x2, #7, #nzcV", + "mov x26, x24" ] }, "sar al, cl": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 18, "Comment": "GROUP2 0xd2 /7", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "uxtb w21, w5", - "sxtb x20, w20", - "asr w22, w20, w21", - "bfxil x4, x22, #0, #8", - "cbz w21, #+0x18", - "cmn wzr, w22, lsl #24", - "mov x26, x22", - "sub x0, x21, #0x1 (1)", - "lsr w0, w20, w0", - "rmif x0, #63, #nzCv" + "mov x20, x4", + "uxtb w21, w20", + "mov x22, x5", + "uxtb w23, w22", + "sxtb x22, w21", + "asr w21, w22, w23", + "mov x24, x20", + "bfxil x24, x21, #0, #8", + "mov x4, x24", + "mov x20, x26", + "mov x24, x20", + "cbz w23, #+0x18", + "cmn wzr, w21, lsl #24", + "mov x24, x21", + "sub x0, x23, #0x1 (1)", + "lsr w0, w22, w0", + "rmif x0, #63, #nzCv", + "mov x26, x24" ] }, "rol ax, cl": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 15, "Comment": "GROUP2 0xd3 /0", "ExpectedArm64ASM": [ - "and x20, x5, #0x1f", - "cbz x20, #+0x24", - "mov w20, w4", - "bfi w20, w4, #16, #16", - "neg w21, w5", - "ror w20, w20, w21", - "bfxil x4, x20, #0, #16", + "mov x20, x5", + "and x21, x20, #0x1f", + "cbz x21, #+0x34", + "mov x20, x5", + "mov x21, x4", + "mov w22, w21", + "bfi w22, w21, #16, #16", + "neg w23, w20", + "ror w20, w22, w23", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x4, x22", "rmif x20, #63, #nzCv", - "eor w20, w20, w20, lsr #15", - "rmif x20, #0, #nzcV" + "eor w21, w20, w20, lsr #15", + "rmif x21, #0, #nzcV" ] }, "rol eax, cl": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "GROUP2 0xd3 /0", "ExpectedArm64ASM": [ - "and x20, x5, #0x1f", - "cbz x20, #+0x18", - "neg w20, w5", - "ror w4, w4, w20", - "rmif x4, #63, #nzCv", - "eor w20, w4, w4, lsr #31", - "rmif x20, #0, #nzcV" + "mov x20, x5", + "and x21, x20, #0x1f", + "cbz x21, #+0x24", + "mov x20, x5", + "mov x21, x4", + "neg w22, w20", + "ror w20, w21, w22", + "mov x4, x20", + "rmif x20, #63, #nzCv", + "eor w21, w20, w20, lsr #31", + "rmif x21, #0, #nzcV" ] }, "rol rax, cl": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "GROUP2 0xd3 /0", "ExpectedArm64ASM": [ - "and x20, x5, #0x3f", - "cbz x20, #+0x18", - "neg x20, x5", - "ror x4, x4, x20", - "rmif x4, #63, #nzCv", - "eor x20, x4, x4, lsr #63", - "rmif x20, #0, #nzcV" + "mov x20, x5", + "and x21, x20, #0x3f", + "cbz x21, #+0x24", + "mov x20, x5", + "mov x21, x4", + "neg x22, x20", + "ror x20, x21, x22", + "mov x4, x20", + "rmif x20, #63, #nzCv", + "eor x21, x20, x20, lsr #63", + "rmif x21, #0, #nzcV" ] }, "ror ax, cl": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 14, "Comment": "GROUP2 0xd3 /1", "ExpectedArm64ASM": [ - "and x20, x5, #0x1f", - "cbz x20, #+0x20", - "mov w20, w4", - "bfi w20, w4, #16, #16", - "ror w20, w20, w5", - "bfxil x4, x20, #0, #16", - "rmif x20, #14, #nzCv", - "eor w20, w20, w20, lsr #1", + "mov x20, x5", + "and x21, x20, #0x1f", + "cbz x21, #+0x30", + "mov x20, x5", + "mov x21, x4", + "mov w22, w21", + "bfi w22, w21, #16, #16", + "ror w23, w22, w20", + "mov x20, x21", + "bfxil x20, x23, #0, #16", + "mov x4, x20", + "rmif x23, #14, #nzCv", + "eor w20, w23, w23, lsr #1", "rmif x20, #14, #nzcV" ] }, "ror eax, cl": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": "GROUP2 0xd3 /1", "ExpectedArm64ASM": [ - "and x20, x5, #0x1f", - "cbz x20, #+0x14", - "ror w4, w4, w5", - "rmif x4, #30, #nzCv", - "eor w20, w4, w4, lsr #1", + "mov x20, x5", + "and x21, x20, #0x1f", + "cbz x21, #+0x20", + "mov x20, x5", + "mov x21, x4", + "ror w22, w21, w20", + "mov x4, x22", + "rmif x22, #30, #nzCv", + "eor w20, w22, w22, lsr #1", "rmif x20, #30, #nzcV" ] }, "ror rax, cl": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": "GROUP2 0xd3 /1", "ExpectedArm64ASM": [ - "and x20, x5, #0x3f", - "cbz x20, #+0x14", - "ror x4, x4, x5", - "rmif x4, #62, #nzCv", - "eor x20, x4, x4, lsr #1", + "mov x20, x5", + "and x21, x20, #0x3f", + "cbz x21, #+0x20", + "mov x20, x5", + "mov x21, x4", + "ror x22, x21, x20", + "mov x4, x22", + "rmif x22, #62, #nzCv", + "eor x20, x22, x22, lsr #1", "rmif x20, #62, #nzcV" ] }, "rcl ax, cl": { - "ExpectedInstructionCount": 24, + "ExpectedInstructionCount": 34, "Comment": "GROUP2 0xd3 /2", "ExpectedArm64ASM": [ - "and w20, w5, #0x1f", - "cbz x20, #+0x5c", - "and w20, w5, #0x1f", - "uxth w21, w4", - "mov w22, #0x0", - "cset w23, hs", - "bfi x22, x21, #47, #16", - "bfi x22, x23, #63, #1", - "bfi x22, x21, #30, #16", - "bfi x22, x23, #46, #1", - "bfi x22, x21, #13, #16", - "bfi x22, x23, #29, #1", - "mov x0, x22", - "bfxil x0, x21, #0, #16", - "mov x21, x0", - "neg w22, w20", - "ror x22, x21, x22", - "bfxil x4, x22, #0, #16", - "mov w23, #0x3f", - "sub x20, x23, x20", - "ror x20, x21, x20", + "mov x20, x5", + "and w21, w20, #0x1f", + "cbz x21, #+0x80", + "mov x20, x5", + "and w21, w20, #0x1f", + "mov x20, x4", + "uxth w22, w20", + "mov w23, #0x0", + "cset w24, hs", + "mov x25, x23", + "bfi x25, x22, #47, #16", + "mov x23, x25", + "bfi x23, x24, #63, #1", + "mov x25, x23", + "bfi x25, x22, #30, #16", + "mov x23, x25", + "bfi x23, x24, #46, #1", + "mov x25, x23", + "bfi x25, x22, #13, #16", + "mov x23, x25", + "bfi x23, x24, #29, #1", + "mov x24, x23", + "bfxil x24, x22, #0, #16", + "neg w22, w21", + "ror x23, x24, x22", + "mov x22, x20", + "bfxil x22, x23, #0, #16", + "mov x4, x22", + "mov w20, #0x3f", + "sub x22, x20, x21", + "ror x20, x24, x22", "rmif x20, #63, #nzCv", - "eor x20, x20, x22, lsr #15", - "rmif x20, #0, #nzcV" + "eor x21, x20, x23, lsr #15", + "rmif x21, #0, #nzcV" ] }, "rcl eax, cl": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 18, "Comment": "GROUP2 0xd3 /2", "ExpectedArm64ASM": [ - "and w20, w5, #0x1f", - "cbz x20, #+0x34", - "lsl w20, w4, w5", - "cset w21, hs", - "neg w22, w5", - "lsr w23, w4, w22", - "orr w20, w20, w23, lsr #1", - "lsr w22, w4, w22", + "mov x20, x5", + "and w21, w20, #0x1f", + "cbz x21, #+0x40", + "mov x20, x5", + "mov x21, x4", + "lsl w22, w21, w20", + "cset w23, hs", + "neg w24, w20", + "lsr w25, w21, w24", + "orr w30, w22, w25, lsr #1", + "lsr w22, w21, w24", "rmif x22, #63, #nzCv", - "sub w23, w5, #0x1 (1)", - "lsl w21, w21, w23", - "orr w4, w20, w21", - "eor w20, w4, w22, lsl #31", - "rmif x20, #31, #nzcV" + "sub w21, w20, #0x1 (1)", + "lsl w20, w23, w21", + "orr w21, w30, w20", + "eor w20, w21, w22, lsl #31", + "rmif x20, #31, #nzcV", + "mov x4, x21" ] }, "rcl rax, cl": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 18, "Comment": "GROUP2 0xd3 /2", "ExpectedArm64ASM": [ - "and x20, x5, #0x3f", - "cbz x20, #+0x34", - "lsl x20, x4, x5", - "cset w21, hs", - "neg x22, x5", - "lsr x23, x4, x22", - "orr x20, x20, x23, lsr #1", - "lsr x22, x4, x22", + "mov x20, x5", + "and x21, x20, #0x3f", + "cbz x21, #+0x40", + "mov x20, x5", + "mov x21, x4", + "lsl x22, x21, x20", + "cset w23, hs", + "neg x24, x20", + "lsr x25, x21, x24", + "orr x30, x22, x25, lsr #1", + "lsr x22, x21, x24", "rmif x22, #63, #nzCv", - "sub x23, x5, #0x1 (1)", - "lsl x21, x21, x23", - "orr x4, x20, x21", - "eor x20, x4, x22, lsl #63", - "rmif x20, #63, #nzcV" + "sub x21, x20, #0x1 (1)", + "lsl x20, x23, x21", + "orr x21, x30, x20", + "eor x20, x21, x22, lsl #63", + "rmif x20, #63, #nzcV", + "mov x4, x21" ] }, "rcr ax, cl": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 22, "Comment": "GROUP2 0xd3 /3", "ExpectedArm64ASM": [ - "and w20, w5, #0x1f", - "cbz x20, #+0x3c", - "cset w20, hs", - "uxth w21, w4", - "mov x0, x21", - "bfi x0, x20, #16, #1", - "mov x20, x0", - "bfi x20, x20, #17, #17", - "bfi x20, x20, #34, #17", - "lsr w21, w20, w5", - "bfxil x4, x21, #0, #16", - "sub w22, w5, #0x1 (1)", - "lsr w20, w20, w22", + "mov x20, x5", + "and w21, w20, #0x1f", + "cbz x21, #+0x50", + "mov x20, x5", + "cset w21, hs", + "mov x22, x4", + "uxth w23, w22", + "mov x24, x23", + "bfi x24, x21, #16, #1", + "mov x21, x24", + "bfi x21, x24, #17, #17", + "mov x23, x21", + "bfi x23, x21, #34, #17", + "lsr w21, w23, w20", + "mov x24, x22", + "bfxil x24, x21, #0, #16", + "mov x4, x24", + "sub w22, w20, #0x1 (1)", + "lsr w20, w23, w22", "rmif x20, #63, #nzCv", "eor w20, w21, w21, lsr #1", "rmif x20, #14, #nzcV" ] }, "rcr eax, cl": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 18, "Comment": "GROUP2 0xd3 /3", "ExpectedArm64ASM": [ - "and w20, w5, #0x1f", - "cbz x20, #+0x34", - "lsr w20, w4, w5", - "cset w21, hs", - "neg w22, w5", - "lsl w23, w4, w22", - "orr w20, w20, w23, lsl #1", - "sub w23, w5, #0x1 (1)", - "lsr w23, w4, w23", - "rmif x23, #63, #nzCv", - "lsl w21, w21, w22", - "orr w4, w20, w21", - "eor w20, w4, w4, lsr #1", - "rmif x20, #30, #nzcV" + "mov x20, x5", + "and w21, w20, #0x1f", + "cbz x21, #+0x40", + "mov x20, x5", + "mov x21, x4", + "lsr w22, w21, w20", + "cset w23, hs", + "neg w24, w20", + "lsl w25, w21, w24", + "orr w30, w22, w25, lsl #1", + "sub w22, w20, #0x1 (1)", + "lsr w20, w21, w22", + "rmif x20, #63, #nzCv", + "lsl w20, w23, w24", + "orr w21, w30, w20", + "eor w20, w21, w21, lsr #1", + "rmif x20, #30, #nzcV", + "mov x4, x21" ] }, "rcr rax, cl": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 18, "Comment": "GROUP2 0xd3 /3", "ExpectedArm64ASM": [ - "and x20, x5, #0x3f", - "cbz x20, #+0x34", - "lsr x20, x4, x5", - "cset w21, hs", - "neg x22, x5", - "lsl x23, x4, x22", - "orr x20, x20, x23, lsl #1", - "sub x23, x5, #0x1 (1)", - "lsr x23, x4, x23", - "rmif x23, #63, #nzCv", - "lsl x21, x21, x22", - "orr x4, x20, x21", - "eor x20, x4, x4, lsr #1", - "rmif x20, #62, #nzcV" + "mov x20, x5", + "and x21, x20, #0x3f", + "cbz x21, #+0x40", + "mov x20, x5", + "mov x21, x4", + "lsr x22, x21, x20", + "cset w23, hs", + "neg x24, x20", + "lsl x25, x21, x24", + "orr x30, x22, x25, lsl #1", + "sub x22, x20, #0x1 (1)", + "lsr x20, x21, x22", + "rmif x20, #63, #nzCv", + "lsl x20, x23, x24", + "orr x21, x30, x20", + "eor x20, x21, x21, lsr #1", + "rmif x20, #62, #nzcV", + "mov x4, x21" ] }, "shl ax, cl": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 20, "Comment": "GROUP2 0xd3 /4", "ExpectedArm64ASM": [ - "uxth w20, w4", - "uxth w21, w5", - "lsl w22, w20, w21", - "bfxil x4, x22, #0, #16", - "cbz w21, #+0x24", + "mov x20, x4", + "uxth w21, w20", + "mov x22, x5", + "uxth w23, w22", + "lsl w22, w21, w23", + "mov x24, x20", + "bfxil x24, x22, #0, #16", + "mov x4, x24", + "mov x20, x26", + "mov x24, x20", + "cbz w23, #+0x24", "cmn wzr, w22, lsl #16", - "mov x26, x22", + "mov x24, x22", "mov w0, #0x10", - "sub w0, w0, w21", - "lsr w0, w20, w0", - "eor w2, w20, w22", + "sub w0, w0, w23", + "lsr w0, w21, w0", + "eor w2, w21, w22", "rmif x0, #63, #nzCv", - "rmif x2, #15, #nzcV" + "rmif x2, #15, #nzcV", + "mov x26, x24" ] }, "shl eax, cl": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 16, "Comment": "GROUP2 0xd3 /4", "ExpectedArm64ASM": [ - "mov w20, w4", - "mov w21, w5", - "lsl w4, w20, w21", - "cbz w21, #+0x1c", - "ands w26, w4, w4", - "neg w0, w21", - "lsr w0, w20, w0", - "eor w2, w20, w4", + "mov x20, x4", + "mov w21, w20", + "mov x20, x5", + "mov w22, w20", + "lsl w20, w21, w22", + "mov x4, x20", + "mov x23, x26", + "mov x24, x23", + "cbz w22, #+0x1c", + "ands w24, w20, w20", + "neg w0, w22", + "lsr w0, w21, w0", + "eor w2, w21, w20", "rmif x0, #63, #nzCv", - "rmif x2, #31, #nzcV" + "rmif x2, #31, #nzcV", + "mov x26, x24" ] }, "shl rax, cl": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 14, "Comment": "GROUP2 0xd3 /4", "ExpectedArm64ASM": [ "mov x20, x4", - "lsl x4, x20, x5", - "cbz x5, #+0x1c", - "ands x26, x4, x4", - "neg x0, x5", + "mov x21, x5", + "lsl x22, x20, x21", + "mov x4, x22", + "mov x23, x26", + "mov x24, x23", + "cbz x21, #+0x1c", + "ands x24, x22, x22", + "neg x0, x21", "lsr x0, x20, x0", - "eor x2, x20, x4", + "eor x2, x20, x22", "rmif x0, #63, #nzCv", - "rmif x2, #63, #nzcV" + "rmif x2, #63, #nzcV", + "mov x26, x24" ] }, "shr ax, cl": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 19, "Comment": "GROUP2 0xd3 /5", "ExpectedArm64ASM": [ - "uxth w20, w4", - "uxth w21, w5", - "lsr w22, w20, w21", - "bfxil x4, x22, #0, #16", - "cbz w21, #+0x20", + "mov x20, x4", + "uxth w21, w20", + "mov x22, x5", + "uxth w23, w22", + "lsr w22, w21, w23", + "mov x24, x20", + "bfxil x24, x22, #0, #16", + "mov x4, x24", + "mov x20, x26", + "mov x24, x20", + "cbz w23, #+0x20", "cmn wzr, w22, lsl #16", - "mov x26, x22", - "sub x0, x21, #0x1 (1)", - "lsr w0, w20, w0", - "eor w2, w20, w22", + "mov x24, x22", + "sub x0, x23, #0x1 (1)", + "lsr w0, w21, w0", + "eor w2, w21, w22", "rmif x0, #63, #nzCv", - "rmif x2, #15, #nzcV" + "rmif x2, #15, #nzcV", + "mov x26, x24" ] }, "shr eax, cl": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 16, "Comment": "GROUP2 0xd3 /5", "ExpectedArm64ASM": [ - "mov w20, w4", - "mov w21, w5", - "lsr w4, w20, w21", - "cbz w21, #+0x1c", - "ands w26, w4, w4", - "sub x0, x21, #0x1 (1)", - "lsr w0, w20, w0", - "eor w2, w20, w4", + "mov x20, x4", + "mov w21, w20", + "mov x20, x5", + "mov w22, w20", + "lsr w20, w21, w22", + "mov x4, x20", + "mov x23, x26", + "mov x24, x23", + "cbz w22, #+0x1c", + "ands w24, w20, w20", + "sub x0, x22, #0x1 (1)", + "lsr w0, w21, w0", + "eor w2, w21, w20", "rmif x0, #63, #nzCv", - "rmif x2, #31, #nzcV" + "rmif x2, #31, #nzcV", + "mov x26, x24" ] }, "shr rax, cl": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 14, "Comment": "GROUP2 0xd3 /5", "ExpectedArm64ASM": [ "mov x20, x4", - "lsr x4, x20, x5", - "cbz x5, #+0x1c", - "ands x26, x4, x4", - "sub x0, x5, #0x1 (1)", + "mov x21, x5", + "lsr x22, x20, x21", + "mov x4, x22", + "mov x23, x26", + "mov x24, x23", + "cbz x21, #+0x1c", + "ands x24, x22, x22", + "sub x0, x21, #0x1 (1)", "lsr x0, x20, x0", - "eor x2, x20, x4", + "eor x2, x20, x22", "rmif x0, #63, #nzCv", - "rmif x2, #63, #nzcV" + "rmif x2, #63, #nzcV", + "mov x26, x24" ] }, "sar ax, cl": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 18, "Comment": "GROUP2 0xd3 /7", "ExpectedArm64ASM": [ - "uxth w20, w4", - "uxth w21, w5", - "sxth x20, w20", - "asr w22, w20, w21", - "bfxil x4, x22, #0, #16", - "cbz w21, #+0x18", - "cmn wzr, w22, lsl #16", - "mov x26, x22", - "sub x0, x21, #0x1 (1)", - "lsr w0, w20, w0", - "rmif x0, #63, #nzCv" + "mov x20, x4", + "uxth w21, w20", + "mov x22, x5", + "uxth w23, w22", + "sxth x22, w21", + "asr w21, w22, w23", + "mov x24, x20", + "bfxil x24, x21, #0, #16", + "mov x4, x24", + "mov x20, x26", + "mov x24, x20", + "cbz w23, #+0x18", + "cmn wzr, w21, lsl #16", + "mov x24, x21", + "sub x0, x23, #0x1 (1)", + "lsr w0, w22, w0", + "rmif x0, #63, #nzCv", + "mov x26, x24" ] }, "sar eax, cl": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 14, "Comment": "GROUP2 0xd3 /7", "ExpectedArm64ASM": [ - "mov w20, w4", - "mov w21, w5", - "asr w4, w20, w21", - "cbz w21, #+0x14", - "ands w26, w4, w4", - "sub x0, x21, #0x1 (1)", - "lsr w0, w20, w0", - "rmif x0, #63, #nzCv" + "mov x20, x4", + "mov w21, w20", + "mov x20, x5", + "mov w22, w20", + "asr w20, w21, w22", + "mov x4, x20", + "mov x23, x26", + "mov x24, x23", + "cbz w22, #+0x14", + "ands w24, w20, w20", + "sub x0, x22, #0x1 (1)", + "lsr w0, w21, w0", + "rmif x0, #63, #nzCv", + "mov x26, x24" ] }, "sar rax, cl": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 12, "Comment": "GROUP2 0xd3 /7", "ExpectedArm64ASM": [ "mov x20, x4", - "asr x4, x20, x5", - "cbz x5, #+0x14", - "ands x26, x4, x4", - "sub x0, x5, #0x1 (1)", + "mov x21, x5", + "asr x22, x20, x21", + "mov x4, x22", + "mov x23, x26", + "mov x24, x23", + "cbz x21, #+0x14", + "ands x24, x22, x22", + "sub x0, x21, #0x1 (1)", "lsr x0, x20, x0", - "rmif x0, #63, #nzCv" + "rmif x0, #63, #nzCv", + "mov x26, x24" ] }, "test bl, 1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP2 0xf6 /0", "ExpectedArm64ASM": [ - "and w26, w7, #0x1", - "cmn wzr, w26, lsl #24" + "mov x20, x7", + "and w21, w20, #0x1", + "cmn wzr, w21, lsl #24", + "mov x26, x21" ] }, "not bl": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "GROUP2 0xf6 /2", "ExpectedArm64ASM": [ - "eor x7, x7, #0xff" + "mov x20, x7", + "eor x21, x20, #0xff", + "mov x7, x21" ] }, "neg bl": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": "GROUP2 0xf6 /3", "ExpectedArm64ASM": [ - "mov x27, x7", - "cmp wzr, w27, lsl #24", - "neg w26, w27", + "mov x20, x7", + "mov x27, x20", + "cmp wzr, w20, lsl #24", + "neg w21, w20", + "mov x26, x21", "cfinv", - "mov x7, x27", - "bfxil x7, x26, #0, #8" + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x7, x22" ] }, "mul bl": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "GROUP2 0xf6 /4", "ExpectedArm64ASM": [ - "uxtb x20, w7", - "uxtb x21, w4", - "mul x20, x20, x21", - "bfxil x4, x20, #0, #16", - "ubfx x20, x20, #8, #8", + "mov x20, x7", + "mov x21, x4", + "uxtb x22, w20", + "uxtb x20, w21", + "mul x23, x22, x20", + "mov x20, x21", + "bfxil x20, x23, #0, #16", + "mov x4, x20", + "ubfx x20, x23, #8, #8", "cmp x20, #0x0 (0)", "ccmn xzr, #0, #nzCV, eq" ] }, "imul bl": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 12, "Comment": "GROUP2 0xf6 /5", "ExpectedArm64ASM": [ - "sxtb x20, w7", - "sxtb x21, w4", - "mul x20, x20, x21", - "bfxil x4, x20, #0, #16", - "sbfx x21, x20, #8, #8", - "sbfx x20, x20, #7, #1", - "cmp x21, x20", + "mov x20, x7", + "mov x21, x4", + "sxtb x22, w20", + "sxtb x20, w21", + "mul x23, x22, x20", + "mov x20, x21", + "bfxil x20, x23, #0, #16", + "mov x4, x20", + "sbfx x20, x23, #8, #8", + "sbfx x21, x23, #7, #1", + "cmp x20, x21", "ccmn xzr, #0, #nzCV, eq" ] }, "div bl": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 16, "Comment": "GROUP2 0xf6 /6", "ExpectedArm64ASM": [ - "uxtb w20, w7", - "uxth w21, w4", - "uxth w0, w21", - "uxth w1, w20", - "udiv w22, w0, w1", - "uxth w0, w21", - "uxth w1, w20", + "mov x20, x7", + "uxtb w21, w20", + "mov x20, x4", + "uxth w22, w20", + "uxth w0, w22", + "uxth w1, w21", + "udiv w23, w0, w1", + "uxth w0, w22", + "uxth w1, w21", "udiv w2, w0, w1", - "msub w20, w2, w1, w0", - "mov x0, x22", - "bfi x0, x20, #8, #8", - "mov x20, x0", - "bfxil x4, x20, #0, #16" + "msub w24, w2, w1, w0", + "mov x21, x23", + "bfi x21, x24, #8, #8", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22" ] }, "idiv bl": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 14, "Comment": "GROUP2 0xf6 /7", "ExpectedArm64ASM": [ - "uxtb w20, w7", - "uxth w21, w4", - "sxth x21, w21", - "sxtb x20, w20", - "sdiv x22, x21, x20", - "sdiv x0, x21, x20", - "msub x20, x0, x20, x21", - "mov x0, x22", - "bfi x0, x20, #8, #8", - "mov x20, x0", - "bfxil x4, x20, #0, #16" + "mov x20, x7", + "uxtb w21, w20", + "mov x20, x4", + "uxth w22, w20", + "sxth x23, w22", + "sxtb x22, w21", + "sdiv x21, x23, x22", + "sdiv x0, x23, x22", + "msub x24, x0, x22, x23", + "mov x22, x21", + "bfi x22, x24, #8, #8", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21" ] }, "test bx, 1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP2 0xf7 /0", "ExpectedArm64ASM": [ - "and w26, w7, #0x1", - "cmn wzr, w26, lsl #16" + "mov x20, x7", + "and w21, w20, #0x1", + "cmn wzr, w21, lsl #16", + "mov x26, x21" ] }, "test ebx, 1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "GROUP2 0xf7 /0", "ExpectedArm64ASM": [ - "ands w26, w7, #0x1" + "mov x20, x7", + "ands w21, w20, #0x1", + "mov x26, x21" ] }, "test rbx, 1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "GROUP2 0xf7 /0", "ExpectedArm64ASM": [ - "ands x26, x7, #0x1" + "mov x20, x7", + "ands x21, x20, #0x1", + "mov x26, x21" ] }, "test bx, -1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP2 0xf7 /0", "ExpectedArm64ASM": [ - "mov x26, x7", - "cmn wzr, w26, lsl #16" + "mov x20, x7", + "mov x21, x20", + "cmn wzr, w21, lsl #16", + "mov x26, x21" ] }, "test ebx, -1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "GROUP2 0xf7 /0", "ExpectedArm64ASM": [ - "ands w26, w7, w7" + "mov x20, x7", + "ands w21, w20, w20", + "mov x26, x21" ] }, "test rbx, -1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "GROUP2 0xf7 /0", "ExpectedArm64ASM": [ - "ands x26, x7, x7" + "mov x20, x7", + "ands x21, x20, x20", + "mov x26, x21" ] }, "neg bx": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": "GROUP2 0xf7 /2", "ExpectedArm64ASM": [ - "mov x27, x7", - "cmp wzr, w27, lsl #16", - "neg w26, w27", + "mov x20, x7", + "mov x27, x20", + "cmp wzr, w20, lsl #16", + "neg w21, w20", + "mov x26, x21", "cfinv", - "mov x7, x27", - "bfxil x7, x26, #0, #16" + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x7, x22" ] }, "neg ebx": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP2 0xf7 /2", "ExpectedArm64ASM": [ - "mov x27, x7", - "negs w26, w27", + "mov x20, x7", + "mov x27, x20", + "negs w21, w20", + "mov x26, x21", "cfinv", - "mov x7, x26" + "mov x7, x21" ] }, "neg rbx": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP2 0xf7 /2", "ExpectedArm64ASM": [ - "mov x27, x7", - "negs x26, x27", + "mov x20, x7", + "mov x27, x20", + "negs x21, x20", + "mov x26, x21", "cfinv", - "mov x7, x26" + "mov x7, x21" ] }, "mul bx": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 15, "Comment": "GROUP2 0xf7 /3", "ExpectedArm64ASM": [ - "uxth x20, w7", - "uxth x21, w4", - "mul x20, x20, x21", - "bfxil x4, x20, #0, #16", - "ubfx x20, x20, #16, #16", - "bfxil x6, x20, #0, #16", + "mov x20, x7", + "mov x21, x4", + "uxth x22, w20", + "uxth x20, w21", + "mul x23, x22, x20", + "mov x20, x21", + "bfxil x20, x23, #0, #16", + "mov x4, x20", + "ubfx x20, x23, #16, #16", + "mov x21, x6", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x6, x22", "cmp x20, #0x0 (0)", "ccmn xzr, #0, #nzCV, eq" ] }, "mul ebx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "GROUP2 0xf7 /3", "ExpectedArm64ASM": [ - "mov w20, w7", - "mov w21, w4", - "mul x20, x20, x21", - "mov w4, w20", - "lsr x6, x20, #32", - "cmp x6, #0x0 (0)", + "mov x20, x7", + "mov x21, x4", + "mov w22, w20", + "mov w20, w21", + "mul x21, x22, x20", + "mov w20, w21", + "lsr x22, x21, #32", + "mov x4, x20", + "mov x6, x22", + "cmp x22, #0x0 (0)", "ccmn xzr, #0, #nzCV, eq" ] }, "mul rbx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "GROUP2 0xf7 /3", "ExpectedArm64ASM": [ - "mov x20, x4", - "mul x4, x7, x20", - "umulh x6, x7, x20", - "cmp x6, #0x0 (0)", + "mov x20, x7", + "mov x21, x4", + "mul x22, x20, x21", + "umulh x23, x20, x21", + "mov x4, x22", + "mov x6, x23", + "cmp x23, #0x0 (0)", "ccmn xzr, #0, #nzCV, eq" ] }, "imul bx": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 16, "Comment": "GROUP2 0xf7 /5", "ExpectedArm64ASM": [ - "sxth x20, w7", - "sxth x21, w4", - "mul x20, x20, x21", - "bfxil x4, x20, #0, #16", - "sbfx x21, x20, #16, #16", - "bfxil x6, x21, #0, #16", - "sbfx x20, x20, #15, #1", - "cmp x21, x20", + "mov x20, x7", + "mov x21, x4", + "sxth x22, w20", + "sxth x20, w21", + "mul x23, x22, x20", + "mov x20, x21", + "bfxil x20, x23, #0, #16", + "mov x4, x20", + "sbfx x20, x23, #16, #16", + "mov x21, x6", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x6, x22", + "sbfx x21, x23, #15, #1", + "cmp x20, x21", "ccmn xzr, #0, #nzCV, eq" ] }, "imul ebx": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 14, "Comment": "GROUP2 0xf7 /5", "ExpectedArm64ASM": [ - "sxtw x20, w7", - "sxtw x21, w4", - "mul x20, x20, x21", - "mov w4, w20", - "lsr x6, x20, #32", - "asr x21, x20, #32", - "sxtw x20, w20", - "sbfx x20, x20, #31, #1", - "cmp x21, x20", + "mov x20, x7", + "mov x21, x4", + "sxtw x22, w20", + "sxtw x20, w21", + "mul x21, x22, x20", + "mov w20, w21", + "lsr x22, x21, #32", + "asr x23, x21, #32", + "sxtw x24, w21", + "mov x4, x20", + "mov x6, x22", + "sbfx x20, x24, #31, #1", + "cmp x23, x20", "ccmn xzr, #0, #nzCV, eq" ] }, "imul rbx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": "GROUP2 0xf7 /5", "ExpectedArm64ASM": [ - "smulh x6, x7, x4", - "mul x4, x7, x4", - "asr x20, x4, #63", - "cmp x6, x20", + "mov x20, x7", + "mov x21, x4", + "smulh x22, x20, x21", + "mul x23, x20, x21", + "mov x4, x23", + "mov x6, x22", + "asr x20, x23, #63", + "cmp x22, x20", "ccmn xzr, #0, #nzCV, eq" ] }, "div bx": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 19, "Comment": "GROUP2 0xf7 /6", "ExpectedArm64ASM": [ - "uxth w20, w7", - "uxth w21, w4", - "uxth w22, w6", - "uxth w0, w21", - "bfi w0, w22, #16, #16", - "udiv w23, w0, w20", - "uxth w0, w21", - "bfi w0, w22, #16, #16", - "udiv w1, w0, w20", - "msub w20, w1, w20, w0", - "bfxil x4, x23, #0, #16", - "bfxil x6, x20, #0, #16" + "mov x20, x7", + "uxth w21, w20", + "mov x20, x4", + "uxth w22, w20", + "mov x23, x6", + "uxth w24, w23", + "uxth w0, w22", + "bfi w0, w24, #16, #16", + "udiv w25, w0, w21", + "uxth w0, w22", + "bfi w0, w24, #16, #16", + "udiv w1, w0, w21", + "msub w30, w1, w21, w0", + "mov x21, x20", + "bfxil x21, x25, #0, #16", + "mov x4, x21", + "mov x20, x23", + "bfxil x20, x30, #0, #16", + "mov x6, x20" ] }, "inc al": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 11, "Comment": "GROUP3 0xfe /0", "ExpectedArm64ASM": [ - "uxtb w27, w4", - "add w26, w27, #0x1 (1)", - "setf8 w26", - "bic w20, w26, w27", - "rmif x20, #7, #nzcV", - "bfxil x4, x26, #0, #8" + "mov x20, x4", + "uxtb w21, w20", + "add w22, w21, #0x1 (1)", + "mov x26, x22", + "mov x27, x21", + "setf8 w22", + "bic w23, w22, w21", + "rmif x23, #7, #nzcV", + "mov x21, x20", + "bfxil x21, x22, #0, #8", + "mov x4, x21" ] }, "dec al": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 11, "Comment": "GROUP3 0xfe /1", "ExpectedArm64ASM": [ - "uxtb w27, w4", - "sub w26, w27, #0x1 (1)", - "setf8 w26", - "bic w20, w27, w26", - "rmif x20, #7, #nzcV", - "bfxil x4, x26, #0, #8" + "mov x20, x4", + "uxtb w21, w20", + "sub w22, w21, #0x1 (1)", + "mov x26, x22", + "mov x27, x21", + "setf8 w22", + "bic w23, w21, w22", + "rmif x23, #7, #nzcV", + "mov x21, x20", + "bfxil x21, x22, #0, #8", + "mov x4, x21" ] }, "inc ax": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 11, "Comment": "GROUP4 0xfe /0", "ExpectedArm64ASM": [ - "uxth w27, w4", - "add w26, w27, #0x1 (1)", - "setf16 w26", - "bic w20, w26, w27", - "rmif x20, #15, #nzcV", - "bfxil x4, x26, #0, #16" + "mov x20, x4", + "uxth w21, w20", + "add w22, w21, #0x1 (1)", + "mov x26, x22", + "mov x27, x21", + "setf16 w22", + "bic w23, w22, w21", + "rmif x23, #15, #nzcV", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21" ] }, "inc eax": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "GROUP4 0xfe /0", "ExpectedArm64ASM": [ - "mov x27, x4", - "cset w20, hs", - "adds w26, w27, #0x1 (1)", - "rmif x20, #63, #nzCv", - "mov x4, x26" + "mov x20, x4", + "cset w21, hs", + "mov x27, x20", + "adds w22, w20, #0x1 (1)", + "mov x26, x22", + "rmif x21, #63, #nzCv", + "mov x4, x22" ] }, "inc rax": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "GROUP4 0xfe /0", "ExpectedArm64ASM": [ - "mov x27, x4", - "cset w20, hs", - "adds x26, x27, #0x1 (1)", - "rmif x20, #63, #nzCv", - "mov x4, x26" + "mov x20, x4", + "cset w21, hs", + "mov x27, x20", + "adds x22, x20, #0x1 (1)", + "mov x26, x22", + "rmif x21, #63, #nzCv", + "mov x4, x22" ] }, "dec ax": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 11, "Comment": "GROUP4 0xfe /1", "ExpectedArm64ASM": [ - "uxth w27, w4", - "sub w26, w27, #0x1 (1)", - "setf16 w26", - "bic w20, w27, w26", - "rmif x20, #15, #nzcV", - "bfxil x4, x26, #0, #16" + "mov x20, x4", + "uxth w21, w20", + "sub w22, w21, #0x1 (1)", + "mov x26, x22", + "mov x27, x21", + "setf16 w22", + "bic w23, w21, w22", + "rmif x23, #15, #nzcV", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21" ] }, "dec eax": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "GROUP4 0xfe /1", "ExpectedArm64ASM": [ - "mov x27, x4", - "cset w20, hs", - "subs w26, w27, #0x1 (1)", - "rmif x20, #63, #nzCv", - "mov x4, x26" + "mov x20, x4", + "cset w21, hs", + "mov x27, x20", + "subs w22, w20, #0x1 (1)", + "mov x26, x22", + "rmif x21, #63, #nzCv", + "mov x4, x22" ] }, "dec rax": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "GROUP4 0xfe /1", "ExpectedArm64ASM": [ - "mov x27, x4", - "cset w20, hs", - "subs x26, x27, #0x1 (1)", - "rmif x20, #63, #nzCv", - "mov x4, x26" + "mov x20, x4", + "cset w21, hs", + "mov x27, x20", + "subs x22, x20, #0x1 (1)", + "mov x26, x22", + "rmif x21, #63, #nzCv", + "mov x4, x22" ] } } diff --git a/unittests/InstructionCountCI/FlagM/Primary_32Bit.json b/unittests/InstructionCountCI/FlagM/Primary_32Bit.json index 4c0f7dc6ce..0b718153e3 100644 --- a/unittests/InstructionCountCI/FlagM/Primary_32Bit.json +++ b/unittests/InstructionCountCI/FlagM/Primary_32Bit.json @@ -12,370 +12,503 @@ }, "Instructions": { "push es": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x06", "ExpectedArm64ASM": [ - "ldrh w20, [x28, #136]", - "str w20, [x8, #-4]!" + "mov w20, w8", + "ldrh w21, [x28, #136]", + "mov w22, w20", + "str w21, [x22, #-4]!", + "mov w8, w22" ] }, "pop es": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "0x07", "ExpectedArm64ASM": [ - "ldr w20, [x8]", - "add x8, x8, #0x4 (4)", - "strh w20, [x28, #136]", - "ubfx w20, w20, #3, #13", + "mov w20, w8", + "ldr w21, [x20]", + "add x22, x20, #0x4 (4)", + "mov w8, w22", + "strh w21, [x28, #136]", + "ubfx w20, w21, #3, #13", "add x0, x28, x20, lsl #2", - "ldr w20, [x0, #896]", - "str w20, [x28, #152]" + "ldr w21, [x0, #896]", + "str w21, [x28, #152]" ] }, "push cs": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0e", "ExpectedArm64ASM": [ - "ldrh w20, [x28, #138]", - "str w20, [x8, #-4]!" + "mov w20, w8", + "ldrh w21, [x28, #138]", + "mov w22, w20", + "str w21, [x22, #-4]!", + "mov w8, w22" ] }, "push ss": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x16", "ExpectedArm64ASM": [ - "ldrh w20, [x28, #140]", - "str w20, [x8, #-4]!" + "mov w20, w8", + "ldrh w21, [x28, #140]", + "mov w22, w20", + "str w21, [x22, #-4]!", + "mov w8, w22" ] }, "pop ss": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "0x17", "ExpectedArm64ASM": [ - "ldr w20, [x8]", - "add x8, x8, #0x4 (4)", - "strh w20, [x28, #140]", - "ubfx w20, w20, #3, #13", + "mov w20, w8", + "ldr w21, [x20]", + "add x22, x20, #0x4 (4)", + "mov w8, w22", + "strh w21, [x28, #140]", + "ubfx w20, w21, #3, #13", "add x0, x28, x20, lsl #2", - "ldr w20, [x0, #896]", - "str w20, [x28, #160]" + "ldr w21, [x0, #896]", + "str w21, [x28, #160]" ] }, "push ds": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x1e", "ExpectedArm64ASM": [ - "ldrh w20, [x28, #142]", - "str w20, [x8, #-4]!" + "mov w20, w8", + "ldrh w21, [x28, #142]", + "mov w22, w20", + "str w21, [x22, #-4]!", + "mov w8, w22" ] }, "pop ds": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "0x1f", "ExpectedArm64ASM": [ - "ldr w20, [x8]", - "add x8, x8, #0x4 (4)", - "strh w20, [x28, #142]", - "ubfx w20, w20, #3, #13", + "mov w20, w8", + "ldr w21, [x20]", + "add x22, x20, #0x4 (4)", + "mov w8, w22", + "strh w21, [x28, #142]", + "ubfx w20, w21, #3, #13", "add x0, x28, x20, lsl #2", - "ldr w20, [x0, #896]", - "str w20, [x28, #164]" + "ldr w21, [x0, #896]", + "str w21, [x28, #164]" ] }, "daa": { - "ExpectedInstructionCount": 21, + "ExpectedInstructionCount": 28, "Comment": "0x27", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "cset w21, hs", - "and x22, x20, #0xf", - "cmp x22, #0x9 (9)", - "cset x22, hi", - "eor w23, w27, w26", - "ubfx w23, w23, #4, #1", - "orr x22, x23, x22", - "cmp x20, #0x99 (153)", + "mov w20, w4", + "uxtb w21, w20", + "cset w22, hs", + "and x23, x21, #0xf", + "cmp x23, #0x9 (9)", + "cset x24, hi", + "mov w23, w27", + "mov w25, w26", + "eor w12, w23, w25", + "ubfx w23, w12, #4, #1", + "orr x25, x23, x24", + "cmp x21, #0x99 (153)", "cset x23, hi", - "orr x21, x21, x23", - "add x23, x20, #0x6 (6)", - "cmp x22, #0x0 (0)", - "csel x20, x23, x20, ne", - "add x23, x20, #0x60 (96)", - "cmp x21, #0x0 (0)", - "csel x26, x23, x20, ne", - "bfxil w4, w26, #0, #8", - "cmn wzr, w26, lsl #24", - "rmif x21, #63, #nzCv", - "eor w27, w26, w22, lsl #4" + "orr x24, x22, x23", + "add x22, x21, #0x6 (6)", + "cmp x25, #0x0 (0)", + "csel x23, x22, x21, ne", + "add x21, x23, #0x60 (96)", + "cmp x24, #0x0 (0)", + "csel x22, x21, x23, ne", + "mov w21, w20", + "bfxil w21, w22, #0, #8", + "mov w4, w21", + "cmn wzr, w22, lsl #24", + "rmif x24, #63, #nzCv", + "mov w26, w22", + "eor w20, w22, w25, lsl #4", + "mov w27, w20" ] }, "das": { - "ExpectedInstructionCount": 24, + "ExpectedInstructionCount": 31, "Comment": "0x2f", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "cset w21, hs", - "and x22, x20, #0xf", - "cmp x22, #0x9 (9)", - "cset x22, hi", - "eor w23, w27, w26", - "ubfx w23, w23, #4, #1", - "orr x22, x23, x22", - "cmp x20, #0x99 (153)", + "mov w20, w4", + "uxtb w21, w20", + "cset w22, hs", + "and x23, x21, #0xf", + "cmp x23, #0x9 (9)", + "cset x24, hi", + "mov w23, w27", + "mov w25, w26", + "eor w12, w23, w25", + "ubfx w23, w12, #4, #1", + "orr x25, x23, x24", + "cmp x21, #0x99 (153)", "cset x23, hi", - "orr x21, x21, x23", - "cmp x20, #0x6 (6)", - "csel x23, x22, x21, lo", - "orr w23, w21, w23", - "sub x24, x20, #0x6 (6)", - "cmp x22, #0x0 (0)", - "csel x20, x24, x20, ne", - "sub x24, x20, #0x60 (96)", - "cmp x21, #0x0 (0)", - "csel x26, x24, x20, ne", - "bfxil w4, w26, #0, #8", - "cmn wzr, w26, lsl #24", + "orr x24, x22, x23", + "cmp x21, #0x6 (6)", + "csel x22, x25, x24, lo", + "orr w23, w24, w22", + "sub x22, x21, #0x6 (6)", + "cmp x25, #0x0 (0)", + "csel x12, x22, x21, ne", + "sub x21, x12, #0x60 (96)", + "cmp x24, #0x0 (0)", + "csel x22, x21, x12, ne", + "mov w21, w20", + "bfxil w21, w22, #0, #8", + "mov w4, w21", + "cmn wzr, w22, lsl #24", "rmif x23, #63, #nzCv", - "eor w27, w26, w22, lsl #4" + "mov w26, w22", + "eor w20, w22, w25, lsl #4", + "mov w27, w20" ] }, "aaa": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 20, "Comment": "0x37", "ExpectedArm64ASM": [ - "and x20, x4, #0xf", - "cmp x20, #0x9 (9)", - "cset x20, hi", - "eor w21, w27, w26", - "ubfx w21, w21, #4, #1", - "orr x20, x21, x20", - "lsl x21, x20, #29", - "eor w27, w26, w20, lsl #4", + "mov w20, w4", + "and x21, x20, #0xf", + "cmp x21, #0x9 (9)", + "cset x22, hi", + "mov w21, w27", + "mov w23, w26", + "eor w24, w21, w23", + "ubfx w21, w24, #4, #1", + "orr x24, x21, x22", + "lsl x21, x24, #29", + "eor w22, w23, w24, lsl #4", + "mov w27, w22", "msr nzcv, x21", - "add w20, w4, #0x106 (262)", - "csel w20, w20, w4, hs", + "add w21, w20, #0x106 (262)", + "csel w22, w21, w20, hs", "mov w21, #0xff0f", - "and w20, w20, w21", - "bfxil w4, w20, #0, #16" + "and w23, w22, w21", + "mov w21, w20", + "bfxil w21, w23, #0, #16", + "mov w4, w21" ] }, "aas": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 20, "Comment": "0x3f", "ExpectedArm64ASM": [ - "and x20, x4, #0xf", - "cmp x20, #0x9 (9)", - "cset x20, hi", - "eor w21, w27, w26", - "ubfx w21, w21, #4, #1", - "orr x20, x21, x20", - "lsl x21, x20, #29", - "eor w27, w26, w20, lsl #4", + "mov w20, w4", + "and x21, x20, #0xf", + "cmp x21, #0x9 (9)", + "cset x22, hi", + "mov w21, w27", + "mov w23, w26", + "eor w24, w21, w23", + "ubfx w21, w24, #4, #1", + "orr x24, x21, x22", + "lsl x21, x24, #29", + "eor w22, w23, w24, lsl #4", + "mov w27, w22", "msr nzcv, x21", - "sub w20, w4, #0x106 (262)", - "csel w20, w20, w4, hs", + "sub w21, w20, #0x106 (262)", + "csel w22, w21, w20, hs", "mov w21, #0xff0f", - "and w20, w20, w21", - "bfxil w4, w20, #0, #16" + "and w23, w22, w21", + "mov w21, w20", + "bfxil w21, w23, #0, #16", + "mov w4, w21" ] }, "inc ax": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 11, "Comment": "0x40", "ExpectedArm64ASM": [ - "uxth w27, w4", - "add w26, w27, #0x1 (1)", - "setf16 w26", - "bic w20, w26, w27", - "rmif x20, #15, #nzcV", - "bfxil w4, w26, #0, #16" + "mov w20, w4", + "uxth w21, w20", + "add w22, w21, #0x1 (1)", + "mov w26, w22", + "mov w27, w21", + "setf16 w22", + "bic w23, w22, w21", + "rmif x23, #15, #nzcV", + "mov w21, w20", + "bfxil w21, w22, #0, #16", + "mov w4, w21" ] }, "inc eax": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "0x40", "ExpectedArm64ASM": [ - "mov w27, w4", - "cset w20, hs", - "adds w26, w27, #0x1 (1)", - "rmif x20, #63, #nzCv", - "mov w4, w26" + "mov w20, w4", + "cset w21, hs", + "mov w27, w20", + "adds w22, w20, #0x1 (1)", + "mov w26, w22", + "rmif x21, #63, #nzCv", + "mov w4, w22" ] }, "dec ax": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 11, "Comment": "0x48", "ExpectedArm64ASM": [ - "uxth w27, w4", - "sub w26, w27, #0x1 (1)", - "setf16 w26", - "bic w20, w27, w26", - "rmif x20, #15, #nzcV", - "bfxil w4, w26, #0, #16" + "mov w20, w4", + "uxth w21, w20", + "sub w22, w21, #0x1 (1)", + "mov w26, w22", + "mov w27, w21", + "setf16 w22", + "bic w23, w21, w22", + "rmif x23, #15, #nzcV", + "mov w21, w20", + "bfxil w21, w22, #0, #16", + "mov w4, w21" ] }, "push ax": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0x50", "ExpectedArm64ASM": [ - "strh w4, [x8, #-2]!" + "mov w20, w4", + "mov w21, w8", + "mov w22, w21", + "strh w20, [x22, #-2]!", + "mov w8, w22" ] }, "push eax": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0x50", "ExpectedArm64ASM": [ - "str w4, [x8, #-4]!" + "mov w20, w4", + "mov w21, w8", + "mov w22, w21", + "str w20, [x22, #-4]!", + "mov w8, w22" ] }, "dec eax": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "0x48", "ExpectedArm64ASM": [ - "mov w27, w4", - "cset w20, hs", - "subs w26, w27, #0x1 (1)", - "rmif x20, #63, #nzCv", - "mov w4, w26" + "mov w20, w4", + "cset w21, hs", + "mov w27, w20", + "subs w22, w20, #0x1 (1)", + "mov w26, w22", + "rmif x21, #63, #nzCv", + "mov w4, w22" ] }, "pusha": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 25, "Comment": "0x60", "ExpectedArm64ASM": [ "mov w20, w8", - "str w4, [x20, #-4]!", - "str w5, [x20, #-4]!", - "str w6, [x20, #-4]!", - "str w7, [x20, #-4]!", - "str w8, [x20, #-4]!", - "str w9, [x20, #-4]!", - "str w10, [x20, #-4]!", - "mov w8, w20", - "str w11, [x8, #-4]!" + "mov w21, w4", + "mov w22, w20", + "str w21, [x22, #-4]!", + "mov w21, w5", + "mov w23, w22", + "str w21, [x23, #-4]!", + "mov w21, w6", + "mov w22, w23", + "str w21, [x22, #-4]!", + "mov w21, w7", + "mov w23, w22", + "str w21, [x23, #-4]!", + "mov w21, w23", + "str w20, [x21, #-4]!", + "mov w20, w9", + "mov w22, w21", + "str w20, [x22, #-4]!", + "mov w20, w10", + "mov w21, w22", + "str w20, [x21, #-4]!", + "mov w20, w11", + "mov w22, w21", + "str w20, [x22, #-4]!", + "mov w8, w22" ] }, "pushad": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 25, "Comment": "0x60", "ExpectedArm64ASM": [ "mov w20, w8", - "str w4, [x20, #-4]!", - "str w5, [x20, #-4]!", - "str w6, [x20, #-4]!", - "str w7, [x20, #-4]!", - "str w8, [x20, #-4]!", - "str w9, [x20, #-4]!", - "str w10, [x20, #-4]!", - "mov w8, w20", - "str w11, [x8, #-4]!" + "mov w21, w4", + "mov w22, w20", + "str w21, [x22, #-4]!", + "mov w21, w5", + "mov w23, w22", + "str w21, [x23, #-4]!", + "mov w21, w6", + "mov w22, w23", + "str w21, [x22, #-4]!", + "mov w21, w7", + "mov w23, w22", + "str w21, [x23, #-4]!", + "mov w21, w23", + "str w20, [x21, #-4]!", + "mov w20, w9", + "mov w22, w21", + "str w20, [x22, #-4]!", + "mov w20, w10", + "mov w21, w22", + "str w20, [x21, #-4]!", + "mov w20, w11", + "mov w22, w21", + "str w20, [x22, #-4]!", + "mov w8, w22" ] }, "popa": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 23, "Comment": "0x61", "ExpectedArm64ASM": [ - "ldr w11, [x8]", - "add x20, x8, #0x4 (4)", - "ldr w10, [x20]", - "add x20, x20, #0x4 (4)", - "ldr w9, [x20]", - "add x20, x20, #0x8 (8)", - "ldr w7, [x20]", - "add x20, x20, #0x4 (4)", - "ldr w6, [x20]", - "add x20, x20, #0x4 (4)", - "ldr w5, [x20]", - "add x20, x20, #0x4 (4)", - "ldr w4, [x20]", - "add x8, x20, #0x4 (4)" + "mov w20, w8", + "ldr w21, [x20]", + "mov w11, w21", + "add x21, x20, #0x4 (4)", + "ldr w20, [x21]", + "mov w10, w20", + "add x20, x21, #0x4 (4)", + "ldr w21, [x20]", + "mov w9, w21", + "add x21, x20, #0x8 (8)", + "ldr w20, [x21]", + "mov w7, w20", + "add x20, x21, #0x4 (4)", + "ldr w21, [x20]", + "mov w6, w21", + "add x21, x20, #0x4 (4)", + "ldr w20, [x21]", + "mov w5, w20", + "add x20, x21, #0x4 (4)", + "ldr w21, [x20]", + "mov w4, w21", + "add x21, x20, #0x4 (4)", + "mov w8, w21" ] }, "popad": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 23, "Comment": "0x61", "ExpectedArm64ASM": [ - "ldr w11, [x8]", - "add x20, x8, #0x4 (4)", - "ldr w10, [x20]", - "add x20, x20, #0x4 (4)", - "ldr w9, [x20]", - "add x20, x20, #0x8 (8)", - "ldr w7, [x20]", - "add x20, x20, #0x4 (4)", - "ldr w6, [x20]", - "add x20, x20, #0x4 (4)", - "ldr w5, [x20]", - "add x20, x20, #0x4 (4)", - "ldr w4, [x20]", - "add x8, x20, #0x4 (4)" + "mov w20, w8", + "ldr w21, [x20]", + "mov w11, w21", + "add x21, x20, #0x4 (4)", + "ldr w20, [x21]", + "mov w10, w20", + "add x20, x21, #0x4 (4)", + "ldr w21, [x20]", + "mov w9, w21", + "add x21, x20, #0x8 (8)", + "ldr w20, [x21]", + "mov w7, w20", + "add x20, x21, #0x4 (4)", + "ldr w21, [x20]", + "mov w6, w21", + "add x21, x20, #0x4 (4)", + "ldr w20, [x21]", + "mov w5, w20", + "add x20, x21, #0x4 (4)", + "ldr w21, [x20]", + "mov w4, w21", + "add x21, x20, #0x4 (4)", + "mov w8, w21" ] }, "aam": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 12, "Comment": "0xd4", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "mov w21, #0xa", - "udiv x22, x20, x21", - "udiv x2, x20, x21", - "msub x20, x2, x21, x20", - "add x26, x20, x22, lsl #8", - "bfxil w4, w26, #0, #16", - "cmn wzr, w26, lsl #24" + "mov w20, w4", + "uxtb w21, w20", + "mov w22, #0xa", + "udiv x23, x21, x22", + "udiv x2, x21, x22", + "msub x24, x2, x22, x21", + "add x21, x24, x23, lsl #8", + "mov w22, w20", + "bfxil w22, w21, #0, #16", + "mov w4, w22", + "cmn wzr, w21, lsl #24", + "mov w26, w21" ] }, "aad": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "0xd5", "ExpectedArm64ASM": [ - "lsr w20, w4, #8", - "mov w21, #0xa", - "mul x20, x20, x21", - "add x20, x4, x20", - "and x26, x20, #0xff", - "bfxil w4, w26, #0, #16", - "cmn wzr, w26, lsl #24" + "mov w20, w4", + "lsr w21, w20, #8", + "mov w22, #0xa", + "mul x23, x21, x22", + "add x21, x20, x23", + "and x22, x21, #0xff", + "mov w21, w20", + "bfxil w21, w22, #0, #16", + "mov w4, w21", + "cmn wzr, w22, lsl #24", + "mov w26, w22" ] }, "db 0xd4, 0x40": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 12, "Comment": [ "aam with a different immediate byte base", "0xd4" ], "ExpectedArm64ASM": [ - "uxtb w20, w4", - "mov w21, #0x40", - "udiv x22, x20, x21", - "udiv x2, x20, x21", - "msub x20, x2, x21, x20", - "add x26, x20, x22, lsl #8", - "bfxil w4, w26, #0, #16", - "cmn wzr, w26, lsl #24" + "mov w20, w4", + "uxtb w21, w20", + "mov w22, #0x40", + "udiv x23, x21, x22", + "udiv x2, x21, x22", + "msub x24, x2, x22, x21", + "add x21, x24, x23, lsl #8", + "mov w22, w20", + "bfxil w22, w21, #0, #16", + "mov w4, w22", + "cmn wzr, w21, lsl #24", + "mov w26, w21" ] }, "db 0xd5, 0x40": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": [ "aad with a different immediate byte base", "0xd5" ], "ExpectedArm64ASM": [ - "lsr w20, w4, #8", - "lsl x20, x20, #6", - "add x20, x4, x20", - "and x26, x20, #0xff", - "bfxil w4, w26, #0, #16", - "cmn wzr, w26, lsl #24" + "mov w20, w4", + "lsr w21, w20, #8", + "lsl x22, x21, #6", + "add x21, x20, x22", + "and x22, x21, #0xff", + "mov w21, w20", + "bfxil w21, w22, #0, #16", + "mov w4, w21", + "cmn wzr, w22, lsl #24", + "mov w26, w22" ] }, "salc": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0xd6", "ExpectedArm64ASM": [ "csetm w20, hs", - "bfxil w4, w20, #0, #8" + "mov w21, w4", + "mov w22, w21", + "bfxil w22, w20, #0, #8", + "mov w4, w22" ] } } diff --git a/unittests/InstructionCountCI/FlagM/Secondary.json b/unittests/InstructionCountCI/FlagM/Secondary.json index 4084ceb643..289a4a496e 100644 --- a/unittests/InstructionCountCI/FlagM/Secondary.json +++ b/unittests/InstructionCountCI/FlagM/Secondary.json @@ -14,1623 +14,2133 @@ }, "Instructions": { "ucomiss xmm0, xmm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0x2e", "ExpectedArm64ASM": [ - "fcmp s16, s17", - "mov w27, #0x0", - "cset w26, vc", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "fcmp s2, s3", + "mov w20, #0x0", + "cset w21, vc", + "mov x26, x21", "axflag", - "cfinv" + "cfinv", + "mov x27, x20" ] }, "comiss xmm0, xmm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0x2f", "ExpectedArm64ASM": [ - "fcmp s16, s17", - "mov w27, #0x0", - "cset w26, vc", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "fcmp s2, s3", + "mov w20, #0x0", + "cset w21, vc", + "mov x26, x21", "axflag", - "cfinv" + "cfinv", + "mov x27, x20" ] }, "cmovo ax, bx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x40", "ExpectedArm64ASM": [ - "csel w20, w7, w4, vs", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, vs", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21" ] }, "cmovo eax, ebx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x40", "ExpectedArm64ASM": [ - "csel w4, w7, w4, vs" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, vs", + "mov x4, x22" ] }, "cmovo rax, rbx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x40", "ExpectedArm64ASM": [ - "csel x4, x7, x4, vs" + "mov x20, x4", + "mov x21, x7", + "csel x22, x21, x20, vs", + "mov x4, x22" ] }, "cmovno ax, bx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x41", "ExpectedArm64ASM": [ - "csel w20, w7, w4, vc", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, vc", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21" ] }, "cmovno eax, ebx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x41", "ExpectedArm64ASM": [ - "csel w4, w7, w4, vc" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, vc", + "mov x4, x22" ] }, "cmovno rax, rbx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x41", "ExpectedArm64ASM": [ - "csel x4, x7, x4, vc" + "mov x20, x4", + "mov x21, x7", + "csel x22, x21, x20, vc", + "mov x4, x22" ] }, "cmovb ax, bx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x42", "ExpectedArm64ASM": [ - "csel w20, w7, w4, hs", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, hs", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21" ] }, "cmovb eax, ebx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x42", "ExpectedArm64ASM": [ - "csel w4, w7, w4, hs" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, hs", + "mov x4, x22" ] }, "cmovb rax, rbx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x42", "ExpectedArm64ASM": [ - "csel x4, x7, x4, hs" + "mov x20, x4", + "mov x21, x7", + "csel x22, x21, x20, hs", + "mov x4, x22" ] }, "cmovnb ax, bx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x43", "ExpectedArm64ASM": [ - "csel w20, w7, w4, lo", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, lo", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21" ] }, "cmovnb eax, ebx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x43", "ExpectedArm64ASM": [ - "csel w4, w7, w4, lo" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, lo", + "mov x4, x22" ] }, "cmovnb rax, rbx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x43", "ExpectedArm64ASM": [ - "csel x4, x7, x4, lo" + "mov x20, x4", + "mov x21, x7", + "csel x22, x21, x20, lo", + "mov x4, x22" ] }, "cmovz ax, bx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x44", "ExpectedArm64ASM": [ - "csel w20, w7, w4, eq", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, eq", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21" ] }, "cmovz eax, ebx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x44", "ExpectedArm64ASM": [ - "csel w4, w7, w4, eq" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, eq", + "mov x4, x22" ] }, "cmovz rax, rbx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x44", "ExpectedArm64ASM": [ - "csel x4, x7, x4, eq" + "mov x20, x4", + "mov x21, x7", + "csel x22, x21, x20, eq", + "mov x4, x22" ] }, "cmovnz ax, bx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x45", "ExpectedArm64ASM": [ - "csel w20, w7, w4, ne", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, ne", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21" ] }, "cmovnz eax, ebx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x45", "ExpectedArm64ASM": [ - "csel w4, w7, w4, ne" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, ne", + "mov x4, x22" ] }, "cmovnz rax, rbx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x45", "ExpectedArm64ASM": [ - "csel x4, x7, x4, ne" + "mov x20, x4", + "mov x21, x7", + "csel x22, x21, x20, ne", + "mov x4, x22" ] }, "cmovbe ax, bx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x0f 0x46", "ExpectedArm64ASM": [ - "csel w20, w7, w4, hs", - "csel w20, w7, w20, eq", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, hs", + "csel w23, w21, w22, eq", + "mov x21, x20", + "bfxil x21, x23, #0, #16", + "mov x4, x21" ] }, "cmovbe eax, ebx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x46", "ExpectedArm64ASM": [ - "csel w20, w7, w4, hs", - "csel w4, w7, w20, eq" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, hs", + "csel w20, w21, w22, eq", + "mov x4, x20" ] }, "cmovbe rax, rbx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x46", "ExpectedArm64ASM": [ - "csel x20, x7, x4, hs", - "csel x4, x7, x20, eq" + "mov x20, x4", + "mov x21, x7", + "csel x22, x21, x20, hs", + "csel x20, x21, x22, eq", + "mov x4, x20" ] }, "cmovnbe ax, bx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x0f 0x47", "ExpectedArm64ASM": [ - "csel w20, w7, w4, lo", - "csel w20, w20, w4, ne", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, lo", + "csel w21, w22, w20, ne", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22" ] }, "cmovnbe eax, ebx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x47", "ExpectedArm64ASM": [ - "csel w20, w7, w4, lo", - "csel w4, w20, w4, ne" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, lo", + "csel w21, w22, w20, ne", + "mov x4, x21" ] }, "cmovnbe rax, rbx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x47", "ExpectedArm64ASM": [ - "csel x20, x7, x4, lo", - "csel x4, x20, x4, ne" + "mov x20, x4", + "mov x21, x7", + "csel x22, x21, x20, lo", + "csel x21, x22, x20, ne", + "mov x4, x21" ] }, "cmovs ax, bx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x48", "ExpectedArm64ASM": [ - "csel w20, w7, w4, mi", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, mi", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21" ] }, "cmovs eax, ebx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x48", "ExpectedArm64ASM": [ - "csel w4, w7, w4, mi" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, mi", + "mov x4, x22" ] }, "cmovs rax, rbx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x48", "ExpectedArm64ASM": [ - "csel x4, x7, x4, mi" + "mov x20, x4", + "mov x21, x7", + "csel x22, x21, x20, mi", + "mov x4, x22" ] }, "cmovns ax, bx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x49", "ExpectedArm64ASM": [ - "csel w20, w7, w4, pl", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, pl", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21" ] }, "cmovns eax, ebx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x49", "ExpectedArm64ASM": [ - "csel w4, w7, w4, pl" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, pl", + "mov x4, x22" ] }, "cmovns rax, rbx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x49", "ExpectedArm64ASM": [ - "csel x4, x7, x4, pl" + "mov x20, x4", + "mov x21, x7", + "csel x22, x21, x20, pl", + "mov x4, x22" ] }, "cmovpe ax, bx": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 13, "Comment": "0x0f 0x4a", "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csel w20, w7, w4, ne", - "bfxil x4, x20, #0, #16", - "msr nzcv, x21" + "mov x20, x4", + "mov x21, x7", + "mov x22, x26", + "eor w23, w22, w22, lsr #4", + "eor w22, w23, w23, lsr #2", + "eon w23, w22, w22, lsr #1", + "mrs x22, nzcv", + "tst w23, #0x1", + "csel w23, w21, w20, ne", + "mov x21, x20", + "bfxil x21, x23, #0, #16", + "mov x4, x21", + "msr nzcv, x22" ] }, "cmovpe eax, ebx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "0x0f 0x4a", "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csel w4, w7, w4, ne", - "msr nzcv, x21" + "mov x20, x4", + "mov x21, x7", + "mov x22, x26", + "eor w23, w22, w22, lsr #4", + "eor w22, w23, w23, lsr #2", + "eon w23, w22, w22, lsr #1", + "mrs x22, nzcv", + "tst w23, #0x1", + "csel w23, w21, w20, ne", + "mov x4, x23", + "msr nzcv, x22" ] }, "cmovpe rax, rbx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "0x0f 0x4a", "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csel x4, x7, x4, ne", - "msr nzcv, x21" + "mov x20, x4", + "mov x21, x7", + "mov x22, x26", + "eor w23, w22, w22, lsr #4", + "eor w22, w23, w23, lsr #2", + "eon w23, w22, w22, lsr #1", + "mrs x22, nzcv", + "tst w23, #0x1", + "csel x23, x21, x20, ne", + "mov x4, x23", + "msr nzcv, x22" ] }, "cmovnp ax, bx": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 13, "Comment": "0x0f 0x4b", "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csel w20, w7, w4, ne", - "bfxil x4, x20, #0, #16", - "msr nzcv, x21" + "mov x20, x4", + "mov x21, x7", + "mov x22, x26", + "eor w23, w22, w22, lsr #4", + "eor w22, w23, w23, lsr #2", + "eor w23, w22, w22, lsr #1", + "mrs x22, nzcv", + "tst w23, #0x1", + "csel w23, w21, w20, ne", + "mov x21, x20", + "bfxil x21, x23, #0, #16", + "mov x4, x21", + "msr nzcv, x22" ] }, "cmovnp eax, ebx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "0x0f 0x4b", "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csel w4, w7, w4, ne", - "msr nzcv, x21" + "mov x20, x4", + "mov x21, x7", + "mov x22, x26", + "eor w23, w22, w22, lsr #4", + "eor w22, w23, w23, lsr #2", + "eor w23, w22, w22, lsr #1", + "mrs x22, nzcv", + "tst w23, #0x1", + "csel w23, w21, w20, ne", + "mov x4, x23", + "msr nzcv, x22" ] }, "cmovnp rax, rbx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "0x0f 0x4b", "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csel x4, x7, x4, ne", - "msr nzcv, x21" + "mov x20, x4", + "mov x21, x7", + "mov x22, x26", + "eor w23, w22, w22, lsr #4", + "eor w22, w23, w23, lsr #2", + "eor w23, w22, w22, lsr #1", + "mrs x22, nzcv", + "tst w23, #0x1", + "csel x23, x21, x20, ne", + "mov x4, x23", + "msr nzcv, x22" ] }, "cmovl ax, bx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x4c", "ExpectedArm64ASM": [ - "csel w20, w7, w4, lt", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, lt", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21" ] }, "cmovl eax, ebx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x4c", "ExpectedArm64ASM": [ - "csel w4, w7, w4, lt" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, lt", + "mov x4, x22" ] }, "cmovl rax, rbx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x4c", "ExpectedArm64ASM": [ - "csel x4, x7, x4, lt" + "mov x20, x4", + "mov x21, x7", + "csel x22, x21, x20, lt", + "mov x4, x22" ] }, "cmovnl ax, bx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x4d", "ExpectedArm64ASM": [ - "csel w20, w7, w4, ge", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, ge", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21" ] }, "cmovnl eax, ebx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x4d", "ExpectedArm64ASM": [ - "csel w4, w7, w4, ge" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, ge", + "mov x4, x22" ] }, "cmovnl rax, rbx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x4d", "ExpectedArm64ASM": [ - "csel x4, x7, x4, ge" + "mov x20, x4", + "mov x21, x7", + "csel x22, x21, x20, ge", + "mov x4, x22" ] }, "cmovle ax, bx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x4e", "ExpectedArm64ASM": [ - "csel w20, w7, w4, le", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, le", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21" ] }, "cmovle eax, ebx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x4e", "ExpectedArm64ASM": [ - "csel w4, w7, w4, le" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, le", + "mov x4, x22" ] }, "cmovle rax, rbx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x4e", "ExpectedArm64ASM": [ - "csel x4, x7, x4, le" + "mov x20, x4", + "mov x21, x7", + "csel x22, x21, x20, le", + "mov x4, x22" ] }, "cmovnle ax, bx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x4f", "ExpectedArm64ASM": [ - "csel w20, w7, w4, gt", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, gt", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21" ] }, "cmovnle eax, ebx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x4f", "ExpectedArm64ASM": [ - "csel w4, w7, w4, gt" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, gt", + "mov x4, x22" ] }, "cmovnle rax, rbx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x4f", "ExpectedArm64ASM": [ - "csel x4, x7, x4, gt" + "mov x20, x4", + "mov x21, x7", + "csel x22, x21, x20, gt", + "mov x4, x22" ] }, "seto al": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x90", "ExpectedArm64ASM": [ "cset x20, vs", - "bfxil x4, x20, #0, #8" + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "setno al": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x91", "ExpectedArm64ASM": [ "cset x20, vc", - "bfxil x4, x20, #0, #8" + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "setb al": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x92", "ExpectedArm64ASM": [ "cset x20, hs", - "bfxil x4, x20, #0, #8" + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "setnb al": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x93", "ExpectedArm64ASM": [ "cset x20, lo", - "bfxil x4, x20, #0, #8" + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "setz al": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x94", "ExpectedArm64ASM": [ "cset x20, eq", - "bfxil x4, x20, #0, #8" + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "setnz al": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x95", "ExpectedArm64ASM": [ "cset x20, ne", - "bfxil x4, x20, #0, #8" + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "setbe al": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "0x0f 0x96", "ExpectedArm64ASM": [ "mov w20, #0x1", "cset x21, hs", - "csel x20, x20, x21, eq", - "bfxil x4, x20, #0, #8" + "csel x22, x20, x21, eq", + "mov x20, x4", + "mov x21, x20", + "bfxil x21, x22, #0, #8", + "mov x4, x21" ] }, "setnbe al": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x97", "ExpectedArm64ASM": [ "cset x20, lo", - "csel x20, x20, xzr, ne", - "bfxil x4, x20, #0, #8" + "csel x21, x20, xzr, ne", + "mov x20, x4", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22" ] }, "sets al": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x98", "ExpectedArm64ASM": [ "cset x20, mi", - "bfxil x4, x20, #0, #8" + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "setns al": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x99", "ExpectedArm64ASM": [ "cset x20, pl", - "bfxil x4, x20, #0, #8" + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "setpe al": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0x9a", "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "and x20, x20, #0x1", - "bfxil x4, x20, #0, #8" + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "and x20, x21, #0x1", + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "setnp al": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0x9b", "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "and x20, x20, #0x1", - "bfxil x4, x20, #0, #8" + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "and x20, x21, #0x1", + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "setl al": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x9c", "ExpectedArm64ASM": [ "cset x20, lt", - "bfxil x4, x20, #0, #8" + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "setnl al": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x9d", "ExpectedArm64ASM": [ "cset x20, ge", - "bfxil x4, x20, #0, #8" + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "setle al": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x9e", "ExpectedArm64ASM": [ "cset x20, le", - "bfxil x4, x20, #0, #8" + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "setnle al": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x9f", "ExpectedArm64ASM": [ "cset x20, gt", - "bfxil x4, x20, #0, #8" + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "bt ax, bx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xa3", "ExpectedArm64ASM": [ - "and x20, x7, #0xf", - "lsr w20, w4, w20", + "mov x20, x7", + "mov x21, x4", + "and x22, x20, #0xf", + "lsr w20, w21, w22", "rmif x20, #63, #nzCv" ] }, "bt [rax], bx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "0x0f 0xa3", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "sbfx x21, x7, #3, #13", - "ldrb w21, [x4, x21, sxtx]", - "lsr w20, w21, w20", - "rmif x20, #63, #nzCv" + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "sbfx x23, x20, #3, #13", + "ldrb w20, [x21, x23, sxtx]", + "lsr w21, w20, w22", + "rmif x21, #63, #nzCv" ] }, "bt eax, ebx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0xa3", "ExpectedArm64ASM": [ - "lsr w20, w4, w7", - "rmif x20, #63, #nzCv" + "mov x20, x7", + "mov x21, x4", + "lsr w22, w21, w20", + "rmif x22, #63, #nzCv" ] }, "bt [rax], ebx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "0x0f 0xa3", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "sbfx x21, x7, #3, #29", - "ldrb w21, [x4, x21, sxtx]", - "lsr w20, w21, w20", - "rmif x20, #63, #nzCv" + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "sbfx x23, x20, #3, #29", + "ldrb w20, [x21, x23, sxtx]", + "lsr w21, w20, w22", + "rmif x21, #63, #nzCv" ] }, "bt rax, rbx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0xa3", "ExpectedArm64ASM": [ - "lsr x20, x4, x7", - "rmif x20, #63, #nzCv" + "mov x20, x7", + "mov x21, x4", + "lsr x22, x21, x20", + "rmif x22, #63, #nzCv" ] }, "bt [rax], rbx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "0x0f 0xa3", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "asr x21, x7, #3", - "ldrb w21, [x4, x21, sxtx]", - "lsr w20, w21, w20", - "rmif x20, #63, #nzCv" + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "asr x23, x20, #3", + "ldrb w20, [x21, x23, sxtx]", + "lsr w21, w20, w22", + "rmif x21, #63, #nzCv" ] }, "shld ax, bx, 1": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 15, "Comment": "0x0f 0xac", "ExpectedArm64ASM": [ - "uxth w20, w7", - "uxth w21, w4", - "lsl x22, x21, #1", - "lsr w20, w20, #15", - "orr x26, x22, x20", - "bfxil x4, x26, #0, #16", - "cmn wzr, w26, lsl #16", - "rmif x21, #14, #nzCv", - "eor w20, w26, w21", + "mov x20, x7", + "uxth w21, w20", + "mov x20, x4", + "uxth w22, w20", + "lsl x23, x22, #1", + "lsr w24, w21, #15", + "orr x21, x23, x24", + "mov x23, x20", + "bfxil x23, x21, #0, #16", + "mov x4, x23", + "cmn wzr, w21, lsl #16", + "rmif x22, #14, #nzCv", + "mov x26, x21", + "eor w20, w21, w22", "rmif x20, #15, #nzcV" ] }, "shld ax, bx, 15": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 13, "Comment": "0x0f 0xac", "ExpectedArm64ASM": [ - "uxth w20, w7", - "uxth w21, w4", - "lsl x22, x21, #15", - "lsr w20, w20, #1", - "orr x26, x22, x20", - "bfxil x4, x26, #0, #16", - "cmn wzr, w26, lsl #16", - "rmif x21, #0, #nzCv" + "mov x20, x7", + "uxth w21, w20", + "mov x20, x4", + "uxth w22, w20", + "lsl x23, x22, #15", + "lsr w24, w21, #1", + "orr x21, x23, x24", + "mov x23, x20", + "bfxil x23, x21, #0, #16", + "mov x4, x23", + "cmn wzr, w21, lsl #16", + "rmif x22, #0, #nzCv", + "mov x26, x21" ] }, "shld ax, bx, 16": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 12, "Comment": "0x0f 0xac", "ExpectedArm64ASM": [ - "uxth w20, w7", - "uxth w21, w4", - "lsl x22, x21, #16", - "orr x26, x22, x20", - "bfxil x4, x26, #0, #16", - "cmn wzr, w26, lsl #16", - "rmif x21, #63, #nzCv" + "mov x20, x7", + "uxth w21, w20", + "mov x20, x4", + "uxth w22, w20", + "lsl x23, x22, #16", + "orr x24, x23, x21", + "mov x21, x20", + "bfxil x21, x24, #0, #16", + "mov x4, x21", + "cmn wzr, w24, lsl #16", + "rmif x22, #63, #nzCv", + "mov x26, x24" ] }, "shld ax, bx, 31": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 13, "Comment": "0x0f 0xac", "ExpectedArm64ASM": [ - "uxth w20, w7", - "uxth w21, w4", - "lsl x22, x21, #31", - "lsr w20, w20, #17", - "orr x26, x22, x20", - "bfxil x4, x26, #0, #16", - "cmn wzr, w26, lsl #16", - "rmif x21, #0, #nzCv" + "mov x20, x7", + "uxth w21, w20", + "mov x20, x4", + "uxth w22, w20", + "lsl x23, x22, #31", + "lsr w24, w21, #17", + "orr x21, x23, x24", + "mov x23, x20", + "bfxil x23, x21, #0, #16", + "mov x4, x23", + "cmn wzr, w21, lsl #16", + "rmif x22, #0, #nzCv", + "mov x26, x21" ] }, "shld eax, ebx, 1": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 11, "Comment": "0x0f 0xac", "ExpectedArm64ASM": [ - "mov w20, w7", - "mov w21, w4", - "extr w4, w21, w20, #31", - "tst w4, w4", - "rmif x21, #30, #nzCv", - "mov x26, x4", - "eor w20, w4, w21", - "rmif x20, #31, #nzcV" + "mov x20, x7", + "mov w21, w20", + "mov x20, x4", + "mov w22, w20", + "extr w20, w22, w21, #31", + "mov x4, x20", + "tst w20, w20", + "rmif x22, #30, #nzCv", + "mov x26, x20", + "eor w21, w20, w22", + "rmif x21, #31, #nzcV" ] }, "shld eax, ebx, 15": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0xac", "ExpectedArm64ASM": [ - "mov w20, w7", - "mov w21, w4", - "extr w4, w21, w20, #17", - "tst w4, w4", - "rmif x21, #16, #nzCv", - "mov x26, x4" + "mov x20, x7", + "mov w21, w20", + "mov x20, x4", + "mov w22, w20", + "extr w20, w22, w21, #17", + "mov x4, x20", + "tst w20, w20", + "rmif x22, #16, #nzCv", + "mov x26, x20" ] }, "shld eax, ebx, 16": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0xac", "ExpectedArm64ASM": [ - "mov w20, w7", - "mov w21, w4", - "extr w4, w21, w20, #16", - "tst w4, w4", - "rmif x21, #15, #nzCv", - "mov x26, x4" + "mov x20, x7", + "mov w21, w20", + "mov x20, x4", + "mov w22, w20", + "extr w20, w22, w21, #16", + "mov x4, x20", + "tst w20, w20", + "rmif x22, #15, #nzCv", + "mov x26, x20" ] }, "shld eax, ebx, 31": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0xac", "ExpectedArm64ASM": [ - "mov w20, w7", - "mov w21, w4", - "extr w4, w21, w20, #1", - "tst w4, w4", - "rmif x21, #0, #nzCv", - "mov x26, x4" + "mov x20, x7", + "mov w21, w20", + "mov x20, x4", + "mov w22, w20", + "extr w20, w22, w21, #1", + "mov x4, x20", + "tst w20, w20", + "rmif x22, #0, #nzCv", + "mov x26, x20" ] }, "shld rax, rbx, 1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0xac", "ExpectedArm64ASM": [ - "mov x20, x4", - "extr x4, x20, x7, #63", - "tst x4, x4", - "rmif x20, #62, #nzCv", - "mov x26, x4", - "eor x20, x4, x20", + "mov x20, x7", + "mov x21, x4", + "extr x22, x21, x20, #63", + "mov x4, x22", + "tst x22, x22", + "rmif x21, #62, #nzCv", + "mov x26, x22", + "eor x20, x22, x21", "rmif x20, #63, #nzcV" ] }, "shld rax, rbx, 15": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "0x0f 0xac", "ExpectedArm64ASM": [ - "mov x20, x4", - "extr x4, x20, x7, #49", - "tst x4, x4", - "rmif x20, #48, #nzCv", - "mov x26, x4" + "mov x20, x7", + "mov x21, x4", + "extr x22, x21, x20, #49", + "mov x4, x22", + "tst x22, x22", + "rmif x21, #48, #nzCv", + "mov x26, x22" ] }, "shld rax, rbx, 32": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "0x0f 0xac", "ExpectedArm64ASM": [ - "mov x20, x4", - "extr x4, x20, x7, #32", - "tst x4, x4", - "rmif x20, #31, #nzCv", - "mov x26, x4" + "mov x20, x7", + "mov x21, x4", + "extr x22, x21, x20, #32", + "mov x4, x22", + "tst x22, x22", + "rmif x21, #31, #nzCv", + "mov x26, x22" ] }, "shld rax, rbx, 63": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "0x0f 0xac", "ExpectedArm64ASM": [ - "mov x20, x4", - "extr x4, x20, x7, #1", - "tst x4, x4", - "rmif x20, #0, #nzCv", - "mov x26, x4" + "mov x20, x7", + "mov x21, x4", + "extr x22, x21, x20, #1", + "mov x4, x22", + "tst x22, x22", + "rmif x21, #0, #nzCv", + "mov x26, x22" ] }, "shld ax, bx, cl": { - "ExpectedInstructionCount": 22, + "ExpectedInstructionCount": 30, "Comment": "0x0f 0xad", "ExpectedArm64ASM": [ - "uxth w20, w7", - "uxth w21, w4", - "and x22, x5, #0x1f", + "mov x20, x7", + "uxth w21, w20", + "mov x20, x4", + "uxth w22, w20", + "mov x23, x5", + "and x24, x23, #0x1f", "mov w23, #0x10", - "sub x23, x23, x22", - "lsl x24, x21, x22", - "lsr w20, w20, w23", - "orr x20, x24, x20", + "sub x25, x23, x24", + "lsl x23, x22, x24", + "lsr w30, w21, w25", + "orr x21, x23, x30", "mrs x23, nzcv", - "cmp x22, #0x0 (0)", - "csel x20, x21, x20, eq", - "bfxil x4, x20, #0, #16", + "cmp x24, #0x0 (0)", + "csel x25, x22, x21, eq", + "mov x21, x20", + "bfxil x21, x25, #0, #16", + "mov x4, x21", + "mov x20, x26", "msr nzcv, x23", - "cbz w22, #+0x24", - "cmn wzr, w20, lsl #16", - "mov x26, x20", + "mov x21, x20", + "cbz w24, #+0x24", + "cmn wzr, w25, lsl #16", + "mov x21, x25", "mov w0, #0x10", - "sub w0, w0, w22", - "lsr w0, w21, w0", - "eor w2, w21, w20", + "sub w0, w0, w24", + "lsr w0, w22, w0", + "eor w2, w22, w25", "rmif x0, #63, #nzCv", - "rmif x2, #15, #nzcV" + "rmif x2, #15, #nzcV", + "mov x26, x21" ] }, "shld eax, ebx, cl": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 26, "Comment": "0x0f 0xad", "ExpectedArm64ASM": [ - "mov w20, w7", - "mov w21, w4", - "and x22, x5, #0x1f", - "neg x23, x22", - "lsl x24, x21, x22", - "lsr w20, w20, w23", - "orr x20, x24, x20", - "mrs x23, nzcv", - "cmp x22, #0x0 (0)", - "csel x20, x21, x20, eq", - "mov w4, w20", - "msr nzcv, x23", - "cbz w22, #+0x1c", - "ands w26, w20, w20", - "neg w0, w22", - "lsr w0, w21, w0", - "eor w2, w21, w20", + "mov x20, x7", + "mov w21, w20", + "mov x20, x4", + "mov w22, w20", + "mov x20, x5", + "and x23, x20, #0x1f", + "neg x20, x23", + "lsl x24, x22, x23", + "lsr w25, w21, w20", + "orr x20, x24, x25", + "mrs x21, nzcv", + "cmp x23, #0x0 (0)", + "csel x24, x22, x20, eq", + "mov w20, w24", + "mov x4, x20", + "mov x20, x26", + "msr nzcv, x21", + "mov x21, x20", + "cbz w23, #+0x1c", + "ands w21, w24, w24", + "neg w0, w23", + "lsr w0, w22, w0", + "eor w2, w22, w24", "rmif x0, #63, #nzCv", - "rmif x2, #31, #nzcV" + "rmif x2, #31, #nzcV", + "mov x26, x21" ] }, "shld rax, rbx, cl": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 23, "Comment": "0x0f 0xad", "ExpectedArm64ASM": [ - "mov x20, x4", - "and x21, x5, #0x3f", - "neg x22, x21", - "lsl x23, x20, x21", - "lsr x22, x7, x22", - "orr x22, x23, x22", - "mrs x23, nzcv", - "cmp x21, #0x0 (0)", - "csel x4, x20, x22, eq", - "msr nzcv, x23", - "cbz x21, #+0x1c", - "ands x26, x4, x4", - "neg x0, x21", - "lsr x0, x20, x0", - "eor x2, x20, x4", + "mov x20, x7", + "mov x21, x4", + "mov x22, x5", + "and x23, x22, #0x3f", + "neg x22, x23", + "lsl x24, x21, x23", + "lsr x25, x20, x22", + "orr x20, x24, x25", + "mrs x22, nzcv", + "cmp x23, #0x0 (0)", + "csel x24, x21, x20, eq", + "mov x4, x24", + "mov x20, x26", + "msr nzcv, x22", + "mov x22, x20", + "cbz x23, #+0x1c", + "ands x22, x24, x24", + "neg x0, x23", + "lsr x0, x21, x0", + "eor x2, x21, x24", "rmif x0, #63, #nzCv", - "rmif x2, #63, #nzcV" + "rmif x2, #63, #nzcV", + "mov x26, x22" ] }, "bts ax, bx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "0x0f 0xab", "ExpectedArm64ASM": [ - "and x20, x7, #0xf", - "lsr w21, w4, w20", - "rmif x21, #63, #nzCv", - "mov w21, #0x1", - "lsl w20, w21, w20", - "orr w20, w4, w20", - "bfxil x4, x20, #0, #16" + "mov x20, x7", + "mov x21, x4", + "and x22, x20, #0xf", + "lsr w20, w21, w22", + "rmif x20, #63, #nzCv", + "mov w20, #0x1", + "lsl w23, w20, w22", + "orr w20, w21, w23", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x4, x22" ] }, "bts [rax], bx": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 11, "Comment": "0x0f 0xab", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "sbfx x21, x7, #3, #13", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldrb w23, [x4, x21, sxtx]", - "orr x22, x23, x22", - "strb w22, [x4, x21, sxtx]", - "lsr w20, w23, w20", - "rmif x20, #63, #nzCv" + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "sbfx x23, x20, #3, #13", + "mov w20, #0x1", + "lsl x24, x20, x22", + "ldrb w20, [x21, x23, sxtx]", + "orr x25, x20, x24", + "strb w25, [x21, x23, sxtx]", + "lsr w21, w20, w22", + "rmif x21, #63, #nzCv" ] }, "bts eax, ebx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "0x0f 0xab", "ExpectedArm64ASM": [ - "lsr w20, w4, w7", - "rmif x20, #63, #nzCv", - "mov w20, #0x1", - "lsl w20, w20, w7", - "orr w4, w4, w20" + "mov x20, x7", + "mov x21, x4", + "lsr w22, w21, w20", + "rmif x22, #63, #nzCv", + "mov w22, #0x1", + "lsl w23, w22, w20", + "orr w20, w21, w23", + "mov x4, x20" ] }, "bts [rax], ebx": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 11, "Comment": "0x0f 0xab", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "sbfx x21, x7, #3, #29", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldrb w23, [x4, x21, sxtx]", - "orr x22, x23, x22", - "strb w22, [x4, x21, sxtx]", - "lsr w20, w23, w20", - "rmif x20, #63, #nzCv" + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "sbfx x23, x20, #3, #29", + "mov w20, #0x1", + "lsl x24, x20, x22", + "ldrb w20, [x21, x23, sxtx]", + "orr x25, x20, x24", + "strb w25, [x21, x23, sxtx]", + "lsr w21, w20, w22", + "rmif x21, #63, #nzCv" ] }, "bts rax, rbx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "0x0f 0xab", "ExpectedArm64ASM": [ - "lsr x20, x4, x7", - "rmif x20, #63, #nzCv", - "mov w20, #0x1", - "lsl x20, x20, x7", - "orr x4, x4, x20" + "mov x20, x7", + "mov x21, x4", + "lsr x22, x21, x20", + "rmif x22, #63, #nzCv", + "mov w22, #0x1", + "lsl x23, x22, x20", + "orr x20, x21, x23", + "mov x4, x20" ] }, "bts [rax], rbx": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 11, "Comment": "0x0f 0xab", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "asr x21, x7, #3", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldrb w23, [x4, x21, sxtx]", - "orr x22, x23, x22", - "strb w22, [x4, x21, sxtx]", - "lsr w20, w23, w20", - "rmif x20, #63, #nzCv" + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "asr x23, x20, #3", + "mov w20, #0x1", + "lsl x24, x20, x22", + "ldrb w20, [x21, x23, sxtx]", + "orr x25, x20, x24", + "strb w25, [x21, x23, sxtx]", + "lsr w21, w20, w22", + "rmif x21, #63, #nzCv" ] }, "lock bts [rax], bx": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 10, "Comment": "0x0f 0xb3", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "sbfx x21, x7, #3, #13", - "add x21, x4, x21", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldsetalb w22, w21, [x21]", - "lsr w20, w21, w20", + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "sbfx x23, x20, #3, #13", + "add x20, x21, x23", + "mov w21, #0x1", + "lsl x23, x21, x22", + "ldsetalb w23, w21, [x20]", + "lsr w20, w21, w22", "rmif x20, #63, #nzCv" ] }, "lock bts [rax], ebx": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 10, "Comment": "0x0f 0xb3", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "sbfx x21, x7, #3, #29", - "add x21, x4, x21", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldsetalb w22, w21, [x21]", - "lsr w20, w21, w20", + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "sbfx x23, x20, #3, #29", + "add x20, x21, x23", + "mov w21, #0x1", + "lsl x23, x21, x22", + "ldsetalb w23, w21, [x20]", + "lsr w20, w21, w22", "rmif x20, #63, #nzCv" ] }, "lock bts [rax], rbx": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 10, "Comment": "0x0f 0xb3", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "asr x21, x7, #3", - "add x21, x4, x21", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldsetalb w22, w21, [x21]", - "lsr w20, w21, w20", + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "asr x23, x20, #3", + "add x20, x21, x23", + "mov w21, #0x1", + "lsl x23, x21, x22", + "ldsetalb w23, w21, [x20]", + "lsr w20, w21, w22", "rmif x20, #63, #nzCv" ] }, "imul ax, bx": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 12, "Comment": "0x0f 0xaf", "ExpectedArm64ASM": [ - "sxth x20, w4", - "sxth x21, w7", - "mul x20, x20, x21", - "sbfx x21, x20, #16, #16", - "bfxil x4, x20, #0, #16", - "sbfx x20, x20, #15, #1", - "cmp x21, x20", + "mov x20, x4", + "mov x21, x7", + "sxth x22, w20", + "sxth x23, w21", + "mul x21, x22, x23", + "sbfx x22, x21, #16, #16", + "mov x23, x20", + "bfxil x23, x21, #0, #16", + "mov x4, x23", + "sbfx x20, x21, #15, #1", + "cmp x22, x20", "ccmn xzr, #0, #nzCV, eq" ] }, "imul eax, ebx": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0xaf", "ExpectedArm64ASM": [ - "smull x20, w4, w7", - "asr x20, x20, #32", - "mul w4, w4, w7", - "sbfx x21, x4, #31, #1", - "cmp x20, x21", + "mov x20, x4", + "mov x21, x7", + "smull x22, w20, w21", + "asr x23, x22, #32", + "mul w22, w20, w21", + "mov x4, x22", + "sbfx x20, x22, #31, #1", + "cmp x23, x20", "ccmn xzr, #0, #nzCV, eq" ] }, "imul rax, rbx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "0x0f 0xaf", "ExpectedArm64ASM": [ - "smulh x20, x4, x7", - "mul x4, x4, x7", - "asr x21, x4, #63", - "cmp x20, x21", + "mov x20, x4", + "mov x21, x7", + "smulh x22, x20, x21", + "mul x23, x20, x21", + "mov x4, x23", + "asr x20, x23, #63", + "cmp x22, x20", "ccmn xzr, #0, #nzCV, eq" ] }, "cmpxchg cl, bl": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 20, "ExpectedArm64ASM": [ - "uxtb w20, w7", - "uxtb w21, w5", - "uxtb x22, w4", - "eor w27, w22, w21", - "lsl w0, w22, #24", - "cmp w0, w21, lsl #24", - "sub w26, w22, w21", + "mov x20, x7", + "uxtb w21, w20", + "mov x20, x5", + "uxtb w22, w20", + "mov x23, x4", + "uxtb x24, w23", + "eor w25, w24, w22", + "mov x27, x25", + "lsl w0, w24, #24", + "cmp w0, w22, lsl #24", + "sub w25, w24, w22", + "mov x26, x25", "cfinv", - "bfxil x4, x21, #0, #8", - "csel x20, x20, x21, eq", - "bfxil x5, x20, #0, #8" + "mov x24, x23", + "bfxil x24, x22, #0, #8", + "mov x4, x24", + "csel x23, x21, x22, eq", + "mov x21, x20", + "bfxil x21, x23, #0, #8", + "mov x5, x21" ] }, "cmpxchg cx, bx": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 20, "ExpectedArm64ASM": [ - "uxth w20, w7", - "uxth w21, w5", - "uxth x22, w4", - "eor w27, w22, w21", - "lsl w0, w22, #16", - "cmp w0, w21, lsl #16", - "sub w26, w22, w21", + "mov x20, x7", + "uxth w21, w20", + "mov x20, x5", + "uxth w22, w20", + "mov x23, x4", + "uxth x24, w23", + "eor w25, w24, w22", + "mov x27, x25", + "lsl w0, w24, #16", + "cmp w0, w22, lsl #16", + "sub w25, w24, w22", + "mov x26, x25", "cfinv", - "bfxil x4, x21, #0, #16", - "csel x20, x20, x21, eq", - "bfxil x5, x20, #0, #16" + "mov x24, x23", + "bfxil x24, x22, #0, #16", + "mov x4, x24", + "csel x23, x21, x22, eq", + "mov x21, x20", + "bfxil x21, x23, #0, #16", + "mov x5, x21" ] }, "cmpxchg ecx, ebx": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 15, "ExpectedArm64ASM": [ - "mov w20, w7", - "mov w21, w5", - "mov w22, w4", - "eor w27, w22, w21", - "subs w26, w22, w21", + "mov x20, x7", + "mov w21, w20", + "mov x20, x5", + "mov x22, x4", + "mov w23, w20", + "mov w24, w22", + "eor w25, w24, w23", + "mov x27, x25", + "subs w25, w24, w23", + "mov x26, x25", "cfinv", - "csel x4, x4, x21, eq", - "csel x5, x20, x5, eq" + "csel x24, x22, x23, eq", + "mov x4, x24", + "csel x22, x21, x20, eq", + "mov x5, x22" ] }, "cmpxchg rcx, rbx": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 11, "ExpectedArm64ASM": [ - "mov x20, x5", - "eor w27, w4, w20", - "subs x26, x4, x20", + "mov x20, x7", + "mov x21, x5", + "mov x22, x4", + "eor w23, w22, w21", + "mov x27, x23", + "subs x23, x22, x21", + "mov x26, x23", "cfinv", - "mov x4, x20", - "csel x5, x7, x20, eq" + "mov x4, x21", + "csel x22, x20, x21, eq", + "mov x5, x22" ] }, "cmpxchg [rax], rbx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "0x0f 0xb1", "ExpectedArm64ASM": [ - "mov x20, x4", - "mov x1, x20", - "casal x1, x7, [x20]", - "mov x4, x1", - "eor w27, w20, w4", - "subs x26, x20, x4", + "mov x20, x7", + "mov x21, x4", + "mov x1, x21", + "casal x1, x20, [x21]", + "mov x22, x1", + "mov x4, x22", + "eor w20, w21, w22", + "mov x27, x20", + "subs x20, x21, x22", + "mov x26, x20", "cfinv" ] }, "cmpxchg al, bl": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 15, "Comment": "0x0f 0xb0", "ExpectedArm64ASM": [ - "uxtb w20, w7", - "uxtb w21, w4", - "uxtb x22, w4", - "eor w27, w22, w21", - "lsl w0, w22, #24", - "cmp w0, w21, lsl #24", - "sub w26, w22, w21", + "mov x20, x7", + "uxtb w21, w20", + "mov x20, x4", + "uxtb w22, w20", + "uxtb x23, w20", + "eor w24, w23, w22", + "mov x27, x24", + "lsl w0, w23, #24", + "cmp w0, w22, lsl #24", + "sub w24, w23, w22", + "mov x26, x24", "cfinv", - "bfxil x4, x20, #0, #8" + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22" ] }, "cmpxchg [rax], bl": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 17, "Comment": "0x0f 0xb0", "ExpectedArm64ASM": [ - "uxtb w20, w7", - "uxtb w21, w4", - "mov w1, w21", - "casalb w1, w20, [x4]", - "mov w20, w1", - "bfxil x4, x20, #0, #8", - "eor w27, w21, w20", - "lsl w0, w21, #24", - "cmp w0, w20, lsl #24", - "sub w26, w21, w20", + "mov x20, x7", + "uxtb w21, w20", + "mov x20, x4", + "uxtb w22, w20", + "mov w1, w22", + "casalb w1, w21, [x20]", + "mov w23, w1", + "mov x21, x20", + "bfxil x21, x23, #0, #8", + "mov x4, x21", + "eor w20, w22, w23", + "mov x27, x20", + "lsl w0, w22, #24", + "cmp w0, w23, lsl #24", + "sub w20, w22, w23", + "mov x26, x20", "cfinv" ] }, "cmpxchg ax, bx": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 15, "Comment": "0x0f 0xb1", "ExpectedArm64ASM": [ - "uxth w20, w7", - "uxth w21, w4", - "uxth x22, w4", - "eor w27, w22, w21", - "lsl w0, w22, #16", - "cmp w0, w21, lsl #16", - "sub w26, w22, w21", + "mov x20, x7", + "uxth w21, w20", + "mov x20, x4", + "uxth w22, w20", + "uxth x23, w20", + "eor w24, w23, w22", + "mov x27, x24", + "lsl w0, w23, #16", + "cmp w0, w22, lsl #16", + "sub w24, w23, w22", + "mov x26, x24", "cfinv", - "bfxil x4, x20, #0, #16" + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22" ] }, "cmpxchg [rax], bx": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 17, "Comment": "0x0f 0xb1", "ExpectedArm64ASM": [ - "uxth w20, w7", - "uxth w21, w4", - "mov w1, w21", - "casalh w1, w20, [x4]", - "mov w20, w1", - "bfxil x4, x20, #0, #16", - "eor w27, w21, w20", - "lsl w0, w21, #16", - "cmp w0, w20, lsl #16", - "sub w26, w21, w20", + "mov x20, x7", + "uxth w21, w20", + "mov x20, x4", + "uxth w22, w20", + "mov w1, w22", + "casalh w1, w21, [x20]", + "mov w23, w1", + "mov x21, x20", + "bfxil x21, x23, #0, #16", + "mov x4, x21", + "eor w20, w22, w23", + "mov x27, x20", + "lsl w0, w22, #16", + "cmp w0, w23, lsl #16", + "sub w20, w22, w23", + "mov x26, x20", "cfinv" ] }, "cmpxchg eax, ebx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "0x0f 0xb1", "ExpectedArm64ASM": [ - "mov w20, w7", - "mov w21, w4", - "mov w22, w4", - "eor w27, w22, w21", - "subs w26, w22, w21", + "mov x20, x7", + "mov w21, w20", + "mov x20, x4", + "mov w22, w20", + "mov w23, w20", + "eor w20, w23, w22", + "mov x27, x20", + "subs w20, w23, w22", + "mov x26, x20", "cfinv", - "mov x4, x20" + "mov x4, x21" ] }, "cmpxchg [rax], ebx": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 15, "Comment": "0x0f 0xb1", "ExpectedArm64ASM": [ - "mov w20, w7", - "mov w21, w4", - "mov w1, w21", - "casal w1, w20, [x4]", - "mov w20, w1", - "cmp w20, w21", - "csel x4, x4, x20, eq", - "eor w27, w21, w20", - "subs w26, w21, w20", + "mov x20, x7", + "mov w21, w20", + "mov x20, x4", + "mov w22, w20", + "mov w1, w22", + "casal w1, w21, [x20]", + "mov w23, w1", + "cmp w23, w22", + "csel x21, x20, x23, eq", + "mov x4, x21", + "eor w20, w22, w23", + "mov x27, x20", + "subs w20, w22, w23", + "mov x26, x20", "cfinv" ] }, "cmpxchg rax, rbx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "0x0f 0xb1", "ExpectedArm64ASM": [ "mov x20, x7", - "mov w27, #0x0", - "subs x26, x4, x4", + "mov x21, x4", + "mov w22, #0x0", + "mov x27, x22", + "subs x22, x21, x21", + "mov x26, x22", "cfinv", "mov x4, x20" ] }, "btr ax, bx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "0x0f 0xb3", "ExpectedArm64ASM": [ - "and x20, x7, #0xf", - "lsr w21, w4, w20", - "rmif x21, #63, #nzCv", - "mov w21, #0x1", - "lsl w20, w21, w20", - "bic w20, w4, w20", - "bfxil x4, x20, #0, #16" + "mov x20, x7", + "mov x21, x4", + "and x22, x20, #0xf", + "lsr w20, w21, w22", + "rmif x20, #63, #nzCv", + "mov w20, #0x1", + "lsl w23, w20, w22", + "bic w20, w21, w23", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x4, x22" ] }, "btr [rax], bx": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 11, "Comment": "0x0f 0xb3", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "sbfx x21, x7, #3, #13", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldrb w23, [x4, x21, sxtx]", - "bic x22, x23, x22", - "strb w22, [x4, x21, sxtx]", - "lsr w20, w23, w20", - "rmif x20, #63, #nzCv" + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "sbfx x23, x20, #3, #13", + "mov w20, #0x1", + "lsl x24, x20, x22", + "ldrb w20, [x21, x23, sxtx]", + "bic x25, x20, x24", + "strb w25, [x21, x23, sxtx]", + "lsr w21, w20, w22", + "rmif x21, #63, #nzCv" ] }, "btr eax, ebx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "0x0f 0xb3", "ExpectedArm64ASM": [ - "lsr w20, w4, w7", - "rmif x20, #63, #nzCv", - "mov w20, #0x1", - "lsl w20, w20, w7", - "bic w4, w4, w20" + "mov x20, x7", + "mov x21, x4", + "lsr w22, w21, w20", + "rmif x22, #63, #nzCv", + "mov w22, #0x1", + "lsl w23, w22, w20", + "bic w20, w21, w23", + "mov x4, x20" ] }, "btr [rax], ebx": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 11, "Comment": "0x0f 0xb3", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "sbfx x21, x7, #3, #29", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldrb w23, [x4, x21, sxtx]", - "bic x22, x23, x22", - "strb w22, [x4, x21, sxtx]", - "lsr w20, w23, w20", - "rmif x20, #63, #nzCv" + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "sbfx x23, x20, #3, #29", + "mov w20, #0x1", + "lsl x24, x20, x22", + "ldrb w20, [x21, x23, sxtx]", + "bic x25, x20, x24", + "strb w25, [x21, x23, sxtx]", + "lsr w21, w20, w22", + "rmif x21, #63, #nzCv" ] }, "btr rax, rbx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "0x0f 0xb3", "ExpectedArm64ASM": [ - "lsr x20, x4, x7", - "rmif x20, #63, #nzCv", - "mov w20, #0x1", - "lsl x20, x20, x7", - "bic x4, x4, x20" + "mov x20, x7", + "mov x21, x4", + "lsr x22, x21, x20", + "rmif x22, #63, #nzCv", + "mov w22, #0x1", + "lsl x23, x22, x20", + "bic x20, x21, x23", + "mov x4, x20" ] }, "btr [rax], rbx": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 11, "Comment": "0x0f 0xb3", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "asr x21, x7, #3", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldrb w23, [x4, x21, sxtx]", - "bic x22, x23, x22", - "strb w22, [x4, x21, sxtx]", - "lsr w20, w23, w20", - "rmif x20, #63, #nzCv" + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "asr x23, x20, #3", + "mov w20, #0x1", + "lsl x24, x20, x22", + "ldrb w20, [x21, x23, sxtx]", + "bic x25, x20, x24", + "strb w25, [x21, x23, sxtx]", + "lsr w21, w20, w22", + "rmif x21, #63, #nzCv" ] }, "lock btr [rax], bx": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 10, "Comment": "0x0f 0xb3", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "sbfx x21, x7, #3, #13", - "add x21, x4, x21", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldclralb w22, w21, [x21]", - "lsr w20, w21, w20", + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "sbfx x23, x20, #3, #13", + "add x20, x21, x23", + "mov w21, #0x1", + "lsl x23, x21, x22", + "ldclralb w23, w21, [x20]", + "lsr w20, w21, w22", "rmif x20, #63, #nzCv" ] }, "lock btr [rax], ebx": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 10, "Comment": "0x0f 0xb3", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "sbfx x21, x7, #3, #29", - "add x21, x4, x21", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldclralb w22, w21, [x21]", - "lsr w20, w21, w20", + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "sbfx x23, x20, #3, #29", + "add x20, x21, x23", + "mov w21, #0x1", + "lsl x23, x21, x22", + "ldclralb w23, w21, [x20]", + "lsr w20, w21, w22", "rmif x20, #63, #nzCv" ] }, "lock btr [rax], rbx": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 10, "Comment": "0x0f 0xb3", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "asr x21, x7, #3", - "add x21, x4, x21", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldclralb w22, w21, [x21]", - "lsr w20, w21, w20", + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "asr x23, x20, #3", + "add x20, x21, x23", + "mov w21, #0x1", + "lsl x23, x21, x22", + "ldclralb w23, w21, [x20]", + "lsr w20, w21, w22", "rmif x20, #63, #nzCv" ] }, "btc ax, bx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "0x0f 0xbb", "ExpectedArm64ASM": [ - "and x20, x7, #0xf", - "lsr w21, w4, w20", - "rmif x21, #63, #nzCv", - "mov w21, #0x1", - "lsl w20, w21, w20", - "eor w20, w4, w20", - "bfxil x4, x20, #0, #16" + "mov x20, x7", + "mov x21, x4", + "and x22, x20, #0xf", + "lsr w20, w21, w22", + "rmif x20, #63, #nzCv", + "mov w20, #0x1", + "lsl w23, w20, w22", + "eor w20, w21, w23", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x4, x22" ] }, "btc [rax], bx": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 11, "Comment": "0x0f 0xbb", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "sbfx x21, x7, #3, #13", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldrb w23, [x4, x21, sxtx]", - "eor x22, x23, x22", - "strb w22, [x4, x21, sxtx]", - "lsr w20, w23, w20", - "rmif x20, #63, #nzCv" + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "sbfx x23, x20, #3, #13", + "mov w20, #0x1", + "lsl x24, x20, x22", + "ldrb w20, [x21, x23, sxtx]", + "eor x25, x20, x24", + "strb w25, [x21, x23, sxtx]", + "lsr w21, w20, w22", + "rmif x21, #63, #nzCv" ] }, "btc eax, ebx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "0x0f 0xbb", "ExpectedArm64ASM": [ - "lsr w20, w4, w7", - "rmif x20, #63, #nzCv", - "mov w20, #0x1", - "lsl w20, w20, w7", - "eor w4, w4, w20" + "mov x20, x7", + "mov x21, x4", + "lsr w22, w21, w20", + "rmif x22, #63, #nzCv", + "mov w22, #0x1", + "lsl w23, w22, w20", + "eor w20, w21, w23", + "mov x4, x20" ] }, "btc [rax], ebx": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 11, "Comment": "0x0f 0xbb", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "sbfx x21, x7, #3, #29", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldrb w23, [x4, x21, sxtx]", - "eor x22, x23, x22", - "strb w22, [x4, x21, sxtx]", - "lsr w20, w23, w20", - "rmif x20, #63, #nzCv" + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "sbfx x23, x20, #3, #29", + "mov w20, #0x1", + "lsl x24, x20, x22", + "ldrb w20, [x21, x23, sxtx]", + "eor x25, x20, x24", + "strb w25, [x21, x23, sxtx]", + "lsr w21, w20, w22", + "rmif x21, #63, #nzCv" ] }, "btc rax, rbx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "0x0f 0xbb", "ExpectedArm64ASM": [ - "lsr x20, x4, x7", - "rmif x20, #63, #nzCv", - "mov w20, #0x1", - "lsl x20, x20, x7", - "eor x4, x4, x20" + "mov x20, x7", + "mov x21, x4", + "lsr x22, x21, x20", + "rmif x22, #63, #nzCv", + "mov w22, #0x1", + "lsl x23, x22, x20", + "eor x20, x21, x23", + "mov x4, x20" ] }, "btc [rax], rbx": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 11, "Comment": "0x0f 0xbb", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "asr x21, x7, #3", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldrb w23, [x4, x21, sxtx]", - "eor x22, x23, x22", - "strb w22, [x4, x21, sxtx]", - "lsr w20, w23, w20", - "rmif x20, #63, #nzCv" + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "asr x23, x20, #3", + "mov w20, #0x1", + "lsl x24, x20, x22", + "ldrb w20, [x21, x23, sxtx]", + "eor x25, x20, x24", + "strb w25, [x21, x23, sxtx]", + "lsr w21, w20, w22", + "rmif x21, #63, #nzCv" ] }, "lock btc [rax], bx": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 10, "Comment": "0x0f 0xb3", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "sbfx x21, x7, #3, #13", - "add x21, x4, x21", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldeoralb w22, w21, [x21]", - "lsr w20, w21, w20", + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "sbfx x23, x20, #3, #13", + "add x20, x21, x23", + "mov w21, #0x1", + "lsl x23, x21, x22", + "ldeoralb w23, w21, [x20]", + "lsr w20, w21, w22", "rmif x20, #63, #nzCv" ] }, "lock btc [rax], ebx": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 10, "Comment": "0x0f 0xb3", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "sbfx x21, x7, #3, #29", - "add x21, x4, x21", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldeoralb w22, w21, [x21]", - "lsr w20, w21, w20", + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "sbfx x23, x20, #3, #29", + "add x20, x21, x23", + "mov w21, #0x1", + "lsl x23, x21, x22", + "ldeoralb w23, w21, [x20]", + "lsr w20, w21, w22", "rmif x20, #63, #nzCv" ] }, "lock btc [rax], rbx": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 10, "Comment": "0x0f 0xb3", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "asr x21, x7, #3", - "add x21, x4, x21", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldeoralb w22, w21, [x21]", - "lsr w20, w21, w20", + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "asr x23, x20, #3", + "add x20, x21, x23", + "mov w21, #0x1", + "lsl x23, x21, x22", + "ldeoralb w23, w21, [x20]", + "lsr w20, w21, w22", "rmif x20, #63, #nzCv" ] }, "bsf ax, bx": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 14, "Comment": "0x0f 0xbc", "ExpectedArm64ASM": [ - "uxth w20, w4", - "uxth w21, w7", - "uxth w0, w21", + "mov x20, x4", + "uxth w21, w20", + "mov x22, x7", + "uxth w23, w22", + "uxth w0, w23", "cmp w0, #0x0 (0)", "rbit w0, w0", "clz w22, w0", "csinv w22, w22, wzr, ne", - "cmn wzr, w21, lsl #16", - "csel x20, x20, x22, eq", - "bfxil x4, x20, #0, #16" + "cmn wzr, w23, lsl #16", + "csel x23, x21, x22, eq", + "mov x21, x20", + "bfxil x21, x23, #0, #16", + "mov x4, x21" ] }, "bsf eax, ebx": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 11, "Comment": "0x0f 0xbc", "ExpectedArm64ASM": [ - "mov w20, w7", - "lsr w0, w20, #0", + "mov x20, x4", + "mov x21, x7", + "mov w22, w21", + "lsr w0, w22, #0", "cmp w0, #0x0 (0)", "rbit w0, w0", "clz w21, w0", "csinv w21, w21, wzr, ne", - "tst w20, w20", - "csel x4, x4, x21, eq" + "tst w22, w22", + "csel x22, x20, x21, eq", + "mov x4, x22" ] }, "bsf rax, rbx": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0xbc", "ExpectedArm64ASM": [ - "rbit x0, x7", - "cmp x7, #0x0 (0)", - "clz x20, x0", - "csinv x20, x20, xzr, ne", - "tst x7, x7", - "csel x4, x4, x20, eq" + "mov x20, x4", + "mov x21, x7", + "rbit x0, x21", + "cmp x21, #0x0 (0)", + "clz x22, x0", + "csinv x22, x22, xzr, ne", + "tst x21, x21", + "csel x21, x20, x22, eq", + "mov x4, x21" ] }, "bsr ax, bx": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 14, "Comment": "0x0f 0xbd", "ExpectedArm64ASM": [ - "uxth w20, w4", - "uxth w21, w7", + "mov x20, x4", + "uxth w21, w20", + "mov x22, x7", + "uxth w23, w22", "mov x0, #0xf", - "lsl w22, w21, #16", + "lsl w22, w23, #16", "orr w22, w22, #0x8000", "clz w22, w22", "sub x22, x0, x22", - "cmn wzr, w21, lsl #16", - "csel x20, x20, x22, eq", - "bfxil x4, x20, #0, #16" + "cmn wzr, w23, lsl #16", + "csel x23, x21, x22, eq", + "mov x21, x20", + "bfxil x21, x23, #0, #16", + "mov x4, x21" ] }, "bsr eax, ebx": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0xbd", "ExpectedArm64ASM": [ - "mov w20, w7", + "mov x20, x4", + "mov x21, x7", + "mov w22, w21", "mov x0, #0x1f", - "clz w21, w20", + "clz w21, w22", "sub x21, x0, x21", - "tst w20, w20", - "csel x4, x4, x21, eq" + "tst w22, w22", + "csel x22, x20, x21, eq", + "mov x4, x22" ] }, "bsr rax, rbx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "0x0f 0xbd", "ExpectedArm64ASM": [ + "mov x20, x4", + "mov x21, x7", "mov x0, #0x3f", - "clz x20, x7", - "sub x20, x0, x20", - "tst x7, x7", - "csel x4, x4, x20, eq" + "clz x22, x21", + "sub x22, x0, x22", + "tst x21, x21", + "csel x21, x20, x22, eq", + "mov x4, x21" ] }, "xadd al, bl": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 16, "Comment": "0x0f 0xc0", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "uxtb w21, w7", - "eor w27, w20, w21", - "lsl w0, w20, #24", - "cmn w0, w21, lsl #24", - "add w26, w20, w21", - "bfxil x7, x20, #0, #8", - "bfxil x4, x26, #0, #8" + "mov x20, x4", + "uxtb w21, w20", + "mov x22, x7", + "uxtb w23, w22", + "eor w24, w21, w23", + "mov x27, x24", + "lsl w0, w21, #24", + "cmn w0, w23, lsl #24", + "add w24, w21, w23", + "mov x26, x24", + "mov x23, x22", + "bfxil x23, x21, #0, #8", + "mov x7, x23", + "mov x21, x20", + "bfxil x21, x24, #0, #8", + "mov x4, x21" ] }, "xadd [rax], bl": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 13, "Comment": "0x0f 0xc0", "ExpectedArm64ASM": [ - "uxtb w20, w7", - "ldaddalb w20, w21, [x4]", - "bfxil x7, x21, #0, #8", - "eor w27, w21, w20", - "lsl w0, w21, #24", - "cmn w0, w20, lsl #24", - "add w26, w21, w20" + "mov x20, x4", + "mov x21, x7", + "uxtb w22, w21", + "ldaddalb w22, w23, [x20]", + "mov x20, x21", + "bfxil x20, x23, #0, #8", + "mov x7, x20", + "eor w20, w23, w22", + "mov x27, x20", + "lsl w0, w23, #24", + "cmn w0, w22, lsl #24", + "add w20, w23, w22", + "mov x26, x20" ] }, "xadd ax, bx": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 16, "Comment": "0x0f 0xc1", "ExpectedArm64ASM": [ - "uxth w20, w4", - "uxth w21, w7", - "eor w27, w20, w21", - "lsl w0, w20, #16", - "cmn w0, w21, lsl #16", - "add w26, w20, w21", - "bfxil x7, x20, #0, #16", - "bfxil x4, x26, #0, #16" + "mov x20, x4", + "uxth w21, w20", + "mov x22, x7", + "uxth w23, w22", + "eor w24, w21, w23", + "mov x27, x24", + "lsl w0, w21, #16", + "cmn w0, w23, lsl #16", + "add w24, w21, w23", + "mov x26, x24", + "mov x23, x22", + "bfxil x23, x21, #0, #16", + "mov x7, x23", + "mov x21, x20", + "bfxil x21, x24, #0, #16", + "mov x4, x21" ] }, "xadd [rax], bx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 13, "Comment": "0x0f 0xc1", "ExpectedArm64ASM": [ - "uxth w20, w7", - "ldaddalh w20, w21, [x4]", - "bfxil x7, x21, #0, #16", - "eor w27, w21, w20", - "lsl w0, w21, #16", - "cmn w0, w20, lsl #16", - "add w26, w21, w20" + "mov x20, x4", + "mov x21, x7", + "uxth w22, w21", + "ldaddalh w22, w23, [x20]", + "mov x20, x21", + "bfxil x20, x23, #0, #16", + "mov x7, x20", + "eor w20, w23, w22", + "mov x27, x20", + "lsl w0, w23, #16", + "cmn w0, w22, lsl #16", + "add w20, w23, w22", + "mov x26, x20" ] }, "xadd eax, ebx": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": "0x0f 0xc1", "ExpectedArm64ASM": [ - "mov w20, w4", - "mov w21, w7", - "eor w27, w20, w21", - "adds w26, w20, w21", - "mov x7, x20", - "mov x4, x26" + "mov x20, x4", + "mov w21, w20", + "mov x20, x7", + "mov w22, w20", + "eor w20, w21, w22", + "mov x27, x20", + "adds w20, w21, w22", + "mov x26, x20", + "mov x7, x21", + "mov x4, x20" ] }, "xadd [rax], ebx": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0xc1", "ExpectedArm64ASM": [ - "mov w20, w7", - "ldaddal w20, w7, [x4]", - "eor w27, w7, w20", - "adds w26, w7, w20" + "mov x20, x4", + "mov x21, x7", + "mov w22, w21", + "ldaddal w22, w21, [x20]", + "mov x7, x21", + "eor w20, w21, w22", + "mov x27, x20", + "adds w20, w21, w22", + "mov x26, x20" ] }, "xadd rax, rbx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "0x0f 0xc1", "ExpectedArm64ASM": [ "mov x20, x4", - "eor w27, w20, w7", - "adds x26, x20, x7", + "mov x21, x7", + "eor w22, w20, w21", + "mov x27, x22", + "adds x22, x20, x21", + "mov x26, x22", "mov x7, x20", - "mov x4, x26" + "mov x4, x22" ] }, "xadd [rax], rbx": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": "0x0f 0xc1", "ExpectedArm64ASM": [ - "mov x20, x7", - "ldaddal x20, x7, [x4]", - "eor w27, w7, w20", - "adds x26, x7, x20" + "mov x20, x4", + "mov x21, x7", + "ldaddal x21, x22, [x20]", + "mov x7, x22", + "eor w20, w22, w21", + "mov x27, x20", + "adds x20, x22, x21", + "mov x26, x20" ] }, "pmovmskb eax, mm0": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0xd7", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", "ldr d3, [x28, #2272]", - "cmlt v2.16b, v2.16b, #0", - "and v2.16b, v2.16b, v3.16b", - "addp v2.16b, v2.16b, v2.16b", - "addp v2.8b, v2.8b, v2.8b", - "addp v2.8b, v2.8b, v2.8b", - "umov w4, v2.h[0]" + "cmlt v4.16b, v2.16b, #0", + "and v2.16b, v4.16b, v3.16b", + "addp v3.16b, v2.16b, v2.16b", + "addp v2.8b, v3.8b, v3.8b", + "addp v3.8b, v2.8b, v2.8b", + "umov w20, v3.h[0]", + "mov x4, x20" ] }, "maskmovq mm0, mm1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "0x0f 0xf7", "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", - "cmlt v2.16b, v2.16b, #0", - "ldr d3, [x28, #768]", - "ldr d4, [x11]", - "bsl v2.8b, v3.8b, v4.8b", - "str d2, [x11]" + "cmlt v3.16b, v2.16b, #0", + "ldr d2, [x28, #768]", + "mov x20, x11", + "ldr d4, [x20]", + "mov v5.8b, v3.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str d5, [x20]" ] } } diff --git a/unittests/InstructionCountCI/FlagM/SecondaryGroup.json b/unittests/InstructionCountCI/FlagM/SecondaryGroup.json index 1a465a88f6..5e8938ca0e 100644 --- a/unittests/InstructionCountCI/FlagM/SecondaryGroup.json +++ b/unittests/InstructionCountCI/FlagM/SecondaryGroup.json @@ -13,773 +13,916 @@ }, "Instructions": { "sgdt [rax]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "GROUP7 0x0F 0x1 /0", "ExpectedArm64ASM": [ - "mov w20, #0x0", - "strh w20, [x4]", - "mov x20, #0xfffffffffffe0000", - "stur x20, [x4, #2]" + "mov x20, x4", + "mov w21, #0x0", + "strh w21, [x20]", + "mov x21, #0xfffffffffffe0000", + "stur x21, [x20, #2]" ] }, "bt ax, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "GROUP8 0x0F 0xBA /4", "ExpectedArm64ASM": [ - "rmif x4, #63, #nzCv" + "mov x20, x4", + "rmif x20, #63, #nzCv" ] }, "bt eax, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "GROUP8 0x0F 0xBA /4", "ExpectedArm64ASM": [ - "rmif x4, #63, #nzCv" + "mov x20, x4", + "rmif x20, #63, #nzCv" ] }, "bt rax, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "GROUP8 0x0F 0xBA /4", "ExpectedArm64ASM": [ - "rmif x4, #63, #nzCv" + "mov x20, x4", + "rmif x20, #63, #nzCv" ] }, "bt ax, 15": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "GROUP8 0x0F 0xBA /4", "ExpectedArm64ASM": [ - "rmif x4, #14, #nzCv" + "mov x20, x4", + "rmif x20, #14, #nzCv" ] }, "bt eax, 31": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "GROUP8 0x0F 0xBA /4", "ExpectedArm64ASM": [ - "rmif x4, #30, #nzCv" + "mov x20, x4", + "rmif x20, #30, #nzCv" ] }, "bt rax, 63": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "GROUP8 0x0F 0xBA /4", "ExpectedArm64ASM": [ - "rmif x4, #62, #nzCv" + "mov x20, x4", + "rmif x20, #62, #nzCv" ] }, "bt word [rax], 0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4]", - "rmif x20, #63, #nzCv" + "mov x20, x4", + "ldrb w21, [x20]", + "rmif x21, #63, #nzCv" ] }, "bt dword [rax], 0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4]", - "rmif x20, #63, #nzCv" + "mov x20, x4", + "ldrb w21, [x20]", + "rmif x21, #63, #nzCv" ] }, "bt qword [rax], 0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4]", - "rmif x20, #63, #nzCv" + "mov x20, x4", + "ldrb w21, [x20]", + "rmif x21, #63, #nzCv" ] }, "bt word [rax], 15": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 4, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4, #1]", - "lsr w20, w20, #7", + "mov x20, x4", + "ldrb w21, [x20, #1]", + "lsr w20, w21, #7", "rmif x20, #63, #nzCv" ] }, "bt dword [rax], 31": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 4, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4, #3]", - "lsr w20, w20, #7", + "mov x20, x4", + "ldrb w21, [x20, #3]", + "lsr w20, w21, #7", "rmif x20, #63, #nzCv" ] }, "bt qword [rax], 63": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 4, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4, #7]", - "lsr w20, w20, #7", + "mov x20, x4", + "ldrb w21, [x20, #7]", + "lsr w20, w21, #7", "rmif x20, #63, #nzCv" ] }, "bts ax, 0": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /5", "ExpectedArm64ASM": [ - "rmif x4, #63, #nzCv", - "orr w20, w4, #0x1", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "rmif x20, #63, #nzCv", + "orr w21, w20, #0x1", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22" ] }, "bts eax, 0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP8 0x0F 0xBA /5", "ExpectedArm64ASM": [ - "rmif x4, #63, #nzCv", - "orr w4, w4, #0x1" + "mov x20, x4", + "rmif x20, #63, #nzCv", + "orr w21, w20, #0x1", + "mov x4, x21" ] }, "bts rax, 0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP8 0x0F 0xBA /5", "ExpectedArm64ASM": [ - "rmif x4, #63, #nzCv", - "orr x4, x4, #0x1" + "mov x20, x4", + "rmif x20, #63, #nzCv", + "orr x21, x20, #0x1", + "mov x4, x21" ] }, "bts ax, 15": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /5", "ExpectedArm64ASM": [ - "rmif x4, #14, #nzCv", - "orr w20, w4, #0x8000", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "rmif x20, #14, #nzCv", + "orr w21, w20, #0x8000", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22" ] }, "bts eax, 31": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP8 0x0F 0xBA /5", "ExpectedArm64ASM": [ - "rmif x4, #30, #nzCv", - "orr w4, w4, #0x80000000" + "mov x20, x4", + "rmif x20, #30, #nzCv", + "orr w21, w20, #0x80000000", + "mov x4, x21" ] }, "bts rax, 63": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP8 0x0F 0xBA /5", "ExpectedArm64ASM": [ - "rmif x4, #62, #nzCv", - "orr x4, x4, #0x8000000000000000" + "mov x20, x4", + "rmif x20, #62, #nzCv", + "orr x21, x20, #0x8000000000000000", + "mov x4, x21" ] }, "bts word [rax], 0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4]", - "orr x21, x20, #0x1", - "strb w21, [x4]", - "rmif x20, #63, #nzCv" + "mov x20, x4", + "ldrb w21, [x20]", + "orr x22, x21, #0x1", + "strb w22, [x20]", + "rmif x21, #63, #nzCv" ] }, "bts dword [rax], 0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4]", - "orr x21, x20, #0x1", - "strb w21, [x4]", - "rmif x20, #63, #nzCv" + "mov x20, x4", + "ldrb w21, [x20]", + "orr x22, x21, #0x1", + "strb w22, [x20]", + "rmif x21, #63, #nzCv" ] }, "bts qword [rax], 0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4]", - "orr x21, x20, #0x1", - "strb w21, [x4]", - "rmif x20, #63, #nzCv" + "mov x20, x4", + "ldrb w21, [x20]", + "orr x22, x21, #0x1", + "strb w22, [x20]", + "rmif x21, #63, #nzCv" ] }, "bts word [rax], 15": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4, #1]", - "orr x21, x20, #0x80", - "strb w21, [x4, #1]", - "lsr w20, w20, #7", + "mov x20, x4", + "ldrb w21, [x20, #1]", + "orr x22, x21, #0x80", + "strb w22, [x20, #1]", + "lsr w20, w21, #7", "rmif x20, #63, #nzCv" ] }, "bts dword [rax], 31": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4, #3]", - "orr x21, x20, #0x80", - "strb w21, [x4, #3]", - "lsr w20, w20, #7", + "mov x20, x4", + "ldrb w21, [x20, #3]", + "orr x22, x21, #0x80", + "strb w22, [x20, #3]", + "lsr w20, w21, #7", "rmif x20, #63, #nzCv" ] }, "bts qword [rax], 63": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4, #7]", - "orr x21, x20, #0x80", - "strb w21, [x4, #7]", - "lsr w20, w20, #7", + "mov x20, x4", + "ldrb w21, [x20, #7]", + "orr x22, x21, #0x80", + "strb w22, [x20, #7]", + "lsr w20, w21, #7", "rmif x20, #63, #nzCv" ] }, "lock bts word [rax], 0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x0 (0)", - "mov w21, #0x1", - "ldsetalb w21, w20, [x20]", - "rmif x20, #63, #nzCv" + "mov x20, x4", + "add x21, x20, #0x0 (0)", + "mov w20, #0x1", + "ldsetalb w20, w22, [x21]", + "rmif x22, #63, #nzCv" ] }, "lock bts dword [rax], 0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x0 (0)", - "mov w21, #0x1", - "ldsetalb w21, w20, [x20]", - "rmif x20, #63, #nzCv" + "mov x20, x4", + "add x21, x20, #0x0 (0)", + "mov w20, #0x1", + "ldsetalb w20, w22, [x21]", + "rmif x22, #63, #nzCv" ] }, "lock bts qword [rax], 0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x0 (0)", - "mov w21, #0x1", - "ldsetalb w21, w20, [x20]", - "rmif x20, #63, #nzCv" + "mov x20, x4", + "add x21, x20, #0x0 (0)", + "mov w20, #0x1", + "ldsetalb w20, w22, [x21]", + "rmif x22, #63, #nzCv" ] }, "lock bts word [rax], 15": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x1 (1)", - "mov w21, #0x80", - "ldsetalb w21, w20, [x20]", - "lsr w20, w20, #7", + "mov x20, x4", + "add x21, x20, #0x1 (1)", + "mov w20, #0x80", + "ldsetalb w20, w22, [x21]", + "lsr w20, w22, #7", "rmif x20, #63, #nzCv" ] }, "lock bts dword [rax], 31": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x3 (3)", - "mov w21, #0x80", - "ldsetalb w21, w20, [x20]", - "lsr w20, w20, #7", + "mov x20, x4", + "add x21, x20, #0x3 (3)", + "mov w20, #0x80", + "ldsetalb w20, w22, [x21]", + "lsr w20, w22, #7", "rmif x20, #63, #nzCv" ] }, "lock bts qword [rax], 63": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x7 (7)", - "mov w21, #0x80", - "ldsetalb w21, w20, [x20]", - "lsr w20, w20, #7", + "mov x20, x4", + "add x21, x20, #0x7 (7)", + "mov w20, #0x80", + "ldsetalb w20, w22, [x21]", + "lsr w20, w22, #7", "rmif x20, #63, #nzCv" ] }, "btr ax, 0": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "rmif x4, #63, #nzCv", - "and w20, w4, #0xfffffffe", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "rmif x20, #63, #nzCv", + "and w21, w20, #0xfffffffe", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22" ] }, "btr eax, 0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "rmif x4, #63, #nzCv", - "and w4, w4, #0xfffffffe" + "mov x20, x4", + "rmif x20, #63, #nzCv", + "and w21, w20, #0xfffffffe", + "mov x4, x21" ] }, "btr rax, 0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "rmif x4, #63, #nzCv", - "and x4, x4, #0xfffffffffffffffe" + "mov x20, x4", + "rmif x20, #63, #nzCv", + "and x21, x20, #0xfffffffffffffffe", + "mov x4, x21" ] }, "btr ax, 15": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "rmif x4, #14, #nzCv", - "and w20, w4, #0xffff7fff", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "rmif x20, #14, #nzCv", + "and w21, w20, #0xffff7fff", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22" ] }, "btr eax, 31": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "rmif x4, #30, #nzCv", - "and w4, w4, #0x7fffffff" + "mov x20, x4", + "rmif x20, #30, #nzCv", + "and w21, w20, #0x7fffffff", + "mov x4, x21" ] }, "btr rax, 63": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "rmif x4, #62, #nzCv", - "and x4, x4, #0x7fffffffffffffff" + "mov x20, x4", + "rmif x20, #62, #nzCv", + "and x21, x20, #0x7fffffffffffffff", + "mov x4, x21" ] }, "btr word [rax], 0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4]", - "and x21, x20, #0xfffffffffffffffe", - "strb w21, [x4]", - "rmif x20, #63, #nzCv" + "mov x20, x4", + "ldrb w21, [x20]", + "and x22, x21, #0xfffffffffffffffe", + "strb w22, [x20]", + "rmif x21, #63, #nzCv" ] }, "btr dword [rax], 0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4]", - "and x21, x20, #0xfffffffffffffffe", - "strb w21, [x4]", - "rmif x20, #63, #nzCv" + "mov x20, x4", + "ldrb w21, [x20]", + "and x22, x21, #0xfffffffffffffffe", + "strb w22, [x20]", + "rmif x21, #63, #nzCv" ] }, "btr qword [rax], 0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4]", - "and x21, x20, #0xfffffffffffffffe", - "strb w21, [x4]", - "rmif x20, #63, #nzCv" + "mov x20, x4", + "ldrb w21, [x20]", + "and x22, x21, #0xfffffffffffffffe", + "strb w22, [x20]", + "rmif x21, #63, #nzCv" ] }, "btr word [rax], 15": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4, #1]", - "and x21, x20, #0xffffffffffffff7f", - "strb w21, [x4, #1]", - "lsr w20, w20, #7", + "mov x20, x4", + "ldrb w21, [x20, #1]", + "and x22, x21, #0xffffffffffffff7f", + "strb w22, [x20, #1]", + "lsr w20, w21, #7", "rmif x20, #63, #nzCv" ] }, "btr dword [rax], 31": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4, #3]", - "and x21, x20, #0xffffffffffffff7f", - "strb w21, [x4, #3]", - "lsr w20, w20, #7", + "mov x20, x4", + "ldrb w21, [x20, #3]", + "and x22, x21, #0xffffffffffffff7f", + "strb w22, [x20, #3]", + "lsr w20, w21, #7", "rmif x20, #63, #nzCv" ] }, "btr qword [rax], 63": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4, #7]", - "and x21, x20, #0xffffffffffffff7f", - "strb w21, [x4, #7]", - "lsr w20, w20, #7", + "mov x20, x4", + "ldrb w21, [x20, #7]", + "and x22, x21, #0xffffffffffffff7f", + "strb w22, [x20, #7]", + "lsr w20, w21, #7", "rmif x20, #63, #nzCv" ] }, "lock btr word [rax], 0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x0 (0)", - "mov w21, #0x1", - "ldclralb w21, w20, [x20]", - "rmif x20, #63, #nzCv" + "mov x20, x4", + "add x21, x20, #0x0 (0)", + "mov w20, #0x1", + "ldclralb w20, w22, [x21]", + "rmif x22, #63, #nzCv" ] }, "lock btr dword [rax], 0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x0 (0)", - "mov w21, #0x1", - "ldclralb w21, w20, [x20]", - "rmif x20, #63, #nzCv" + "mov x20, x4", + "add x21, x20, #0x0 (0)", + "mov w20, #0x1", + "ldclralb w20, w22, [x21]", + "rmif x22, #63, #nzCv" ] }, "lock btr qword [rax], 0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x0 (0)", - "mov w21, #0x1", - "ldclralb w21, w20, [x20]", - "rmif x20, #63, #nzCv" + "mov x20, x4", + "add x21, x20, #0x0 (0)", + "mov w20, #0x1", + "ldclralb w20, w22, [x21]", + "rmif x22, #63, #nzCv" ] }, "lock btr word [rax], 15": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x1 (1)", - "mov w21, #0x80", - "ldclralb w21, w20, [x20]", - "lsr w20, w20, #7", + "mov x20, x4", + "add x21, x20, #0x1 (1)", + "mov w20, #0x80", + "ldclralb w20, w22, [x21]", + "lsr w20, w22, #7", "rmif x20, #63, #nzCv" ] }, "lock btr dword [rax], 31": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x3 (3)", - "mov w21, #0x80", - "ldclralb w21, w20, [x20]", - "lsr w20, w20, #7", + "mov x20, x4", + "add x21, x20, #0x3 (3)", + "mov w20, #0x80", + "ldclralb w20, w22, [x21]", + "lsr w20, w22, #7", "rmif x20, #63, #nzCv" ] }, "lock btr qword [rax], 63": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x7 (7)", - "mov w21, #0x80", - "ldclralb w21, w20, [x20]", - "lsr w20, w20, #7", + "mov x20, x4", + "add x21, x20, #0x7 (7)", + "mov w20, #0x80", + "ldclralb w20, w22, [x21]", + "lsr w20, w22, #7", "rmif x20, #63, #nzCv" ] }, "btc ax, 0": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /7", "ExpectedArm64ASM": [ - "rmif x4, #63, #nzCv", - "eor w20, w4, #0x1", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "rmif x20, #63, #nzCv", + "eor w21, w20, #0x1", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22" ] }, "btc eax, 0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP8 0x0F 0xBA /7", "ExpectedArm64ASM": [ - "rmif x4, #63, #nzCv", - "eor w4, w4, #0x1" + "mov x20, x4", + "rmif x20, #63, #nzCv", + "eor w21, w20, #0x1", + "mov x4, x21" ] }, "btc rax, 0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP8 0x0F 0xBA /7", "ExpectedArm64ASM": [ - "rmif x4, #63, #nzCv", - "eor x4, x4, #0x1" + "mov x20, x4", + "rmif x20, #63, #nzCv", + "eor x21, x20, #0x1", + "mov x4, x21" ] }, "btc ax, 15": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /7", "ExpectedArm64ASM": [ - "rmif x4, #14, #nzCv", - "eor w20, w4, #0x8000", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "rmif x20, #14, #nzCv", + "eor w21, w20, #0x8000", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22" ] }, "btc eax, 31": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP8 0x0F 0xBA /7", "ExpectedArm64ASM": [ - "rmif x4, #30, #nzCv", - "eor w4, w4, #0x80000000" + "mov x20, x4", + "rmif x20, #30, #nzCv", + "eor w21, w20, #0x80000000", + "mov x4, x21" ] }, "btc rax, 63": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP8 0x0F 0xBA /7", "ExpectedArm64ASM": [ - "rmif x4, #62, #nzCv", - "eor x4, x4, #0x8000000000000000" + "mov x20, x4", + "rmif x20, #62, #nzCv", + "eor x21, x20, #0x8000000000000000", + "mov x4, x21" ] }, "btc word [rax], 0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4]", - "eor x21, x20, #0x1", - "strb w21, [x4]", - "rmif x20, #63, #nzCv" + "mov x20, x4", + "ldrb w21, [x20]", + "eor x22, x21, #0x1", + "strb w22, [x20]", + "rmif x21, #63, #nzCv" ] }, "btc dword [rax], 0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4]", - "eor x21, x20, #0x1", - "strb w21, [x4]", - "rmif x20, #63, #nzCv" + "mov x20, x4", + "ldrb w21, [x20]", + "eor x22, x21, #0x1", + "strb w22, [x20]", + "rmif x21, #63, #nzCv" ] }, "btc qword [rax], 0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4]", - "eor x21, x20, #0x1", - "strb w21, [x4]", - "rmif x20, #63, #nzCv" + "mov x20, x4", + "ldrb w21, [x20]", + "eor x22, x21, #0x1", + "strb w22, [x20]", + "rmif x21, #63, #nzCv" ] }, "btc word [rax], 15": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4, #1]", - "eor x21, x20, #0x80", - "strb w21, [x4, #1]", - "lsr w20, w20, #7", + "mov x20, x4", + "ldrb w21, [x20, #1]", + "eor x22, x21, #0x80", + "strb w22, [x20, #1]", + "lsr w20, w21, #7", "rmif x20, #63, #nzCv" ] }, "btc dword [rax], 31": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4, #3]", - "eor x21, x20, #0x80", - "strb w21, [x4, #3]", - "lsr w20, w20, #7", + "mov x20, x4", + "ldrb w21, [x20, #3]", + "eor x22, x21, #0x80", + "strb w22, [x20, #3]", + "lsr w20, w21, #7", "rmif x20, #63, #nzCv" ] }, "btc qword [rax], 63": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4, #7]", - "eor x21, x20, #0x80", - "strb w21, [x4, #7]", - "lsr w20, w20, #7", + "mov x20, x4", + "ldrb w21, [x20, #7]", + "eor x22, x21, #0x80", + "strb w22, [x20, #7]", + "lsr w20, w21, #7", "rmif x20, #63, #nzCv" ] }, "lock btc word [rax], 0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x0 (0)", - "mov w21, #0x1", - "ldeoralb w21, w20, [x20]", - "rmif x20, #63, #nzCv" + "mov x20, x4", + "add x21, x20, #0x0 (0)", + "mov w20, #0x1", + "ldeoralb w20, w22, [x21]", + "rmif x22, #63, #nzCv" ] }, "lock btc dword [rax], 0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x0 (0)", - "mov w21, #0x1", - "ldeoralb w21, w20, [x20]", - "rmif x20, #63, #nzCv" + "mov x20, x4", + "add x21, x20, #0x0 (0)", + "mov w20, #0x1", + "ldeoralb w20, w22, [x21]", + "rmif x22, #63, #nzCv" ] }, "lock btc qword [rax], 0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x0 (0)", - "mov w21, #0x1", - "ldeoralb w21, w20, [x20]", - "rmif x20, #63, #nzCv" + "mov x20, x4", + "add x21, x20, #0x0 (0)", + "mov w20, #0x1", + "ldeoralb w20, w22, [x21]", + "rmif x22, #63, #nzCv" ] }, "lock btc word [rax], 15": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x1 (1)", - "mov w21, #0x80", - "ldeoralb w21, w20, [x20]", - "lsr w20, w20, #7", + "mov x20, x4", + "add x21, x20, #0x1 (1)", + "mov w20, #0x80", + "ldeoralb w20, w22, [x21]", + "lsr w20, w22, #7", "rmif x20, #63, #nzCv" ] }, "lock btc dword [rax], 31": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x3 (3)", - "mov w21, #0x80", - "ldeoralb w21, w20, [x20]", - "lsr w20, w20, #7", + "mov x20, x4", + "add x21, x20, #0x3 (3)", + "mov w20, #0x80", + "ldeoralb w20, w22, [x21]", + "lsr w20, w22, #7", "rmif x20, #63, #nzCv" ] }, "lock btc qword [rax], 63": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x7 (7)", - "mov w21, #0x80", - "ldeoralb w21, w20, [x20]", - "lsr w20, w20, #7", + "mov x20, x4", + "add x21, x20, #0x7 (7)", + "mov w20, #0x80", + "ldeoralb w20, w22, [x21]", + "lsr w20, w22, #7", "rmif x20, #63, #nzCv" ] }, "cmpxchg8b [rbp]": { - "ExpectedInstructionCount": 22, + "ExpectedInstructionCount": 37, "Comment": "GROUP9 0x0F 0xC7 /1", "ExpectedArm64ASM": [ - "add x20, x9, #0x0 (0)", - "mov w21, w4", - "mov w22, w6", - "mov w23, w22", - "mov w22, w21", - "mov w21, w7", - "mov w24, w5", - "mov w25, w24", - "mov w24, w21", - "mov w2, w22", - "mov w3, w23", - "caspal w2, w3, w24, w25, [x20]", + "sub sp, sp, #0x40 (64)", + "mov x20, x9", + "add x21, x20, #0x0 (0)", + "mov x20, x4", + "mov w22, w20", + "mov x23, x6", + "mov w24, w23", + "mov x30, x24", + "mov w24, w22", + "mov w25, w30", + "mov x22, x7", + "mov w30, w22", + "mov x22, x5", + "mov w18, w22", + "str x23, [sp]", + "mov w22, w30", + "mov w23, w18", + "str x20, [sp, #32]", + "mov x30, x21", + "mov w2, w24", + "mov w3, w25", + "caspal w2, w3, w22, w23, [x30]", "mov w20, w2", "mov w21, w3", - "mov w24, w20", - "mov w25, w21", + "mov w22, w20", + "mov w23, w21", "mrs x0, nzcv", - "cmp w20, w22", - "ccmp w21, w23, #nzcv, eq", + "cmp w20, w24", + "ccmp w21, w25, #nzcv, eq", "rmif x0, #0, #NzCV", - "csel x4, x24, x4, ne", - "csel x6, x25, x6, ne" + "ldr x20, [sp, #32]", + "csel x21, x22, x20, ne", + "mov x4, x21", + "ldr x20, [sp]", + "csel x21, x23, x20, ne", + "mov x6, x21", + "add sp, sp, #0x40 (64)" ] }, "cmpxchg16b [rbp]": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 33, "Comment": "GROUP9 0x0F 0xC7 /1", "ExpectedArm64ASM": [ - "add x20, x9, #0x0 (0)", - "mov x22, x4", - "mov x23, x6", - "mov x24, x7", - "mov x25, x5", - "mov x2, x22", - "mov x3, x23", - "caspal x2, x3, x24, x25, [x20]", + "sub sp, sp, #0x40 (64)", + "mov x20, x9", + "add x21, x20, #0x0 (0)", + "mov x20, x4", + "mov x22, x6", + "mov x24, x20", + "mov x25, x22", + "mov x23, x7", + "mov x30, x5", + "str x22, [sp]", + "mov x18, x23", + "mov x22, x18", + "mov x23, x30", + "str x20, [sp, #32]", + "mov x30, x21", + "mov x2, x24", + "mov x3, x25", + "caspal x2, x3, x22, x23, [x30]", "mov x20, x2", "mov x21, x3", - "mov x24, x20", - "mov x25, x21", + "mov x22, x20", + "mov x23, x21", "mrs x0, nzcv", - "cmp w20, w22", - "ccmp w21, w23, #nzcv, eq", + "cmp w20, w24", + "ccmp w21, w25, #nzcv, eq", "rmif x0, #0, #NzCV", - "csel x4, x24, x4, ne", - "csel x6, x25, x6, ne" + "ldr x20, [sp, #32]", + "csel x21, x22, x20, ne", + "mov x4, x21", + "ldr x20, [sp]", + "csel x21, x23, x20, ne", + "mov x6, x21", + "add sp, sp, #0x40 (64)" ] }, "rdrand ax": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 14, "Comment": "GROUP9 0x0F 0xC7 /6", "ExpectedArm64ASM": [ "mrs x20, rndr", "cset x21, ne", "mov x22, x20", - "mov x20, x21", - "bfxil x4, x22, #0, #16", - "mov w26, #0x1", - "mov w27, #0x0", - "lsl x20, x20, #29", + "mov x23, x21", + "mov x20, x4", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21", + "mov w20, #0x1", + "mov x26, x20", + "mov w20, #0x0", + "mov x27, x20", + "lsl x20, x23, #29", "msr nzcv, x20" ] }, "rdrand eax": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 12, "Comment": "GROUP9 0x0F 0xC7 /6", "ExpectedArm64ASM": [ "mrs x20, rndr", "cset x21, ne", "mov x22, x20", - "mov x20, x21", - "mov w4, w22", - "mov w26, #0x1", - "mov w27, #0x0", - "lsl x20, x20, #29", + "mov x23, x21", + "mov w20, w22", + "mov x4, x20", + "mov w20, #0x1", + "mov x26, x20", + "mov w20, #0x0", + "mov x27, x20", + "lsl x20, x23, #29", "msr nzcv, x20" ] }, "rdrand rax": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 11, "Comment": "GROUP9 0x0F 0xC7 /6", "ExpectedArm64ASM": [ "mrs x20, rndr", "cset x21, ne", - "mov x4, x20", - "mov x20, x21", - "mov w26, #0x1", - "mov w27, #0x0", - "lsl x20, x20, #29", + "mov x22, x20", + "mov x23, x21", + "mov x4, x22", + "mov w20, #0x1", + "mov x26, x20", + "mov w20, #0x0", + "mov x27, x20", + "lsl x20, x23, #29", "msr nzcv, x20" ] }, "rdseed ax": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 14, "Comment": "GROUP9 0x0F 0xC7 /7", "ExpectedArm64ASM": [ "mrs x20, rndrrs", "cset x21, ne", "mov x22, x20", - "mov x20, x21", - "bfxil x4, x22, #0, #16", - "mov w26, #0x1", - "mov w27, #0x0", - "lsl x20, x20, #29", + "mov x23, x21", + "mov x20, x4", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21", + "mov w20, #0x1", + "mov x26, x20", + "mov w20, #0x0", + "mov x27, x20", + "lsl x20, x23, #29", "msr nzcv, x20" ] }, "rdseed eax": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 12, "Comment": "GROUP9 0x0F 0xC7 /7", "ExpectedArm64ASM": [ "mrs x20, rndrrs", "cset x21, ne", "mov x22, x20", - "mov x20, x21", - "mov w4, w22", - "mov w26, #0x1", - "mov w27, #0x0", - "lsl x20, x20, #29", + "mov x23, x21", + "mov w20, w22", + "mov x4, x20", + "mov w20, #0x1", + "mov x26, x20", + "mov w20, #0x0", + "mov x27, x20", + "lsl x20, x23, #29", "msr nzcv, x20" ] }, "rdseed rax": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 11, "Comment": "GROUP9 0x0F 0xC7 /7", "ExpectedArm64ASM": [ "mrs x20, rndrrs", "cset x21, ne", - "mov x4, x20", - "mov x20, x21", - "mov w26, #0x1", - "mov w27, #0x0", - "lsl x20, x20, #29", + "mov x22, x20", + "mov x23, x21", + "mov x4, x22", + "mov w20, #0x1", + "mov x26, x20", + "mov w20, #0x0", + "mov x27, x20", + "lsl x20, x23, #29", "msr nzcv, x20" ] }, @@ -795,8 +938,8 @@ "Comment": "GROUP12 0x0F 0xC7 /2", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "ushr v2.8h, v2.8h, #15", - "str d2, [x28, #768]" + "ushr v3.8h, v2.8h, #15", + "str d3, [x28, #768]" ] }, "psrlw mm0, 16": { @@ -805,8 +948,8 @@ "Comment": "GROUP12 0x0F 0xC7 /2", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "movi v2.2d, #0x0", - "str d2, [x28, #768]" + "movi v3.2d, #0x0", + "str d3, [x28, #768]" ] }, "psrlw xmm0, 0": { @@ -816,19 +959,23 @@ "ExpectedArm64ASM": [] }, "psrlw xmm0, 15": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Type": "SSE", "Comment": "GROUP12 0x0F 0xC7 /2", "ExpectedArm64ASM": [ - "ushr v16.8h, v16.8h, #15" + "mov v2.16b, v16.16b", + "ushr v3.8h, v2.8h, #15", + "mov v16.16b, v3.16b" ] }, "psrlw xmm0, 16": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Type": "SSE", "Comment": "GROUP12 0x0F 0xC7 /2", "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "mov v2.16b, v16.16b", + "movi v3.2d, #0x0", + "mov v16.16b, v3.16b" ] }, "psraw mm0, 0": { @@ -843,8 +990,8 @@ "Comment": "GROUP12 0x0F 0xC7 /3", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "sshr v2.8h, v2.8h, #15", - "str d2, [x28, #768]" + "sshr v3.8h, v2.8h, #15", + "str d3, [x28, #768]" ] }, "psraw mm0, 16": { @@ -853,8 +1000,8 @@ "Comment": "GROUP12 0x0F 0xC7 /3", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "sshr v2.8h, v2.8h, #15", - "str d2, [x28, #768]" + "sshr v3.8h, v2.8h, #15", + "str d3, [x28, #768]" ] }, "psraw xmm0, 0": { @@ -864,19 +1011,23 @@ "ExpectedArm64ASM": [] }, "psraw xmm0, 15": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Type": "SSE", "Comment": "GROUP12 0x0F 0xC7 /3", "ExpectedArm64ASM": [ - "sshr v16.8h, v16.8h, #15" + "mov v2.16b, v16.16b", + "sshr v3.8h, v2.8h, #15", + "mov v16.16b, v3.16b" ] }, "psraw xmm0, 16": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Type": "SSE", "Comment": "GROUP12 0x0F 0xC7 /3", "ExpectedArm64ASM": [ - "sshr v16.8h, v16.8h, #15" + "mov v2.16b, v16.16b", + "sshr v3.8h, v2.8h, #15", + "mov v16.16b, v3.16b" ] }, "psllw mm0, 0": { @@ -891,8 +1042,8 @@ "Comment": "GROUP12 0x0F 0xC7 /6", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "shl v2.8h, v2.8h, #15", - "str d2, [x28, #768]" + "shl v3.8h, v2.8h, #15", + "str d3, [x28, #768]" ] }, "psllw mm0, 16": { @@ -901,8 +1052,8 @@ "Comment": "GROUP12 0x0F 0xC7 /6", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "movi v2.2d, #0x0", - "str d2, [x28, #768]" + "movi v3.2d, #0x0", + "str d3, [x28, #768]" ] }, "psllw xmm0, 0": { @@ -912,19 +1063,23 @@ "ExpectedArm64ASM": [] }, "psllw xmm0, 15": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Type": "SSE", "Comment": "GROUP12 0x0F 0xC7 /6", "ExpectedArm64ASM": [ - "shl v16.8h, v16.8h, #15" + "mov v2.16b, v16.16b", + "shl v3.8h, v2.8h, #15", + "mov v16.16b, v3.16b" ] }, "psllw xmm0, 16": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Type": "SSE", "Comment": "GROUP12 0x0F 0xC7 /6", "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "mov v2.16b, v16.16b", + "movi v3.2d, #0x0", + "mov v16.16b, v3.16b" ] }, "psrld mm0, 0": { @@ -939,8 +1094,8 @@ "Comment": "GROUP13 0x0F 0xC7 /2", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "ushr v2.4s, v2.4s, #31", - "str d2, [x28, #768]" + "ushr v3.4s, v2.4s, #31", + "str d3, [x28, #768]" ] }, "psrld mm0, 32": { @@ -949,8 +1104,8 @@ "Comment": "GROUP13 0x0F 0xC7 /2", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "movi v2.2d, #0x0", - "str d2, [x28, #768]" + "movi v3.2d, #0x0", + "str d3, [x28, #768]" ] }, "psrld xmm0, 0": { @@ -960,19 +1115,23 @@ "ExpectedArm64ASM": [] }, "psrld xmm0, 31": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Type": "SSE", "Comment": "GROUP13 0x0F 0xC7 /2", "ExpectedArm64ASM": [ - "ushr v16.4s, v16.4s, #31" + "mov v2.16b, v16.16b", + "ushr v3.4s, v2.4s, #31", + "mov v16.16b, v3.16b" ] }, "psrld xmm0, 32": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Type": "SSE", "Comment": "GROUP13 0x0F 0xC7 /2", "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "mov v2.16b, v16.16b", + "movi v3.2d, #0x0", + "mov v16.16b, v3.16b" ] }, "psrad mm0, 0": { @@ -987,8 +1146,8 @@ "Comment": "GROUP13 0x0F 0xC7 /3", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "sshr v2.4s, v2.4s, #31", - "str d2, [x28, #768]" + "sshr v3.4s, v2.4s, #31", + "str d3, [x28, #768]" ] }, "psrad mm0, 32": { @@ -997,8 +1156,8 @@ "Comment": "GROUP13 0x0F 0xC7 /3", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "sshr v2.4s, v2.4s, #31", - "str d2, [x28, #768]" + "sshr v3.4s, v2.4s, #31", + "str d3, [x28, #768]" ] }, "psrad xmm0, 0": { @@ -1008,19 +1167,23 @@ "ExpectedArm64ASM": [] }, "psrad xmm0, 31": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Type": "SSE", "Comment": "GROUP13 0x0F 0xC7 /3", "ExpectedArm64ASM": [ - "sshr v16.4s, v16.4s, #31" + "mov v2.16b, v16.16b", + "sshr v3.4s, v2.4s, #31", + "mov v16.16b, v3.16b" ] }, "psrad xmm0, 32": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Type": "SSE", "Comment": "GROUP13 0x0F 0xC7 /3", "ExpectedArm64ASM": [ - "sshr v16.4s, v16.4s, #31" + "mov v2.16b, v16.16b", + "sshr v3.4s, v2.4s, #31", + "mov v16.16b, v3.16b" ] }, "pslld mm0, 0": { @@ -1035,8 +1198,8 @@ "Comment": "GROUP13 0x0F 0xC7 /6", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "shl v2.4s, v2.4s, #31", - "str d2, [x28, #768]" + "shl v3.4s, v2.4s, #31", + "str d3, [x28, #768]" ] }, "pslld mm0, 32": { @@ -1045,8 +1208,8 @@ "Comment": "GROUP13 0x0F 0xC7 /6", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "movi v2.2d, #0x0", - "str d2, [x28, #768]" + "movi v3.2d, #0x0", + "str d3, [x28, #768]" ] }, "pslld xmm0, 0": { @@ -1056,19 +1219,23 @@ "ExpectedArm64ASM": [] }, "pslld xmm0, 31": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Type": "SSE", "Comment": "GROUP13 0x0F 0xC7 /6", "ExpectedArm64ASM": [ - "shl v16.4s, v16.4s, #31" + "mov v2.16b, v16.16b", + "shl v3.4s, v2.4s, #31", + "mov v16.16b, v3.16b" ] }, "pslld xmm0, 32": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Type": "SSE", "Comment": "GROUP13 0x0F 0xC7 /6", "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "mov v2.16b, v16.16b", + "movi v3.2d, #0x0", + "mov v16.16b, v3.16b" ] }, "psrlq mm0, 0": { @@ -1083,8 +1250,8 @@ "Comment": "GROUP14 0x0F 0xC7 /2", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "ushr v2.2d, v2.2d, #63", - "str d2, [x28, #768]" + "ushr v3.2d, v2.2d, #63", + "str d3, [x28, #768]" ] }, "psrlq mm0, 64": { @@ -1093,8 +1260,8 @@ "Comment": "GROUP14 0x0F 0xC7 /2", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "movi v2.2d, #0x0", - "str d2, [x28, #768]" + "movi v3.2d, #0x0", + "str d3, [x28, #768]" ] }, "psrlq xmm0, 0": { @@ -1104,19 +1271,23 @@ "ExpectedArm64ASM": [] }, "psrlq xmm0, 63": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Type": "SSE", "Comment": "GROUP14 0x0F 0xC7 /2", "ExpectedArm64ASM": [ - "ushr v16.2d, v16.2d, #63" + "mov v2.16b, v16.16b", + "ushr v3.2d, v2.2d, #63", + "mov v16.16b, v3.16b" ] }, "psrlq xmm0, 64": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Type": "SSE", "Comment": "GROUP14 0x0F 0xC7 /2", "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "mov v2.16b, v16.16b", + "movi v3.2d, #0x0", + "mov v16.16b, v3.16b" ] }, "psrldq xmm0, 0": { @@ -1126,20 +1297,23 @@ "ExpectedArm64ASM": [] }, "psrldq xmm0, 15": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Type": "SSE", "Comment": "GROUP14 0x0F 0xC7 /3", "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "ext v16.16b, v16.16b, v2.16b, #15" + "mov v2.16b, v16.16b", + "movi v3.2d, #0x0", + "ext v4.16b, v2.16b, v3.16b, #15", + "mov v16.16b, v4.16b" ] }, "psrldq xmm0, 16": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Type": "SSE", "Comment": "GROUP14 0x0F 0xC7 /3", "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "movi v2.2d, #0x0", + "mov v16.16b, v2.16b" ] }, "psllq mm0, 0": { @@ -1154,8 +1328,8 @@ "Comment": "GROUP14 0x0F 0xC7 /6", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "shl v2.2d, v2.2d, #63", - "str d2, [x28, #768]" + "shl v3.2d, v2.2d, #63", + "str d3, [x28, #768]" ] }, "psllq mm0, 64": { @@ -1164,8 +1338,8 @@ "Comment": "GROUP14 0x0F 0xC7 /6", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "movi v2.2d, #0x0", - "str d2, [x28, #768]" + "movi v3.2d, #0x0", + "str d3, [x28, #768]" ] }, "psllq xmm0, 0": { @@ -1175,152 +1349,194 @@ "ExpectedArm64ASM": [] }, "psllq xmm0, 63": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Type": "SSE", "Comment": "GROUP14 0x0F 0xC7 /6", "ExpectedArm64ASM": [ - "shl v16.2d, v16.2d, #63" + "mov v2.16b, v16.16b", + "shl v3.2d, v2.2d, #63", + "mov v16.16b, v3.16b" ] }, "psllq xmm0, 64": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Type": "SSE", "Comment": "GROUP14 0x0F 0xC7 /6", "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "mov v2.16b, v16.16b", + "movi v3.2d, #0x0", + "mov v16.16b, v3.16b" ] }, "fxsave [rax]": { - "ExpectedInstructionCount": 58, + "ExpectedInstructionCount": 77, "Comment": "GROUP15 0x0F 0xAE /0", "ExpectedArm64ASM": [ - "ldrh w20, [x28, #1024]", - "strh w20, [x4]", - "mov w20, #0x0", - "ldrb w21, [x28, #747]", - "bfi x20, x21, #11, #3", + "mov x20, x4", + "ldrh w21, [x28, #1024]", + "strh w21, [x20]", + "mov w21, #0x0", + "ldrb w22, [x28, #747]", + "mov x23, x21", + "bfi x23, x22, #11, #3", "ldrb w21, [x28, #744]", "ldrb w22, [x28, #745]", - "ldrb w23, [x28, #746]", - "ldrb w24, [x28, #750]", - "orr x20, x20, x21, lsl #8", - "orr x20, x20, x22, lsl #9", - "orr x20, x20, x23, lsl #10", - "orr x20, x20, x24, lsl #14", - "strh w20, [x4, #2]", - "ldrb w20, [x28, #1026]", - "strb w20, [x4, #4]", + "ldrb w24, [x28, #746]", + "ldrb w25, [x28, #750]", + "orr x30, x23, x21, lsl #8", + "orr x21, x30, x22, lsl #9", + "orr x22, x21, x24, lsl #10", + "orr x21, x22, x25, lsl #14", + "strh w21, [x20, #2]", + "ldrb w21, [x28, #1026]", + "strb w21, [x20, #4]", "ldr q2, [x28, #768]", - "str q2, [x4, #32]", + "str q2, [x20, #32]", "ldr q2, [x28, #784]", - "str q2, [x4, #48]", + "str q2, [x20, #48]", "ldr q2, [x28, #800]", - "str q2, [x4, #64]", + "str q2, [x20, #64]", "ldr q2, [x28, #816]", - "str q2, [x4, #80]", + "str q2, [x20, #80]", "ldr q2, [x28, #832]", - "str q2, [x4, #96]", + "str q2, [x20, #96]", "ldr q2, [x28, #848]", - "str q2, [x4, #112]", + "str q2, [x20, #112]", "ldr q2, [x28, #864]", - "str q2, [x4, #128]", + "str q2, [x20, #128]", "ldr q2, [x28, #880]", - "str q2, [x4, #144]", - "str q16, [x4, #160]", - "str q17, [x4, #176]", - "str q18, [x4, #192]", - "str q19, [x4, #208]", - "str q20, [x4, #224]", - "str q21, [x4, #240]", - "str q22, [x4, #256]", - "str q23, [x4, #272]", - "str q24, [x4, #288]", - "str q25, [x4, #304]", - "str q26, [x4, #320]", - "str q27, [x4, #336]", - "str q28, [x4, #352]", - "str q29, [x4, #368]", - "str q30, [x4, #384]", - "str q31, [x4, #400]", - "mov w20, #0x1f80", - "mrs x21, fpcr", - "ubfx x21, x21, #22, #3", - "rbit w0, w21", - "bfi x21, x0, #30, #2", - "bfi w20, w21, #13, #3", - "add x21, x4, #0x18 (24)", - "str w20, [x4, #24]", + "str q2, [x20, #144]", + "mov v2.16b, v16.16b", + "str q2, [x20, #160]", + "mov v2.16b, v17.16b", + "str q2, [x20, #176]", + "mov v2.16b, v18.16b", + "str q2, [x20, #192]", + "mov v2.16b, v19.16b", + "str q2, [x20, #208]", + "mov v2.16b, v20.16b", + "str q2, [x20, #224]", + "mov v2.16b, v21.16b", + "str q2, [x20, #240]", + "mov v2.16b, v22.16b", + "str q2, [x20, #256]", + "mov v2.16b, v23.16b", + "str q2, [x20, #272]", + "mov v2.16b, v24.16b", + "str q2, [x20, #288]", + "mov v2.16b, v25.16b", + "str q2, [x20, #304]", + "mov v2.16b, v26.16b", + "str q2, [x20, #320]", + "mov v2.16b, v27.16b", + "str q2, [x20, #336]", + "mov v2.16b, v28.16b", + "str q2, [x20, #352]", + "mov v2.16b, v29.16b", + "str q2, [x20, #368]", + "mov v2.16b, v30.16b", + "str q2, [x20, #384]", + "mov v2.16b, v31.16b", + "str q2, [x20, #400]", + "mov w21, #0x1f80", + "mrs x22, fpcr", + "ubfx x22, x22, #22, #3", + "rbit w0, w22", + "bfi x22, x0, #30, #2", + "mov w23, w21", + "bfi w23, w22, #13, #3", + "add x21, x20, #0x18 (24)", + "str w23, [x20, #24]", "mov w20, #0xffff", "str w20, [x21, #4]" ] }, "rdfsbase eax": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "GROUP15 0x0F 0xAE /0", "ExpectedArm64ASM": [ - "ldr w4, [x28, #176]" + "ldr w20, [x28, #176]", + "mov x4, x20" ] }, "rdfsbase rax": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "GROUP15 0x0F 0xAE /0", "ExpectedArm64ASM": [ - "ldr x4, [x28, #176]" + "ldr x20, [x28, #176]", + "mov x4, x20" ] }, "fxrstor [rax]": { - "ExpectedInstructionCount": 56, + "ExpectedInstructionCount": 73, "Comment": "GROUP15 0x0F 0xAE /1", "ExpectedArm64ASM": [ - "ldrh w20, [x4]", - "strh w20, [x28, #1024]", - "ldrh w20, [x4, #2]", - "ubfx w21, w20, #11, #3", - "strb w21, [x28, #747]", - "ubfx w21, w20, #8, #1", - "ubfx w22, w20, #9, #1", - "ubfx w23, w20, #10, #1", - "ubfx w20, w20, #14, #1", - "strb w21, [x28, #744]", - "strb w22, [x28, #745]", - "strb w23, [x28, #746]", - "strb w20, [x28, #750]", - "ldrb w20, [x4, #4]", - "strb w20, [x28, #1026]", - "ldr q2, [x4, #32]", + "mov x20, x4", + "ldrh w21, [x20]", + "strh w21, [x28, #1024]", + "ldrh w21, [x20, #2]", + "ubfx w22, w21, #11, #3", + "strb w22, [x28, #747]", + "ubfx w22, w21, #8, #1", + "ubfx w23, w21, #9, #1", + "ubfx w24, w21, #10, #1", + "ubfx w25, w21, #14, #1", + "strb w22, [x28, #744]", + "strb w23, [x28, #745]", + "strb w24, [x28, #746]", + "strb w25, [x28, #750]", + "ldrb w21, [x20, #4]", + "strb w21, [x28, #1026]", + "ldr q2, [x20, #32]", "str q2, [x28, #768]", - "ldr q2, [x4, #48]", + "ldr q2, [x20, #48]", "str q2, [x28, #784]", - "ldr q2, [x4, #64]", + "ldr q2, [x20, #64]", "str q2, [x28, #800]", - "ldr q2, [x4, #80]", + "ldr q2, [x20, #80]", "str q2, [x28, #816]", - "ldr q2, [x4, #96]", + "ldr q2, [x20, #96]", "str q2, [x28, #832]", - "ldr q2, [x4, #112]", + "ldr q2, [x20, #112]", "str q2, [x28, #848]", - "ldr q2, [x4, #128]", + "ldr q2, [x20, #128]", "str q2, [x28, #864]", - "ldr q2, [x4, #144]", + "ldr q2, [x20, #144]", "str q2, [x28, #880]", - "ldr q16, [x4, #160]", - "ldr q17, [x4, #176]", - "ldr q18, [x4, #192]", - "ldr q19, [x4, #208]", - "ldr q20, [x4, #224]", - "ldr q21, [x4, #240]", - "ldr q22, [x4, #256]", - "ldr q23, [x4, #272]", - "ldr q24, [x4, #288]", - "ldr q25, [x4, #304]", - "ldr q26, [x4, #320]", - "ldr q27, [x4, #336]", - "ldr q28, [x4, #352]", - "ldr q29, [x4, #368]", - "ldr q30, [x4, #384]", - "ldr q31, [x4, #400]", - "ldr w20, [x4, #24]", - "ubfx w20, w20, #13, #3", + "ldr q2, [x20, #160]", + "mov v16.16b, v2.16b", + "ldr q2, [x20, #176]", + "mov v17.16b, v2.16b", + "ldr q2, [x20, #192]", + "mov v18.16b, v2.16b", + "ldr q2, [x20, #208]", + "mov v19.16b, v2.16b", + "ldr q2, [x20, #224]", + "mov v20.16b, v2.16b", + "ldr q2, [x20, #240]", + "mov v21.16b, v2.16b", + "ldr q2, [x20, #256]", + "mov v22.16b, v2.16b", + "ldr q2, [x20, #272]", + "mov v23.16b, v2.16b", + "ldr q2, [x20, #288]", + "mov v24.16b, v2.16b", + "ldr q2, [x20, #304]", + "mov v25.16b, v2.16b", + "ldr q2, [x20, #320]", + "mov v26.16b, v2.16b", + "ldr q2, [x20, #336]", + "mov v27.16b, v2.16b", + "ldr q2, [x20, #352]", + "mov v28.16b, v2.16b", + "ldr q2, [x20, #368]", + "mov v29.16b, v2.16b", + "ldr q2, [x20, #384]", + "mov v30.16b, v2.16b", + "ldr q2, [x20, #400]", + "mov v31.16b, v2.16b", + "ldr w21, [x20, #24]", + "ubfx w20, w21, #13, #3", "rbit w1, w20", "lsr w1, w1, #30", "mrs x0, fpcr", @@ -1331,25 +1547,28 @@ ] }, "rdgsbase eax": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "GROUP15 0x0F 0xAE /1", "ExpectedArm64ASM": [ - "ldr w4, [x28, #168]" + "ldr w20, [x28, #168]", + "mov x4, x20" ] }, "rdgsbase rax": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "GROUP15 0x0F 0xAE /1", "ExpectedArm64ASM": [ - "ldr x4, [x28, #168]" + "ldr x20, [x28, #168]", + "mov x4, x20" ] }, "ldmxcsr [rax]": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 10, "Comment": "GROUP15 0x0F 0xAE /2", "ExpectedArm64ASM": [ - "ldr w20, [x4]", - "ubfx w20, w20, #13, #3", + "mov x20, x4", + "ldr w21, [x20]", + "ubfx w20, w21, #13, #3", "rbit w1, w20", "lsr w1, w1, #30", "mrs x0, fpcr", @@ -1360,22 +1579,24 @@ ] }, "wrfsbase eax": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "GROUP15 0x0F 0xAE /2", "ExpectedArm64ASM": [ - "mov w20, w4", - "str x20, [x28, #176]" + "mov x20, x4", + "mov w21, w20", + "str x21, [x28, #176]" ] }, "wrfsbase rax": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "GROUP15 0x0F 0xAE /2", "ExpectedArm64ASM": [ - "str x4, [x28, #176]" + "mov x20, x4", + "str x20, [x28, #176]" ] }, "stmxcsr [rax]": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "GROUP15 0x0F 0xAE /3", "ExpectedArm64ASM": [ "mov w20, #0x1f80", @@ -1383,98 +1604,127 @@ "ubfx x21, x21, #22, #3", "rbit w0, w21", "bfi x21, x0, #30, #2", - "bfi w20, w21, #13, #3", - "str w20, [x4]" + "mov w22, w20", + "bfi w22, w21, #13, #3", + "mov x20, x4", + "str w22, [x20]" ] }, "wrgsbase eax": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "GROUP15 0x0F 0xAE /3", "ExpectedArm64ASM": [ - "mov w20, w4", - "str x20, [x28, #168]" + "mov x20, x4", + "mov w21, w20", + "str x21, [x28, #168]" ] }, "wrgsbase rax": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "GROUP15 0x0F 0xAE /3", "ExpectedArm64ASM": [ - "str x4, [x28, #168]" + "mov x20, x4", + "str x20, [x28, #168]" ] }, "xsave [rax]": { - "ExpectedInstructionCount": 69, + "ExpectedInstructionCount": 94, "Comment": "GROUP15 0x0F 0xAE /4", "ExpectedArm64ASM": [ - "ubfx x20, x4, #0, #1", - "cbnz x20, #+0x8", - "b #+0x84", - "ldrh w20, [x28, #1024]", - "strh w20, [x4]", - "mov w20, #0x0", - "ldrb w21, [x28, #747]", - "bfi x20, x21, #11, #3", + "mov x20, x4", + "ubfx x21, x20, #0, #1", + "cbnz x21, #+0x8", + "b #+0x8c", + "mov x20, x4", + "ldrh w21, [x28, #1024]", + "strh w21, [x20]", + "mov w21, #0x0", + "ldrb w22, [x28, #747]", + "mov x23, x21", + "bfi x23, x22, #11, #3", "ldrb w21, [x28, #744]", "ldrb w22, [x28, #745]", - "ldrb w23, [x28, #746]", - "ldrb w24, [x28, #750]", - "orr x20, x20, x21, lsl #8", - "orr x20, x20, x22, lsl #9", - "orr x20, x20, x23, lsl #10", - "orr x20, x20, x24, lsl #14", - "strh w20, [x4, #2]", - "ldrb w20, [x28, #1026]", - "strb w20, [x4, #4]", + "ldrb w24, [x28, #746]", + "ldrb w25, [x28, #750]", + "orr x30, x23, x21, lsl #8", + "orr x21, x30, x22, lsl #9", + "orr x22, x21, x24, lsl #10", + "orr x21, x22, x25, lsl #14", + "strh w21, [x20, #2]", + "ldrb w21, [x28, #1026]", + "strb w21, [x20, #4]", "ldr q2, [x28, #768]", - "str q2, [x4, #32]", + "str q2, [x20, #32]", "ldr q2, [x28, #784]", - "str q2, [x4, #48]", + "str q2, [x20, #48]", "ldr q2, [x28, #800]", - "str q2, [x4, #64]", + "str q2, [x20, #64]", "ldr q2, [x28, #816]", - "str q2, [x4, #80]", + "str q2, [x20, #80]", "ldr q2, [x28, #832]", - "str q2, [x4, #96]", + "str q2, [x20, #96]", "ldr q2, [x28, #848]", - "str q2, [x4, #112]", + "str q2, [x20, #112]", "ldr q2, [x28, #864]", - "str q2, [x4, #128]", + "str q2, [x20, #128]", "ldr q2, [x28, #880]", - "str q2, [x4, #144]", - "ubfx x20, x4, #1, #1", - "cbnz x20, #+0x8", - "b #+0x44", - "str q16, [x4, #160]", - "str q17, [x4, #176]", - "str q18, [x4, #192]", - "str q19, [x4, #208]", - "str q20, [x4, #224]", - "str q21, [x4, #240]", - "str q22, [x4, #256]", - "str q23, [x4, #272]", - "str q24, [x4, #288]", - "str q25, [x4, #304]", - "str q26, [x4, #320]", - "str q27, [x4, #336]", - "str q28, [x4, #352]", - "str q29, [x4, #368]", - "str q30, [x4, #384]", - "str q31, [x4, #400]", - "ubfx x20, x4, #1, #2", - "cbnz x20, #+0x8", - "b #+0x2c", - "mov w20, #0x1f80", - "mrs x21, fpcr", - "ubfx x21, x21, #22, #3", - "rbit w0, w21", - "bfi x21, x0, #30, #2", - "bfi w20, w21, #13, #3", - "add x21, x4, #0x18 (24)", - "str w20, [x4, #24]", + "str q2, [x20, #144]", + "mov x20, x4", + "ubfx x21, x20, #1, #1", + "cbnz x21, #+0x8", + "b #+0x88", + "mov x20, x4", + "mov v2.16b, v16.16b", + "str q2, [x20, #160]", + "mov v2.16b, v17.16b", + "str q2, [x20, #176]", + "mov v2.16b, v18.16b", + "str q2, [x20, #192]", + "mov v2.16b, v19.16b", + "str q2, [x20, #208]", + "mov v2.16b, v20.16b", + "str q2, [x20, #224]", + "mov v2.16b, v21.16b", + "str q2, [x20, #240]", + "mov v2.16b, v22.16b", + "str q2, [x20, #256]", + "mov v2.16b, v23.16b", + "str q2, [x20, #272]", + "mov v2.16b, v24.16b", + "str q2, [x20, #288]", + "mov v2.16b, v25.16b", + "str q2, [x20, #304]", + "mov v2.16b, v26.16b", + "str q2, [x20, #320]", + "mov v2.16b, v27.16b", + "str q2, [x20, #336]", + "mov v2.16b, v28.16b", + "str q2, [x20, #352]", + "mov v2.16b, v29.16b", + "str q2, [x20, #368]", + "mov v2.16b, v30.16b", + "str q2, [x20, #384]", + "mov v2.16b, v31.16b", + "str q2, [x20, #400]", + "mov x20, x4", + "ubfx x21, x20, #1, #2", + "cbnz x21, #+0x8", + "b #+0x34", + "mov x20, x4", + "mov w21, #0x1f80", + "mrs x22, fpcr", + "ubfx x22, x22, #22, #3", + "rbit w0, w22", + "bfi x22, x0, #30, #2", + "mov w23, w21", + "bfi w23, w22, #13, #3", + "add x21, x20, #0x18 (24)", + "str w23, [x20, #24]", "mov w20, #0xffff", "str w20, [x21, #4]", - "ubfx x20, x4, #0, #3", - "str x20, [x4, #512]" + "mov x20, x4", + "ubfx x21, x20, #0, #3", + "str x21, [x20, #512]" ] }, "lfence": { @@ -1485,43 +1735,45 @@ ] }, "xrstor [rax]": { - "ExpectedInstructionCount": 105, + "ExpectedInstructionCount": 128, "Comment": "GROUP15 0x0F 0xAE /5", "ExpectedArm64ASM": [ - "ldr x20, [x4, #512]", - "ubfx x20, x20, #0, #1", + "mov x20, x4", + "ldr x21, [x20, #512]", + "ubfx x20, x21, #0, #1", "cbnz x20, #+0x8", - "b #+0x84", - "ldrh w20, [x4]", - "strh w20, [x28, #1024]", - "ldrh w20, [x4, #2]", - "ubfx w21, w20, #11, #3", - "strb w21, [x28, #747]", - "ubfx w21, w20, #8, #1", - "ubfx w22, w20, #9, #1", - "ubfx w23, w20, #10, #1", - "ubfx w20, w20, #14, #1", - "strb w21, [x28, #744]", - "strb w22, [x28, #745]", - "strb w23, [x28, #746]", - "strb w20, [x28, #750]", - "ldrb w20, [x4, #4]", - "strb w20, [x28, #1026]", - "ldr q2, [x4, #32]", + "b #+0x88", + "mov x20, x4", + "ldrh w21, [x20]", + "strh w21, [x28, #1024]", + "ldrh w21, [x20, #2]", + "ubfx w22, w21, #11, #3", + "strb w22, [x28, #747]", + "ubfx w22, w21, #8, #1", + "ubfx w23, w21, #9, #1", + "ubfx w24, w21, #10, #1", + "ubfx w25, w21, #14, #1", + "strb w22, [x28, #744]", + "strb w23, [x28, #745]", + "strb w24, [x28, #746]", + "strb w25, [x28, #750]", + "ldrb w21, [x20, #4]", + "strb w21, [x28, #1026]", + "ldr q2, [x20, #32]", "str q2, [x28, #768]", - "ldr q2, [x4, #48]", + "ldr q2, [x20, #48]", "str q2, [x28, #784]", - "ldr q2, [x4, #64]", + "ldr q2, [x20, #64]", "str q2, [x28, #800]", - "ldr q2, [x4, #80]", + "ldr q2, [x20, #80]", "str q2, [x28, #816]", - "ldr q2, [x4, #96]", + "ldr q2, [x20, #96]", "str q2, [x28, #832]", - "ldr q2, [x4, #112]", + "ldr q2, [x20, #112]", "str q2, [x28, #848]", - "ldr q2, [x4, #128]", + "ldr q2, [x20, #128]", "str q2, [x28, #864]", - "ldr q2, [x4, #144]", + "ldr q2, [x20, #144]", "str q2, [x28, #880]", "b #+0x4c", "mov w20, #0x0", @@ -1542,49 +1794,70 @@ "str q2, [x28, #848]", "str q2, [x28, #864]", "str q2, [x28, #880]", - "ldr x20, [x4, #512]", - "ubfx x20, x20, #1, #1", + "mov x20, x4", + "ldr x21, [x20, #512]", + "ubfx x20, x21, #1, #1", "cbnz x20, #+0x8", + "b #+0x8c", + "mov x20, x4", + "ldr q2, [x20, #160]", + "mov v16.16b, v2.16b", + "ldr q2, [x20, #176]", + "mov v17.16b, v2.16b", + "ldr q2, [x20, #192]", + "mov v18.16b, v2.16b", + "ldr q2, [x20, #208]", + "mov v19.16b, v2.16b", + "ldr q2, [x20, #224]", + "mov v20.16b, v2.16b", + "ldr q2, [x20, #240]", + "mov v21.16b, v2.16b", + "ldr q2, [x20, #256]", + "mov v22.16b, v2.16b", + "ldr q2, [x20, #272]", + "mov v23.16b, v2.16b", + "ldr q2, [x20, #288]", + "mov v24.16b, v2.16b", + "ldr q2, [x20, #304]", + "mov v25.16b, v2.16b", + "ldr q2, [x20, #320]", + "mov v26.16b, v2.16b", + "ldr q2, [x20, #336]", + "mov v27.16b, v2.16b", + "ldr q2, [x20, #352]", + "mov v28.16b, v2.16b", + "ldr q2, [x20, #368]", + "mov v29.16b, v2.16b", + "ldr q2, [x20, #384]", + "mov v30.16b, v2.16b", + "ldr q2, [x20, #400]", + "mov v31.16b, v2.16b", "b #+0x48", - "ldr q16, [x4, #160]", - "ldr q17, [x4, #176]", - "ldr q18, [x4, #192]", - "ldr q19, [x4, #208]", - "ldr q20, [x4, #224]", - "ldr q21, [x4, #240]", - "ldr q22, [x4, #256]", - "ldr q23, [x4, #272]", - "ldr q24, [x4, #288]", - "ldr q25, [x4, #304]", - "ldr q26, [x4, #320]", - "ldr q27, [x4, #336]", - "ldr q28, [x4, #352]", - "ldr q29, [x4, #368]", - "ldr q30, [x4, #384]", - "ldr q31, [x4, #400]", - "b #+0x44", - "movi v16.2d, #0x0", - "mov v17.16b, v16.16b", - "mov v18.16b, v16.16b", - "mov v19.16b, v16.16b", - "mov v20.16b, v16.16b", - "mov v21.16b, v16.16b", - "mov v22.16b, v16.16b", - "mov v23.16b, v16.16b", - "mov v24.16b, v16.16b", - "mov v25.16b, v16.16b", - "mov v26.16b, v16.16b", - "mov v27.16b, v16.16b", - "mov v28.16b, v16.16b", - "mov v29.16b, v16.16b", - "mov v30.16b, v16.16b", - "mov v31.16b, v16.16b", - "ldr x20, [x4, #512]", - "ubfx x20, x20, #1, #2", + "movi v2.2d, #0x0", + "mov v16.16b, v2.16b", + "mov v17.16b, v2.16b", + "mov v18.16b, v2.16b", + "mov v19.16b, v2.16b", + "mov v20.16b, v2.16b", + "mov v21.16b, v2.16b", + "mov v22.16b, v2.16b", + "mov v23.16b, v2.16b", + "mov v24.16b, v2.16b", + "mov v25.16b, v2.16b", + "mov v26.16b, v2.16b", + "mov v27.16b, v2.16b", + "mov v28.16b, v2.16b", + "mov v29.16b, v2.16b", + "mov v30.16b, v2.16b", + "mov v31.16b, v2.16b", + "mov x20, x4", + "ldr x21, [x20, #512]", + "ubfx x20, x21, #1, #2", "cbnz x20, #+0x8", - "b #+0x2c", - "ldr w20, [x4, #24]", - "ubfx w20, w20, #13, #3", + "b #+0x30", + "mov x20, x4", + "ldr w21, [x20, #24]", + "ubfx w20, w21, #13, #3", "rbit w1, w20", "lsr w1, w1, #30", "mrs x0, fpcr", @@ -1603,10 +1876,11 @@ ] }, "clwb [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "GROUP15 0x0F 0xAE /6", "ExpectedArm64ASM": [ - "dc cvac, x4" + "mov x20, x4", + "dc cvac, x20" ] }, "sfence": { @@ -1617,54 +1891,60 @@ ] }, "clflush [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "GROUP15 0x0F 0xAE /7", "ExpectedArm64ASM": [ - "dc civac, x4", + "mov x20, x4", + "dc civac, x20", "dsb ish" ] }, "clflushopt [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "GROUP15 0x0F 0xAE /7", "ExpectedArm64ASM": [ - "dc civac, x4" + "mov x20, x4", + "dc civac, x20" ] }, "prefetchnta [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "GROUP16 0x0F 0x18 /0" ], "ExpectedArm64ASM": [ - "prfm pldl1strm, [x4]" + "mov x20, x4", + "prfm pldl1strm, [x20]" ] }, "prefetcht0 [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "GROUP16 0x0F 0x18 /1" ], "ExpectedArm64ASM": [ - "prfm pldl1keep, [x4]" + "mov x20, x4", + "prfm pldl1keep, [x20]" ] }, "prefetcht1 [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "GROUP16 0x0F 0x18 /2" ], "ExpectedArm64ASM": [ - "prfm pldl2keep, [x4]" + "mov x20, x4", + "prfm pldl2keep, [x20]" ] }, "prefetcht2 [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "GROUP16 0x0F 0x18 /3" ], "ExpectedArm64ASM": [ - "prfm pldl3keep, [x4]" + "mov x20, x4", + "prfm pldl3keep, [x20]" ] }, "db 0x0f, 0x18, 0x20;": { @@ -1677,31 +1957,34 @@ "ExpectedArm64ASM": [] }, "db 0x0f, 0x0d, 0x00": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "GROUPP 0x0F 0x0D /0", "prefetch_exclusive [rax]" ], "ExpectedArm64ASM": [ - "prfm pldl1keep, [x4]" + "mov x20, x4", + "prfm pldl1keep, [x20]" ] }, "prefetchw [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "GROUPP 0x0F 0x0D /1" ], "ExpectedArm64ASM": [ - "prfm pstl1keep, [x4]" + "mov x20, x4", + "prfm pstl1keep, [x20]" ] }, "prefetchwt1 [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "GROUPP 0x0F 0x0D /2" ], "ExpectedArm64ASM": [ - "prfm pstl1keep, [x4]" + "mov x20, x4", + "prfm pstl1keep, [x20]" ] } } diff --git a/unittests/InstructionCountCI/FlagM/SecondaryModRM.json b/unittests/InstructionCountCI/FlagM/SecondaryModRM.json index ed95f77d38..85103990db 100644 --- a/unittests/InstructionCountCI/FlagM/SecondaryModRM.json +++ b/unittests/InstructionCountCI/FlagM/SecondaryModRM.json @@ -14,9 +14,10 @@ }, "Instructions": { "xgetbv": { - "ExpectedInstructionCount": 54, + "ExpectedInstructionCount": 57, "Comment": "0xF 0x01 /2 RM-0", "ExpectedArm64ASM": [ + "mov x20, x5", "sub sp, sp, #0xf0 (240)", "mov x3, sp", "st1 {v2.2d, v3.2d}, [x3], #32", @@ -41,7 +42,7 @@ "st1 {v20.2d, v21.2d, v22.2d, v23.2d}, [x3], #64", "st1 {v24.2d, v25.2d, v26.2d, v27.2d}, [x3], #64", "st1 {v28.2d, v29.2d, v30.2d, v31.2d}, [x3], #64", - "mov w1, w5", + "mov w1, w20", "ldr x0, [x28, #1112]", "ldr x2, [x28, #1128]", "blr x2", @@ -67,20 +68,22 @@ "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [sp], #64", "ld1 {v12.2d, v13.2d, v14.2d, v15.2d}, [sp], #64", "ldr x30, [sp], #16", - "mov w20, w0", - "lsr x21, x0, #32", - "mov w4, w20", - "mov w6, w21" + "mov w22, w0", + "lsr x23, x0, #32", + "mov w20, w22", + "mov w21, w23", + "mov x4, x20", + "mov x6, x21" ] }, "rdtscp": { - "ExpectedInstructionCount": 21, + "ExpectedInstructionCount": 24, "Comment": "0xF 0x01 /7 RM-1", "ExpectedArm64ASM": [ "dmb ld", "mrs x20, S3_3_c14_c0_2", - "lsl w4, w20, #7", - "lsr x6, x20, #25", + "lsl w21, w20, #7", + "lsr x22, x20, #25", "mrs x0, nzcv", "str w0, [x28, #728]", "str x8, [x28, #40]", @@ -97,7 +100,10 @@ "msr nzcv, x8", "ldr x8, [x28, #40]", "str xzr, [x28, #1056]", - "orr x5, x0, x1, lsl #12" + "orr x20, x0, x1, lsl #12", + "mov x4, x21", + "mov x5, x20", + "mov x6, x22" ] } } diff --git a/unittests/InstructionCountCI/FlagM/Secondary_OpSize.json b/unittests/InstructionCountCI/FlagM/Secondary_OpSize.json index 24169c7eae..c0b4ccc9e1 100644 --- a/unittests/InstructionCountCI/FlagM/Secondary_OpSize.json +++ b/unittests/InstructionCountCI/FlagM/Secondary_OpSize.json @@ -13,48 +13,62 @@ }, "Instructions": { "ucomisd xmm0, xmm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": "0x66 0x0f 0x2e", "ExpectedArm64ASM": [ - "fcmp d16, d17", - "mov w27, #0x0", - "cset w26, vc", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "fcmp d2, d3", + "mov w20, #0x0", + "cset w21, vc", + "mov x26, x21", "axflag", - "cfinv" + "cfinv", + "mov x27, x20" ] }, "comisd xmm0, xmm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": "0x66 0x0f 0x2f", "ExpectedArm64ASM": [ - "fcmp d16, d17", - "mov w27, #0x0", - "cset w26, vc", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "fcmp d2, d3", + "mov w20, #0x0", + "cset w21, vc", + "mov x26, x21", "axflag", - "cfinv" + "cfinv", + "mov x27, x20" ] }, "pmovmskb eax, xmm0": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "0x66 0x0f 0xd7", "ExpectedArm64ASM": [ - "ldr q2, [x28, #2272]", - "cmlt v3.16b, v16.16b, #0", - "and v2.16b, v3.16b, v2.16b", - "addp v2.16b, v2.16b, v2.16b", - "addp v2.8b, v2.8b, v2.8b", - "addp v2.8b, v2.8b, v2.8b", - "umov w4, v2.h[0]" + "mov v2.16b, v16.16b", + "ldr q3, [x28, #2272]", + "cmlt v4.16b, v2.16b, #0", + "and v2.16b, v4.16b, v3.16b", + "addp v3.16b, v2.16b, v2.16b", + "addp v2.8b, v3.8b, v3.8b", + "addp v3.8b, v2.8b, v2.8b", + "umov w20, v3.h[0]", + "mov x4, x20" ] }, "maskmovdqu xmm0, xmm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": "0x66 0x0f 0xf7", "ExpectedArm64ASM": [ - "cmlt v2.16b, v17.16b, #0", - "ldr q3, [x11]", - "bsl v2.16b, v16.16b, v3.16b", - "str q2, [x11]" + "mov v2.16b, v17.16b", + "cmlt v3.16b, v2.16b, #0", + "mov v2.16b, v16.16b", + "mov x20, x11", + "ldr q4, [x20]", + "mov v5.16b, v3.16b", + "bsl v5.16b, v2.16b, v4.16b", + "str q5, [x20]" ] } } diff --git a/unittests/InstructionCountCI/FlagM/Secondary_REP.json b/unittests/InstructionCountCI/FlagM/Secondary_REP.json index d31556cba0..96a33f638a 100644 --- a/unittests/InstructionCountCI/FlagM/Secondary_REP.json +++ b/unittests/InstructionCountCI/FlagM/Secondary_REP.json @@ -14,106 +14,136 @@ }, "Instructions": { "popcnt ax, bx": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 15, "Comment": "0xf3 0x0f 0xb8", "ExpectedArm64ASM": [ - "uxth w20, w7", - "fmov s0, w20", + "mov x20, x7", + "uxth w21, w20", + "fmov s0, w21", "cnt v0.8b, v0.8b", "addp v0.8b, v0.8b, v0.8b", "umov w20, v0.b[0]", - "bfxil x4, x20, #0, #16", + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x4, x22", "tst w20, w20", - "mov w26, #0x1", - "mov w27, #0x0" + "mov w20, #0x1", + "mov x26, x20", + "mov w20, #0x0", + "mov x27, x20" ] }, "popcnt eax, ebx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "0xf3 0x0f 0xb8", "ExpectedArm64ASM": [ - "fmov s0, w7", + "mov x20, x7", + "fmov s0, w20", "cnt v0.8b, v0.8b", "addv b0, v0.8b", - "umov w4, v0.b[0]", - "tst w4, w4", - "mov w26, #0x1", - "mov w27, #0x0" + "umov w21, v0.b[0]", + "mov x4, x21", + "tst w21, w21", + "mov w20, #0x1", + "mov x26, x20", + "mov w20, #0x0", + "mov x27, x20" ] }, "popcnt rax, rbx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "0xf3 0x0f 0xb8", "ExpectedArm64ASM": [ - "fmov d0, x7", + "mov x20, x7", + "fmov d0, x20", "cnt v0.8b, v0.8b", "addv b0, v0.8b", - "umov w4, v0.b[0]", - "tst w4, w4", - "mov w26, #0x1", - "mov w27, #0x0" + "umov w21, v0.b[0]", + "mov x4, x21", + "tst w21, w21", + "mov w20, #0x1", + "mov x26, x20", + "mov w20, #0x0", + "mov x27, x20" ] }, "tzcnt ax, bx": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": "0xf3 0x0f 0xbc", "ExpectedArm64ASM": [ - "rbit w20, w7", - "orr w20, w20, #0x8000", - "clz w20, w20", - "bfxil x4, x20, #0, #16", - "cmn wzr, w20, lsl #16", - "rmif x20, #3, #nzCv" + "mov x20, x7", + "rbit w21, w20", + "orr w21, w21, #0x8000", + "clz w21, w21", + "mov x20, x4", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22", + "cmn wzr, w21, lsl #16", + "rmif x21, #3, #nzCv" ] }, "tzcnt eax, ebx": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0xf3 0x0f 0xbc", "ExpectedArm64ASM": [ - "rbit w4, w7", - "clz w4, w4", - "tst w4, w4", - "rmif x4, #4, #nzCv" + "mov x20, x7", + "rbit w21, w20", + "clz w21, w21", + "mov x4, x21", + "tst w21, w21", + "rmif x21, #4, #nzCv" ] }, "tzcnt rax, rbx": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0xf3 0x0f 0xbc", "ExpectedArm64ASM": [ - "rbit x4, x7", - "clz x4, x4", - "tst x4, x4", - "rmif x4, #5, #nzCv" + "mov x20, x7", + "rbit x21, x20", + "clz x21, x21", + "mov x4, x21", + "tst x21, x21", + "rmif x21, #5, #nzCv" ] }, "lzcnt ax, bx": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": "0xf3 0x0f 0xbd", "ExpectedArm64ASM": [ - "lsl w20, w7, #16", - "orr w20, w20, #0x8000", - "clz w20, w20", - "bfxil x4, x20, #0, #16", - "cmn wzr, w20, lsl #16", - "rmif x20, #3, #nzCv" + "mov x20, x7", + "lsl w21, w20, #16", + "orr w21, w21, #0x8000", + "clz w21, w21", + "mov x20, x4", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22", + "cmn wzr, w21, lsl #16", + "rmif x21, #3, #nzCv" ] }, "lzcnt eax, ebx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0xf3 0x0f 0xbd", "ExpectedArm64ASM": [ - "clz w4, w7", - "tst w4, w4", - "rmif x4, #4, #nzCv" + "mov x20, x7", + "clz w21, w20", + "mov x4, x21", + "tst w21, w21", + "rmif x21, #4, #nzCv" ] }, "lzcnt rax, rbx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0xf3 0x0f 0xbd", "ExpectedArm64ASM": [ - "clz x4, x7", - "tst x4, x4", - "rmif x4, #5, #nzCv" + "mov x20, x7", + "clz x21, x20", + "mov x4, x21", + "tst x21, x21", + "rmif x21, #5, #nzCv" ] } } diff --git a/unittests/InstructionCountCI/FlagM/VEX_map1.json b/unittests/InstructionCountCI/FlagM/VEX_map1.json index 8fe066e27b..a531f4293f 100644 --- a/unittests/InstructionCountCI/FlagM/VEX_map1.json +++ b/unittests/InstructionCountCI/FlagM/VEX_map1.json @@ -15,108 +15,132 @@ }, "Instructions": { "vucomiss xmm0, xmm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": [ "Map 1 0b00 0x2e 128-bit" ], "ExpectedArm64ASM": [ - "fcmp s16, s17", - "mov w27, #0x0", - "cset w26, vc", + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "fcmp s2, s3", + "mov w20, #0x0", + "cset w21, vc", + "mov x26, x21", "axflag", - "cfinv" + "cfinv", + "mov x27, x20" ] }, "vucomisd xmm0, xmm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": [ "Map 1 0b01 0x2e 128-bit" ], "ExpectedArm64ASM": [ - "fcmp d16, d17", - "mov w27, #0x0", - "cset w26, vc", + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "fcmp d2, d3", + "mov w20, #0x0", + "cset w21, vc", + "mov x26, x21", "axflag", - "cfinv" + "cfinv", + "mov x27, x20" ] }, "vcomiss xmm0, xmm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": [ "Map 1 0b00 0x2f 128-bit" ], "ExpectedArm64ASM": [ - "fcmp s16, s17", - "mov w27, #0x0", - "cset w26, vc", + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "fcmp s2, s3", + "mov w20, #0x0", + "cset w21, vc", + "mov x26, x21", "axflag", - "cfinv" + "cfinv", + "mov x27, x20" ] }, "vcomisd xmm0, xmm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": [ "Map 1 0b01 0x2f 128-bit" ], "ExpectedArm64ASM": [ - "fcmp d16, d17", - "mov w27, #0x0", - "cset w26, vc", + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "fcmp d2, d3", + "mov w20, #0x0", + "cset w21, vc", + "mov x26, x21", "axflag", - "cfinv" + "cfinv", + "mov x27, x20" ] }, "vpmovmskb rax, xmm0": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": [ "Map 1 0b01 0xd7 256-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2272]", - "cmlt v3.16b, v16.16b, #0", - "and v2.16b, v3.16b, v2.16b", - "addp v2.16b, v2.16b, v2.16b", - "addp v2.8b, v2.8b, v2.8b", - "addp v2.8b, v2.8b, v2.8b", - "umov w4, v2.h[0]" + "mov z2.d, p7/m, z16.d", + "ldr q3, [x28, #2272]", + "cmlt v4.16b, v2.16b, #0", + "and v2.16b, v4.16b, v3.16b", + "addp v3.16b, v2.16b, v2.16b", + "addp v2.8b, v3.8b, v3.8b", + "addp v3.8b, v2.8b, v2.8b", + "umov w20, v3.h[0]", + "mov x4, x20" ] }, "vpmovmskb rax, ymm0": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "Map 1 0b01 0xd7 256-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z16.d", "ldr x0, [x28, #1680]", - "ld1b {z2.b}, p7/z, [x0]", + "ld1b {z3.b}, p7/z, [x0]", "mrs x0, nzcv", "mov z0.d, #0", - "cmplt p0.b, p7/z, z16.b, #0", - "not z0.b, p0/m, z16.b", - "orr z0.b, p0/m, z0.b, z16.b", - "mov z3.d, z0.d", + "cmplt p0.b, p7/z, z2.b, #0", + "not z0.b, p0/m, z2.b", + "orr z0.b, p0/m, z0.b, z2.b", + "mov z4.d, z0.d", "msr nzcv, x0", - "and z2.d, z3.d, z2.d", + "and z2.d, z4.d, z3.d", "movprfx z0, z2", "addp z0.b, p7/m, z0.b, z2.b", - "uzp1 z2.b, z0.b, z0.b", + "uzp1 z3.b, z0.b, z0.b", "uzp2 z1.b, z0.b, z0.b", - "splice z2.d, p6, z2.d, z1.d", - "addp v2.16b, v2.16b, v2.16b", - "addp v2.8b, v2.8b, v2.8b", - "mov w4, v2.s[0]" + "splice z3.d, p6, z3.d, z1.d", + "addp v2.16b, v3.16b, v3.16b", + "addp v3.8b, v2.8b, v2.8b", + "mov w20, v3.s[0]", + "mov x4, x20" ] }, "vmaskmovdqu xmm0, xmm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": [ "Map 1 0b01 0xf7 128-bit" ], "ExpectedArm64ASM": [ - "cmlt v2.16b, v17.16b, #0", - "ldr q3, [x11]", - "bsl v2.16b, v16.16b, v3.16b", - "str q2, [x11]" + "mov z2.d, p7/m, z17.d", + "cmlt v3.16b, v2.16b, #0", + "mov z2.d, p7/m, z16.d", + "mov x20, x11", + "ldr q4, [x20]", + "mov v5.16b, v3.16b", + "bsl v5.16b, v2.16b, v4.16b", + "str q5, [x20]" ] } } diff --git a/unittests/InstructionCountCI/FlagM/VEX_map2.json b/unittests/InstructionCountCI/FlagM/VEX_map2.json index f83e40ecdd..7d78d439f9 100644 --- a/unittests/InstructionCountCI/FlagM/VEX_map2.json +++ b/unittests/InstructionCountCI/FlagM/VEX_map2.json @@ -12,473 +12,569 @@ }, "Instructions": { "vtestps xmm0, xmm1": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 20, "Comment": [ "Map 2 0b01 0x0e 128-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", "mov w20, #0x80000000", - "dup v2.4s, w20", - "and v3.16b, v17.16b, v16.16b", - "bic v4.16b, v17.16b, v16.16b", - "and v3.16b, v3.16b, v2.16b", - "and v2.16b, v4.16b, v2.16b", - "umaxv h3, v3.8h", - "umaxv h2, v2.8h", - "umov w20, v3.h[0]", + "dup v4.4s, w20", + "and v5.16b, v3.16b, v2.16b", + "bic v6.16b, v3.16b, v2.16b", + "and v2.16b, v5.16b, v4.16b", + "and v3.16b, v6.16b, v4.16b", + "umaxv h4, v2.8h", + "umaxv h2, v3.8h", + "umov w20, v4.h[0]", "umov w21, v2.h[0]", - "mov w27, #0x0", - "mov w26, #0x1", + "mov w22, #0x0", + "mov w23, #0x1", "cmp x21, #0x0 (0)", - "cset x21, eq", + "cset x24, eq", "tst w20, w20", - "rmif x21, #63, #nzCv" + "rmif x24, #63, #nzCv", + "mov x26, x23", + "mov x27, x22" ] }, "vtestps ymm0, ymm1": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 20, "Comment": [ "Map 2 0b01 0x0e 256-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", "mov w20, #0x80000000", - "mov z2.s, w20", - "and z3.d, z17.d, z16.d", - "bic z4.d, z17.d, z16.d", - "and z3.d, z3.d, z2.d", - "and z2.d, z4.d, z2.d", - "umaxv h3, p7, z3.h", - "umaxv h2, p7, z2.h", - "umov w20, v3.h[0]", + "mov z4.s, w20", + "and z5.d, z3.d, z2.d", + "bic z6.d, z3.d, z2.d", + "and z2.d, z5.d, z4.d", + "and z3.d, z6.d, z4.d", + "umaxv h4, p7, z2.h", + "umaxv h2, p7, z3.h", + "umov w20, v4.h[0]", "umov w21, v2.h[0]", - "mov w27, #0x0", - "mov w26, #0x1", + "mov w22, #0x0", + "mov w23, #0x1", "cmp x21, #0x0 (0)", - "cset x21, eq", + "cset x24, eq", "tst w20, w20", - "rmif x21, #63, #nzCv" + "rmif x24, #63, #nzCv", + "mov x26, x23", + "mov x27, x22" ] }, "vtestpd xmm0, xmm1": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 20, "Comment": [ "Map 2 0b01 0x0f 128-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", "mov x20, #0x8000000000000000", - "dup v2.2d, x20", - "and v3.16b, v17.16b, v16.16b", - "bic v4.16b, v17.16b, v16.16b", - "and v3.16b, v3.16b, v2.16b", - "and v2.16b, v4.16b, v2.16b", - "umaxv h3, v3.8h", - "umaxv h2, v2.8h", - "umov w20, v3.h[0]", + "dup v4.2d, x20", + "and v5.16b, v3.16b, v2.16b", + "bic v6.16b, v3.16b, v2.16b", + "and v2.16b, v5.16b, v4.16b", + "and v3.16b, v6.16b, v4.16b", + "umaxv h4, v2.8h", + "umaxv h2, v3.8h", + "umov w20, v4.h[0]", "umov w21, v2.h[0]", - "mov w27, #0x0", - "mov w26, #0x1", + "mov w22, #0x0", + "mov w23, #0x1", "cmp x21, #0x0 (0)", - "cset x21, eq", + "cset x24, eq", "tst w20, w20", - "rmif x21, #63, #nzCv" + "rmif x24, #63, #nzCv", + "mov x26, x23", + "mov x27, x22" ] }, "vtestpd ymm0, ymm1": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 20, "Comment": [ "Map 2 0b01 0x0f 256-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", "mov x20, #0x8000000000000000", - "mov z2.d, x20", - "and z3.d, z17.d, z16.d", - "bic z4.d, z17.d, z16.d", - "and z3.d, z3.d, z2.d", - "and z2.d, z4.d, z2.d", - "umaxv h3, p7, z3.h", - "umaxv h2, p7, z2.h", - "umov w20, v3.h[0]", + "mov z4.d, x20", + "and z5.d, z3.d, z2.d", + "bic z6.d, z3.d, z2.d", + "and z2.d, z5.d, z4.d", + "and z3.d, z6.d, z4.d", + "umaxv h4, p7, z2.h", + "umaxv h2, p7, z3.h", + "umov w20, v4.h[0]", "umov w21, v2.h[0]", - "mov w27, #0x0", - "mov w26, #0x1", + "mov w22, #0x0", + "mov w23, #0x1", "cmp x21, #0x0 (0)", - "cset x21, eq", + "cset x24, eq", "tst w20, w20", - "rmif x21, #63, #nzCv" + "rmif x24, #63, #nzCv", + "mov x26, x23", + "mov x27, x22" ] }, "vptest xmm0, xmm1": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 16, "Comment": [ "Map 2 0b01 0x16 128-bit" ], "ExpectedArm64ASM": [ - "and v2.16b, v16.16b, v17.16b", - "bic v3.16b, v17.16b, v16.16b", - "umaxv h2, v2.8h", - "umaxv h3, v3.8h", + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "and v4.16b, v2.16b, v3.16b", + "bic v5.16b, v3.16b, v2.16b", + "umaxv h2, v4.8h", + "umaxv h3, v5.8h", "umov w20, v2.h[0]", "umov w21, v3.h[0]", - "mov w27, #0x0", - "mov w26, #0x1", + "mov w22, #0x0", + "mov w23, #0x1", "cmp x21, #0x0 (0)", - "cset x21, eq", + "cset x24, eq", "tst w20, w20", - "rmif x21, #63, #nzCv" + "rmif x24, #63, #nzCv", + "mov x26, x23", + "mov x27, x22" ] }, "vptest ymm0, ymm1": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 16, "Comment": [ "Map 2 0b01 0x16 256-bit" ], "ExpectedArm64ASM": [ - "and z2.d, z16.d, z17.d", - "bic z3.d, z17.d, z16.d", - "umaxv h2, p7, z2.h", - "umaxv h3, p7, z3.h", + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "and z4.d, z2.d, z3.d", + "bic z5.d, z3.d, z2.d", + "umaxv h2, p7, z4.h", + "umaxv h3, p7, z5.h", "umov w20, v2.h[0]", "umov w21, v3.h[0]", - "mov w27, #0x0", - "mov w26, #0x1", + "mov w22, #0x0", + "mov w23, #0x1", "cmp x21, #0x0 (0)", - "cset x21, eq", + "cset x24, eq", "tst w20, w20", - "rmif x21, #63, #nzCv" + "rmif x24, #63, #nzCv", + "mov x26, x23", + "mov x27, x22" ] }, "vmaskmovps xmm0, xmm1, [rax]": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "Map 2 0b01 0x2c 128-bit" ], "ExpectedArm64ASM": [ - "mrs x20, nzcv", - "cmplt p0.s, p6/z, z17.s, #0", - "ld1w {z2.s}, p0/z, [x4]", - "mov v16.16b, v2.16b", - "msr nzcv, x20" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "mrs x21, nzcv", + "cmplt p0.s, p6/z, z2.s, #0", + "ld1w {z3.s}, p0/z, [x20]", + "mov v2.16b, v3.16b", + "mov z16.d, p7/m, z2.d", + "msr nzcv, x21" ] }, "vmaskmovps ymm0, ymm1, [rax]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b01 0x2c 256-bit" ], "ExpectedArm64ASM": [ - "mrs x20, nzcv", - "cmplt p0.s, p7/z, z17.s, #0", - "ld1w {z16.s}, p0/z, [x4]", - "msr nzcv, x20" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "mrs x21, nzcv", + "cmplt p0.s, p7/z, z2.s, #0", + "ld1w {z3.s}, p0/z, [x20]", + "mov z16.d, p7/m, z3.d", + "msr nzcv, x21" ] }, "vmaskmovpd xmm0, xmm1, [rax]": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "Map 2 0b01 0x2d 128-bit" ], "ExpectedArm64ASM": [ - "mrs x20, nzcv", - "cmplt p0.d, p6/z, z17.d, #0", - "ld1d {z2.d}, p0/z, [x4]", - "mov v16.16b, v2.16b", - "msr nzcv, x20" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "mrs x21, nzcv", + "cmplt p0.d, p6/z, z2.d, #0", + "ld1d {z3.d}, p0/z, [x20]", + "mov v2.16b, v3.16b", + "mov z16.d, p7/m, z2.d", + "msr nzcv, x21" ] }, "vmaskmovpd ymm0, ymm1, [rax]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b01 0x2d 256-bit" ], "ExpectedArm64ASM": [ - "mrs x20, nzcv", - "cmplt p0.d, p7/z, z17.d, #0", - "ld1d {z16.d}, p0/z, [x4]", - "msr nzcv, x20" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "mrs x21, nzcv", + "cmplt p0.d, p7/z, z2.d, #0", + "ld1d {z3.d}, p0/z, [x20]", + "mov z16.d, p7/m, z3.d", + "msr nzcv, x21" ] }, "vmaskmovps [rax], xmm0, xmm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b01 0x2e 128-bit" ], "ExpectedArm64ASM": [ - "mrs x20, nzcv", - "cmplt p0.s, p6/z, z16.s, #0", - "st1w {z17.s}, p0, [x4]", - "msr nzcv, x20" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov x20, x4", + "mrs x21, nzcv", + "cmplt p0.s, p6/z, z2.s, #0", + "st1w {z3.s}, p0, [x20]", + "msr nzcv, x21" ] }, "vmaskmovps [rax], ymm0, ymm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b01 0x2e 256-bit" ], "ExpectedArm64ASM": [ - "mrs x20, nzcv", - "cmplt p0.s, p7/z, z16.s, #0", - "st1w {z17.s}, p0, [x4]", - "msr nzcv, x20" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov x20, x4", + "mrs x21, nzcv", + "cmplt p0.s, p7/z, z2.s, #0", + "st1w {z3.s}, p0, [x20]", + "msr nzcv, x21" ] }, "vmaskmovpd [rax], xmm0, xmm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b01 0x2f 128-bit" ], "ExpectedArm64ASM": [ - "mrs x20, nzcv", - "cmplt p0.d, p6/z, z16.d, #0", - "st1d {z17.d}, p0, [x4]", - "msr nzcv, x20" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov x20, x4", + "mrs x21, nzcv", + "cmplt p0.d, p6/z, z2.d, #0", + "st1d {z3.d}, p0, [x20]", + "msr nzcv, x21" ] }, "vmaskmovpd [rax], ymm0, ymm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b01 0x2f 256-bit" ], "ExpectedArm64ASM": [ - "mrs x20, nzcv", - "cmplt p0.d, p7/z, z16.d, #0", - "st1d {z17.d}, p0, [x4]", - "msr nzcv, x20" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov x20, x4", + "mrs x21, nzcv", + "cmplt p0.d, p7/z, z2.d, #0", + "st1d {z3.d}, p0, [x20]", + "msr nzcv, x21" ] }, "vpmaskmovd xmm0, xmm1, [rax]": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "Map 2 0b01 0x8c 128-bit" ], "ExpectedArm64ASM": [ - "mrs x20, nzcv", - "cmplt p0.s, p6/z, z17.s, #0", - "ld1w {z2.s}, p0/z, [x4]", - "mov v16.16b, v2.16b", - "msr nzcv, x20" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "mrs x21, nzcv", + "cmplt p0.s, p6/z, z2.s, #0", + "ld1w {z3.s}, p0/z, [x20]", + "mov v2.16b, v3.16b", + "mov z16.d, p7/m, z2.d", + "msr nzcv, x21" ] }, "vpmaskmovd ymm0, ymm1, [rax]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b01 0x8c 256-bit" ], "ExpectedArm64ASM": [ - "mrs x20, nzcv", - "cmplt p0.s, p7/z, z17.s, #0", - "ld1w {z16.s}, p0/z, [x4]", - "msr nzcv, x20" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "mrs x21, nzcv", + "cmplt p0.s, p7/z, z2.s, #0", + "ld1w {z3.s}, p0/z, [x20]", + "mov z16.d, p7/m, z3.d", + "msr nzcv, x21" ] }, "vpmaskmovq xmm0, xmm1, [rax]": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "Map 2 0b01 0x8c 128-bit" ], "ExpectedArm64ASM": [ - "mrs x20, nzcv", - "cmplt p0.d, p6/z, z17.d, #0", - "ld1d {z2.d}, p0/z, [x4]", - "mov v16.16b, v2.16b", - "msr nzcv, x20" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "mrs x21, nzcv", + "cmplt p0.d, p6/z, z2.d, #0", + "ld1d {z3.d}, p0/z, [x20]", + "mov v2.16b, v3.16b", + "mov z16.d, p7/m, z2.d", + "msr nzcv, x21" ] }, "vpmaskmovq ymm0, ymm1, [rax]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b01 0x8c 256-bit" ], "ExpectedArm64ASM": [ - "mrs x20, nzcv", - "cmplt p0.d, p7/z, z17.d, #0", - "ld1d {z16.d}, p0/z, [x4]", - "msr nzcv, x20" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "mrs x21, nzcv", + "cmplt p0.d, p7/z, z2.d, #0", + "ld1d {z3.d}, p0/z, [x20]", + "mov z16.d, p7/m, z3.d", + "msr nzcv, x21" ] }, "vpmaskmovd [rax], xmm0, xmm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b01 0x8e 128-bit" ], "ExpectedArm64ASM": [ - "mrs x20, nzcv", - "cmplt p0.s, p6/z, z16.s, #0", - "st1w {z17.s}, p0, [x4]", - "msr nzcv, x20" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov x20, x4", + "mrs x21, nzcv", + "cmplt p0.s, p6/z, z2.s, #0", + "st1w {z3.s}, p0, [x20]", + "msr nzcv, x21" ] }, "vpmaskmovd [rax], ymm0, ymm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b01 0x8e 256-bit" ], "ExpectedArm64ASM": [ - "mrs x20, nzcv", - "cmplt p0.s, p7/z, z16.s, #0", - "st1w {z17.s}, p0, [x4]", - "msr nzcv, x20" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov x20, x4", + "mrs x21, nzcv", + "cmplt p0.s, p7/z, z2.s, #0", + "st1w {z3.s}, p0, [x20]", + "msr nzcv, x21" ] }, "vpmaskmovq [rax], xmm0, xmm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b01 0x8e 128-bit" ], "ExpectedArm64ASM": [ - "mrs x20, nzcv", - "cmplt p0.d, p6/z, z16.d, #0", - "st1d {z17.d}, p0, [x4]", - "msr nzcv, x20" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov x20, x4", + "mrs x21, nzcv", + "cmplt p0.d, p6/z, z2.d, #0", + "st1d {z3.d}, p0, [x20]", + "msr nzcv, x21" ] }, "vpmaskmovq [rax], ymm0, ymm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b01 0x8e 256-bit" ], "ExpectedArm64ASM": [ - "mrs x20, nzcv", - "cmplt p0.d, p7/z, z16.d, #0", - "st1d {z17.d}, p0, [x4]", - "msr nzcv, x20" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov x20, x4", + "mrs x21, nzcv", + "cmplt p0.d, p7/z, z2.d, #0", + "st1d {z3.d}, p0, [x20]", + "msr nzcv, x21" ] }, "andn eax, ebx, ecx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 2 0b00 0xf2 32-bit" ], "ExpectedArm64ASM": [ - "bic w4, w5, w7", - "mov x26, x4", - "tst w4, w4" + "mov x20, x7", + "mov x21, x5", + "bic w22, w21, w20", + "mov x4, x22", + "mov x26, x22", + "tst w22, w22" ] }, "andn rax, rbx, rcx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 2 0b00 0xf2 64-bit" ], "ExpectedArm64ASM": [ - "bic x4, x5, x7", - "mov x26, x4", - "tst x4, x4" + "mov x20, x7", + "mov x21, x5", + "bic x22, x21, x20", + "mov x4, x22", + "mov x26, x22", + "tst x22, x22" ] }, "bzhi eax, ebx, ecx": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 11, "Comment": [ "Map 2 0b00 0xf5 32-bit" ], "ExpectedArm64ASM": [ - "mov w20, #0xffffffff", - "lsl w20, w20, w5", - "bic w20, w7, w20", - "tst x5, #0xe0", - "csel w4, w7, w20, ne", + "mov x20, x7", + "mov x21, x5", + "mov w22, #0xffffffff", + "lsl w23, w22, w21", + "bic w22, w20, w23", + "tst x21, #0xe0", + "csel w21, w20, w22, ne", + "mov x4, x21", "cset w20, ne", - "tst w4, w4", + "tst w21, w21", "rmif x20, #63, #nzCv" ] }, "bzhi rax, rbx, rcx": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 11, "Comment": [ "Map 2 0b00 0xf5 64-bit" ], "ExpectedArm64ASM": [ - "mov x20, #0xffffffffffffffff", - "lsl x20, x20, x5", - "bic x20, x7, x20", - "tst x5, #0xc0", - "csel x4, x7, x20, ne", + "mov x20, x7", + "mov x21, x5", + "mov x22, #0xffffffffffffffff", + "lsl x23, x22, x21", + "bic x22, x20, x23", + "tst x21, #0xc0", + "csel x21, x20, x22, ne", + "mov x4, x21", "cset w20, ne", - "tst x4, x4", + "tst x21, x21", "rmif x20, #63, #nzCv" ] }, "pdep eax, ebx, ecx": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 17, "Comment": [ "Map 2 0b11 0xf5 32-bit" ], "ExpectedArm64ASM": [ - "mov x0, x7", - "mov x1, x5", - "mov w4, #0x0", - "cbz w5, #+0x2c", + "mov x20, x7", + "mov x21, x5", + "mov x0, x20", + "mov x1, x21", + "mov w22, #0x0", + "cbz w21, #+0x2c", "neg w2, w1", "and w2, w2, w1", "sbfx w3, w0, #0, #1", "eor w1, w1, w2", "and w2, w3, w2", "neg w3, w1", - "orr w4, w4, w2", + "orr w22, w22, w2", "lsr w0, w0, #1", "and w2, w1, w3", - "cbnz w2, #-0x1c" + "cbnz w2, #-0x1c", + "mov x4, x22" ] }, "pdep rax, rbx, rcx": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 17, "Comment": [ "Map 2 0b11 0xf5 64-bit" ], "ExpectedArm64ASM": [ - "mov x0, x7", - "mov x1, x5", - "mov x4, #0x0", - "cbz x5, #+0x2c", + "mov x20, x7", + "mov x21, x5", + "mov x0, x20", + "mov x1, x21", + "mov x22, #0x0", + "cbz x21, #+0x2c", "neg x2, x1", "and x2, x2, x1", "sbfx x3, x0, #0, #1", "eor x1, x1, x2", "and x2, x3, x2", "neg x3, x1", - "orr x4, x4, x2", + "orr x22, x22, x2", "lsr x0, x0, #1", "and x2, x1, x3", - "cbnz x2, #-0x1c" + "cbnz x2, #-0x1c", + "mov x4, x22" ] }, "bextr eax, ebx, ecx": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 15, "Comment": [ "Map 2 0b00 0xf7 32-bit" ], "ExpectedArm64ASM": [ - "uxtb w20, w5", - "lsr w21, w7, w20", - "mov w22, #0x0", + "mov x20, x7", + "mov x21, x5", + "uxtb w22, w21", + "lsr w23, w20, w22", + "mov w20, #0x0", + "cmp w22, #0x1f (31)", + "csel w24, w23, w20, ls", + "ubfx w20, w21, #8, #8", + "mov x21, #0xffffffffffffffff", + "lsl w22, w21, w20", + "bic w21, w24, w22", "cmp w20, #0x1f (31)", - "csel w20, w21, w22, ls", - "ubfx w21, w5, #8, #8", - "mov x22, #0xffffffffffffffff", - "lsl w22, w22, w21", - "bic w22, w20, w22", - "cmp w21, #0x1f (31)", - "csel w4, w22, w20, ls", - "tst w4, w4" + "csel w22, w21, w24, ls", + "mov x4, x22", + "tst w22, w22" ] }, "bextr rax, rbx, rcx": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 15, "Comment": [ "Map 2 0b00 0xf7 64-bit" ], "ExpectedArm64ASM": [ - "uxtb x20, w5", - "lsr x21, x7, x20", - "mov w22, #0x0", + "mov x20, x7", + "mov x21, x5", + "uxtb x22, w21", + "lsr x23, x20, x22", + "mov w20, #0x0", + "cmp x22, #0x3f (63)", + "csel x24, x23, x20, ls", + "ubfx x20, x21, #8, #8", + "mov x21, #0xffffffffffffffff", + "lsl x22, x21, x20", + "bic x21, x24, x22", "cmp x20, #0x3f (63)", - "csel x20, x21, x22, ls", - "ubfx x21, x5, #8, #8", - "mov x22, #0xffffffffffffffff", - "lsl x22, x22, x21", - "bic x22, x20, x22", - "cmp x21, #0x3f (63)", - "csel x4, x22, x20, ls", - "tst x4, x4" + "csel x22, x21, x24, ls", + "mov x4, x22", + "tst x22, x22" ] } } diff --git a/unittests/InstructionCountCI/FlagM/VEX_map_group.json b/unittests/InstructionCountCI/FlagM/VEX_map_group.json index 277bfad731..d9e69af9a6 100644 --- a/unittests/InstructionCountCI/FlagM/VEX_map_group.json +++ b/unittests/InstructionCountCI/FlagM/VEX_map_group.json @@ -10,83 +10,95 @@ }, "Instructions": { "blsr eax, ebx": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": [ "Map group 17 0b001 32-bit" ], "ExpectedArm64ASM": [ - "sub w20, w7, #0x1 (1)", - "and w4, w20, w7", - "cmp x7, #0x0 (0)", - "cset x20, eq", - "tst w4, w4", - "rmif x20, #63, #nzCv" + "mov x20, x7", + "sub w21, w20, #0x1 (1)", + "and w22, w21, w20", + "mov x4, x22", + "cmp x20, #0x0 (0)", + "cset x21, eq", + "tst w22, w22", + "rmif x21, #63, #nzCv" ] }, "blsr rax, rbx": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": [ "Map group 17 0b001 64-bit" ], "ExpectedArm64ASM": [ - "sub x20, x7, #0x1 (1)", - "and x4, x20, x7", - "cmp x7, #0x0 (0)", - "cset x20, eq", - "tst x4, x4", - "rmif x20, #63, #nzCv" + "mov x20, x7", + "sub x21, x20, #0x1 (1)", + "and x22, x21, x20", + "mov x4, x22", + "cmp x20, #0x0 (0)", + "cset x21, eq", + "tst x22, x22", + "rmif x21, #63, #nzCv" ] }, "blsmsk eax, ebx": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": [ "Map group 17 0b010 32-bit" ], "ExpectedArm64ASM": [ - "sub w20, w7, #0x1 (1)", - "eor w4, w20, w7", - "cmp x7, #0x0 (0)", - "cset x20, eq", - "tst w4, w4", - "rmif x20, #63, #nzCv" + "mov x20, x7", + "sub w21, w20, #0x1 (1)", + "eor w22, w21, w20", + "mov x4, x22", + "cmp x20, #0x0 (0)", + "cset x21, eq", + "tst w22, w22", + "rmif x21, #63, #nzCv" ] }, "blsmsk rax, rbx": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": [ "Map group 17 0b010 64-bit" ], "ExpectedArm64ASM": [ - "sub x20, x7, #0x1 (1)", - "eor x4, x20, x7", - "cmp x7, #0x0 (0)", - "cset x20, eq", - "tst x4, x4", - "rmif x20, #63, #nzCv" + "mov x20, x7", + "sub x21, x20, #0x1 (1)", + "eor x22, x21, x20", + "mov x4, x22", + "cmp x20, #0x0 (0)", + "cset x21, eq", + "tst x22, x22", + "rmif x21, #63, #nzCv" ] }, "blsi eax, ebx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": [ "Map group 17 0b011 32-bit" ], "ExpectedArm64ASM": [ - "neg w20, w7", - "and w4, w7, w20", - "tst w4, w4", + "mov x20, x7", + "neg w21, w20", + "and w22, w20, w21", + "mov x4, x22", + "tst w22, w22", "cset w20, ne", "rmif x20, #63, #nzCv" ] }, "blsi rax, rbx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": [ "Map group 17 0b011 64-bit" ], "ExpectedArm64ASM": [ - "neg x20, x7", - "and x4, x7, x20", - "tst x4, x4", + "mov x20, x7", + "neg x21, x20", + "and x22, x20, x21", + "mov x4, x22", + "tst x22, x22", "cset w20, ne", "rmif x20, #63, #nzCv" ] diff --git a/unittests/InstructionCountCI/FlagM/x87.json b/unittests/InstructionCountCI/FlagM/x87.json index fde9f01d68..a552ea45f9 100644 --- a/unittests/InstructionCountCI/FlagM/x87.json +++ b/unittests/InstructionCountCI/FlagM/x87.json @@ -14,13 +14,14 @@ }, "Instructions": { "fadd dword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xd8 !11b /0" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", + "mov x21, x4", + "ldr s2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -45,11 +46,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "ldr q3, [x0, #768]", + "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -62,10 +63,10 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", - "mov x3, v2.d[0]", - "umov w4, v2.h[4]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "mov x3, v3.d[0]", + "umov w4, v3.h[4]", "ldr x5, [x28, #1408]", "blr x5", "ldr w4, [x28, #728]", @@ -77,21 +78,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fmul dword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xd8 !11b /1" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", + "mov x21, x4", + "ldr s2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -116,11 +118,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "ldr q3, [x0, #768]", + "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -133,10 +135,10 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", - "mov x3, v2.d[0]", - "umov w4, v2.h[4]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "mov x3, v3.d[0]", + "umov w4, v3.h[4]", "ldr x5, [x28, #1424]", "blr x5", "ldr w4, [x28, #728]", @@ -148,21 +150,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fcom dword [rax]": { - "ExpectedInstructionCount": 69, + "ExpectedInstructionCount": 70, "Comment": [ "0xd8 !11b /2" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", + "mov x21, x4", + "ldr s2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -187,11 +190,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "ldr q3, [x0, #768]", + "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -204,10 +207,10 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", - "mov x3, v2.d[0]", - "umov w4, v2.h[4]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "mov x3, v3.d[0]", + "umov w4, v3.h[4]", "ldr x5, [x28, #1304]", "blr x5", "ldr w4, [x28, #728]", @@ -222,24 +225,25 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "fcomp dword [rax]": { - "ExpectedInstructionCount": 77, + "ExpectedInstructionCount": 78, "Comment": [ "0xd8 !11b /3" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", + "mov x21, x4", + "ldr s2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -264,11 +268,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "ldr q3, [x0, #768]", + "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -281,10 +285,10 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", - "mov x3, v2.d[0]", - "umov w4, v2.h[4]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "mov x3, v3.d[0]", + "umov w4, v3.h[4]", "ldr x5, [x28, #1304]", "blr x5", "ldr w4, [x28, #728]", @@ -299,32 +303,33 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", - "mov w22, #0x0", - "strb w22, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", + "mov w21, #0x0", + "strb w21, [x28, #745]", + "strb w24, [x28, #746]", + "strb w22, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fsub dword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xd8 !11b /4" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", + "mov x21, x4", + "ldr s2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -349,11 +354,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "ldr q3, [x0, #768]", + "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -366,10 +371,10 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", - "mov x3, v2.d[0]", - "umov w4, v2.h[4]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "mov x3, v3.d[0]", + "umov w4, v3.h[4]", "ldr x5, [x28, #1416]", "blr x5", "ldr w4, [x28, #728]", @@ -381,21 +386,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fsubr dword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xd8 !11b /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", + "mov x21, x4", + "ldr s2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -420,11 +426,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "ldr q3, [x0, #768]", + "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -437,10 +443,10 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v2.d[0]", - "umov w2, v2.h[4]", - "mov x3, v3.d[0]", - "umov w4, v3.h[4]", + "mov x1, v3.d[0]", + "umov w2, v3.h[4]", + "mov x3, v2.d[0]", + "umov w4, v2.h[4]", "ldr x5, [x28, #1416]", "blr x5", "ldr w4, [x28, #728]", @@ -452,21 +458,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdiv dword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xd8 !11b /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", + "mov x21, x4", + "ldr s2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -491,11 +498,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "ldr q3, [x0, #768]", + "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -508,10 +515,10 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", - "mov x3, v2.d[0]", - "umov w4, v2.h[4]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "mov x3, v3.d[0]", + "umov w4, v3.h[4]", "ldr x5, [x28, #1432]", "blr x5", "ldr w4, [x28, #728]", @@ -523,21 +530,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdivr dword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xd8 !11b /7" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", + "mov x21, x4", + "ldr s2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -562,11 +570,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "ldr q3, [x0, #768]", + "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -579,10 +587,10 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v2.d[0]", - "umov w2, v2.h[4]", - "mov x3, v3.d[0]", - "umov w4, v3.h[4]", + "mov x1, v3.d[0]", + "umov w2, v3.h[4]", + "mov x3, v2.d[0]", + "umov w4, v2.h[4]", "ldr x5, [x28, #1432]", "blr x5", "ldr w4, [x28, #728]", @@ -594,11 +602,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fadd st0, st0": { @@ -609,8 +617,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -641,11 +649,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fadd st0, st1": { @@ -656,8 +664,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -688,11 +696,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fadd st0, st2": { @@ -703,8 +711,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -735,11 +743,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fadd st0, st3": { @@ -750,8 +758,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -782,11 +790,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fadd st0, st4": { @@ -797,8 +805,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -829,11 +837,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fadd st0, st5": { @@ -844,8 +852,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -876,11 +884,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fadd st0, st6": { @@ -891,8 +899,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -923,11 +931,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fadd st0, st7": { @@ -938,8 +946,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -970,11 +978,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fmul st0, st0": { @@ -985,8 +993,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1017,11 +1025,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fmul st0, st1": { @@ -1032,8 +1040,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1064,11 +1072,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fmul st0, st2": { @@ -1079,8 +1087,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1111,11 +1119,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fmul st0, st3": { @@ -1126,8 +1134,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1158,11 +1166,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fmul st0, st4": { @@ -1173,8 +1181,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1205,11 +1213,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fmul st0, st5": { @@ -1220,8 +1228,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1252,11 +1260,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fmul st0, st6": { @@ -1267,8 +1275,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1299,11 +1307,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fmul st0, st7": { @@ -1314,8 +1322,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1346,11 +1354,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fcom st0, st0": { @@ -1362,8 +1370,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x0", "add w22, w20, #0x0 (0)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1397,13 +1405,13 @@ "mov x20, x0", "ubfx x22, x20, #1, #1", "ubfx x23, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w22, w22, w20", - "orr w23, w23, w20", - "strb w22, [x28, #744]", + "ubfx x24, x20, #2, #1", + "orr w20, w22, w24", + "orr w22, w23, w24", + "strb w20, [x28, #744]", "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w23, [x28, #750]" + "strb w24, [x28, #746]", + "strb w22, [x28, #750]" ] }, "fcom st0, st1": { @@ -1414,8 +1422,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1449,14 +1457,14 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "fcom st0, st2": { @@ -1467,8 +1475,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1502,14 +1510,14 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "fcom st0, st3": { @@ -1520,8 +1528,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1555,14 +1563,14 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "fcom st0, st4": { @@ -1573,8 +1581,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1608,14 +1616,14 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "fcom st0, st5": { @@ -1626,8 +1634,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1661,14 +1669,14 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "fcom st0, st6": { @@ -1679,8 +1687,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1714,14 +1722,14 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "fcom st0, st7": { @@ -1732,8 +1740,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1767,14 +1775,14 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "fcomp st0, st0": { @@ -1786,8 +1794,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x0", "add w22, w20, #0x0 (0)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1821,20 +1829,20 @@ "mov x22, x0", "ubfx x23, x22, #1, #1", "ubfx x24, x22, #0, #1", - "ubfx x22, x22, #2, #1", - "orr w23, w23, w22", - "orr w24, w24, w22", - "strb w23, [x28, #744]", + "ubfx x25, x22, #2, #1", + "orr w22, w23, w25", + "orr w23, w24, w25", + "strb w22, [x28, #744]", "strb w21, [x28, #745]", - "strb w22, [x28, #746]", - "strb w24, [x28, #750]", + "strb w25, [x28, #746]", + "strb w23, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -1847,8 +1855,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1882,20 +1890,20 @@ "mov x22, x0", "ubfx x23, x22, #1, #1", "ubfx x24, x22, #0, #1", - "ubfx x22, x22, #2, #1", - "orr w23, w23, w22", - "orr w24, w24, w22", - "strb w23, [x28, #744]", - "mov w23, #0x0", - "strb w23, [x28, #745]", - "strb w22, [x28, #746]", - "strb w24, [x28, #750]", + "ubfx x25, x22, #2, #1", + "orr w22, w23, w25", + "orr w23, w24, w25", + "strb w22, [x28, #744]", + "mov w22, #0x0", + "strb w22, [x28, #745]", + "strb w25, [x28, #746]", + "strb w23, [x28, #750]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -1907,8 +1915,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1942,21 +1950,21 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", - "mov w22, #0x0", - "strb w22, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", + "mov w21, #0x0", + "strb w21, [x28, #745]", + "strb w24, [x28, #746]", + "strb w22, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -1968,8 +1976,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2003,21 +2011,21 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", - "mov w22, #0x0", - "strb w22, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", + "mov w21, #0x0", + "strb w21, [x28, #745]", + "strb w24, [x28, #746]", + "strb w22, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -2029,8 +2037,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2064,21 +2072,21 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", - "mov w22, #0x0", - "strb w22, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", + "mov w21, #0x0", + "strb w21, [x28, #745]", + "strb w24, [x28, #746]", + "strb w22, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -2090,8 +2098,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2125,21 +2133,21 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", - "mov w22, #0x0", - "strb w22, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", + "mov w21, #0x0", + "strb w21, [x28, #745]", + "strb w24, [x28, #746]", + "strb w22, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -2151,8 +2159,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2186,21 +2194,21 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", - "mov w22, #0x0", - "strb w22, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", + "mov w21, #0x0", + "strb w21, [x28, #745]", + "strb w24, [x28, #746]", + "strb w22, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -2212,8 +2220,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2247,21 +2255,21 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", - "mov w22, #0x0", - "strb w22, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", + "mov w21, #0x0", + "strb w21, [x28, #745]", + "strb w24, [x28, #746]", + "strb w22, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -2273,8 +2281,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2305,11 +2313,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsub st0, st1": { @@ -2320,8 +2328,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2352,11 +2360,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fsub st0, st2": { @@ -2367,8 +2375,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2399,11 +2407,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fsub st0, st3": { @@ -2414,8 +2422,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2446,11 +2454,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fsub st0, st4": { @@ -2461,8 +2469,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2493,11 +2501,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fsub st0, st5": { @@ -2508,8 +2516,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2540,11 +2548,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fsub st0, st6": { @@ -2555,8 +2563,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2587,11 +2595,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fsub st0, st7": { @@ -2602,8 +2610,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2634,11 +2642,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fsubr st0, st0": { @@ -2649,8 +2657,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2681,11 +2689,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubr st0, st1": { @@ -2696,8 +2704,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2728,11 +2736,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fsubr st0, st2": { @@ -2743,8 +2751,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2775,11 +2783,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fsubr st0, st3": { @@ -2790,8 +2798,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2822,11 +2830,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fsubr st0, st4": { @@ -2837,8 +2845,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2869,11 +2877,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fsubr st0, st5": { @@ -2884,8 +2892,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2916,11 +2924,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fsubr st0, st6": { @@ -2931,8 +2939,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2963,11 +2971,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fsubr st0, st7": { @@ -2978,8 +2986,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3010,11 +3018,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdiv st0, st0": { @@ -3025,8 +3033,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3057,11 +3065,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdiv st0, st1": { @@ -3072,8 +3080,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3104,11 +3112,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdiv st0, st2": { @@ -3119,8 +3127,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3151,11 +3159,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdiv st0, st3": { @@ -3166,8 +3174,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3198,11 +3206,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdiv st0, st4": { @@ -3213,8 +3221,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3245,11 +3253,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdiv st0, st5": { @@ -3260,8 +3268,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3292,11 +3300,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdiv st0, st6": { @@ -3307,8 +3315,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3339,11 +3347,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdiv st0, st7": { @@ -3354,8 +3362,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3386,11 +3394,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdivr st0, st0": { @@ -3401,8 +3409,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3433,11 +3441,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivr st0, st1": { @@ -3448,8 +3456,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3480,11 +3488,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdivr st0, st2": { @@ -3495,8 +3503,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3527,11 +3535,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdivr st0, st3": { @@ -3542,8 +3550,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3574,11 +3582,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdivr st0, st4": { @@ -3589,8 +3597,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3621,11 +3629,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdivr st0, st5": { @@ -3636,8 +3644,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3668,11 +3676,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdivr st0, st6": { @@ -3683,8 +3691,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3715,11 +3723,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdivr st0, st7": { @@ -3730,8 +3738,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3762,21 +3770,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fld dword [rax]": { - "ExpectedInstructionCount": 39, + "ExpectedInstructionCount": 40, "Comment": [ "0xd9 !11b /0" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", + "mov x21, x4", + "ldr s2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -3801,23 +3810,23 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q3, [x0, #768]" ] }, "fst dword [rax]": { - "ExpectedInstructionCount": 30, + "ExpectedInstructionCount": 31, "Comment": [ "0xd9 !11b /2" ], @@ -3850,12 +3859,13 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "fmov s2, s0", - "str s2, [x4]" + "fmov s3, s0", + "mov x20, x4", + "str s3, [x20]" ] }, "fstp dword [rax]": { - "ExpectedInstructionCount": 38, + "ExpectedInstructionCount": 39, "Comment": [ "0xd9 !11b /3" ], @@ -3888,82 +3898,85 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "fmov s2, s0", - "str s2, [x4]", + "fmov s3, s0", + "mov x21, x4", + "str s3, [x21]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fldenv [rax]": { - "ExpectedInstructionCount": 48, + "ExpectedInstructionCount": 49, "Comment": [ "0xd9 !11b /4" ], "ExpectedArm64ASM": [ - "ldrh w20, [x4]", - "strh w20, [x28, #1024]", - "ldr w20, [x4, #4]", - "ubfx w21, w20, #11, #3", - "strb w21, [x28, #747]", - "ubfx w21, w20, #8, #1", - "ubfx w22, w20, #9, #1", - "ubfx w23, w20, #10, #1", - "ubfx w20, w20, #14, #1", - "strb w21, [x28, #744]", - "strb w22, [x28, #745]", - "strb w23, [x28, #746]", - "strb w20, [x28, #750]", - "ldr w20, [x4, #8]", - "ubfx w21, w20, #0, #2", + "mov x20, x4", + "ldrh w21, [x20]", + "strh w21, [x28, #1024]", + "ldr w21, [x20, #4]", + "ubfx w22, w21, #11, #3", + "strb w22, [x28, #747]", + "ubfx w22, w21, #8, #1", + "ubfx w23, w21, #9, #1", + "ubfx w24, w21, #10, #1", + "ubfx w25, w21, #14, #1", + "strb w22, [x28, #744]", + "strb w23, [x28, #745]", + "strb w24, [x28, #746]", + "strb w25, [x28, #750]", + "ldr w21, [x20, #8]", + "ubfx w20, w21, #0, #2", "mrs x22, nzcv", - "cmp x21, #0x3 (3)", - "cset x21, ne", - "ubfx w23, w20, #2, #2", - "cmp x23, #0x3 (3)", - "cset x23, ne", - "orr w21, w21, w23, lsl #1", - "ubfx w23, w20, #4, #2", - "cmp x23, #0x3 (3)", - "cset x23, ne", - "orr w21, w21, w23, lsl #2", - "ubfx w23, w20, #6, #2", - "cmp x23, #0x3 (3)", + "cmp x20, #0x3 (3)", "cset x23, ne", - "orr w21, w21, w23, lsl #3", - "ubfx w23, w20, #8, #2", + "ubfx w20, w21, #2, #2", + "cmp x20, #0x3 (3)", + "cset x24, ne", + "orr w20, w23, w24, lsl #1", + "ubfx w23, w21, #4, #2", "cmp x23, #0x3 (3)", - "cset x23, ne", - "orr w21, w21, w23, lsl #4", - "ubfx w23, w20, #10, #2", + "cset x24, ne", + "orr w23, w20, w24, lsl #2", + "ubfx w20, w21, #6, #2", + "cmp x20, #0x3 (3)", + "cset x24, ne", + "orr w20, w23, w24, lsl #3", + "ubfx w23, w21, #8, #2", "cmp x23, #0x3 (3)", - "cset x23, ne", - "orr w21, w21, w23, lsl #5", - "ubfx w23, w20, #12, #2", + "cset x24, ne", + "orr w23, w20, w24, lsl #4", + "ubfx w20, w21, #10, #2", + "cmp x20, #0x3 (3)", + "cset x24, ne", + "orr w20, w23, w24, lsl #5", + "ubfx w23, w21, #12, #2", "cmp x23, #0x3 (3)", - "cset x23, ne", - "orr w21, w21, w23, lsl #6", - "ubfx w20, w20, #14, #2", + "cset x24, ne", + "orr w23, w20, w24, lsl #6", + "ubfx w20, w21, #14, #2", "cmp x20, #0x3 (3)", - "cset x20, ne", - "orr w20, w21, w20, lsl #7", + "cset x21, ne", + "orr w20, w23, w21, lsl #7", "strb w20, [x28, #1026]", "msr nzcv, x22" ] }, "fldcw [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": [ "0xd9 !11b /5" ], "ExpectedArm64ASM": [ - "ldrh w20, [x4]", - "strh w20, [x28, #1024]" + "mov x20, x4", + "ldrh w21, [x20]", + "strh w21, [x28, #1024]" ] }, "fnstenv [rax]": { @@ -3972,80 +3985,81 @@ "0xd9 !11b /6" ], "ExpectedArm64ASM": [ - "ldrh w20, [x28, #1024]", - "str w20, [x4]", - "mov w20, #0x0", - "ldrb w21, [x28, #747]", - "mov x0, x20", - "bfi x0, x21, #11, #3", - "mov x21, x0", + "mov x20, x4", + "ldrh w21, [x28, #1024]", + "str w21, [x20]", + "mov w21, #0x0", + "ldrb w22, [x28, #747]", + "mov x23, x21", + "bfi x23, x22, #11, #3", "ldrb w22, [x28, #744]", - "ldrb w23, [x28, #745]", - "ldrb w24, [x28, #746]", - "ldrb w25, [x28, #750]", - "orr x21, x21, x22, lsl #8", - "orr x21, x21, x23, lsl #9", - "orr x21, x21, x24, lsl #10", - "orr x21, x21, x25, lsl #14", - "str w21, [x4, #4]", - "ldrb w21, [x28, #1026]", - "and w22, w21, #0x1", - "mov w23, #0x3", - "mrs x24, nzcv", + "ldrb w24, [x28, #745]", + "ldrb w25, [x28, #746]", + "ldrb w30, [x28, #750]", + "orr x18, x23, x22, lsl #8", + "orr x22, x18, x24, lsl #9", + "orr x23, x22, x25, lsl #10", + "orr x22, x23, x30, lsl #14", + "str w22, [x20, #4]", + "ldrb w22, [x28, #1026]", + "and w23, w22, #0x1", + "mov w24, #0x3", + "mrs x25, nzcv", + "cmp x23, #0x0 (0)", + "csel x30, x24, x21, eq", + "orr w23, w21, w30", + "lsr w30, w22, #1", + "and w18, w30, #0x1", + "cmp x18, #0x0 (0)", + "csel x30, x24, x21, eq", + "orr w18, w23, w30, lsl #2", + "lsr w23, w22, #2", + "and w30, w23, #0x1", + "cmp x30, #0x0 (0)", + "csel x23, x24, x21, eq", + "orr w30, w18, w23, lsl #4", + "lsr w23, w22, #3", + "and w18, w23, #0x1", + "cmp x18, #0x0 (0)", + "csel x23, x24, x21, eq", + "orr w18, w30, w23, lsl #6", + "lsr w23, w22, #4", + "and w30, w23, #0x1", + "cmp x30, #0x0 (0)", + "csel x23, x24, x21, eq", + "orr w30, w18, w23, lsl #8", + "lsr w23, w22, #5", + "and w18, w23, #0x1", + "cmp x18, #0x0 (0)", + "csel x23, x24, x21, eq", + "orr w18, w30, w23, lsl #10", + "lsr w23, w22, #6", + "and w30, w23, #0x1", + "cmp x30, #0x0 (0)", + "csel x23, x24, x21, eq", + "orr w30, w18, w23, lsl #12", + "lsr w23, w22, #7", + "and w22, w23, #0x1", "cmp x22, #0x0 (0)", - "csel x22, x23, x20, eq", - "orr w22, w20, w22", - "lsr w25, w21, #1", - "and w25, w25, #0x1", - "cmp x25, #0x0 (0)", - "csel x25, x23, x20, eq", - "orr w22, w22, w25, lsl #2", - "lsr w25, w21, #2", - "and w25, w25, #0x1", - "cmp x25, #0x0 (0)", - "csel x25, x23, x20, eq", - "orr w22, w22, w25, lsl #4", - "lsr w25, w21, #3", - "and w25, w25, #0x1", - "cmp x25, #0x0 (0)", - "csel x25, x23, x20, eq", - "orr w22, w22, w25, lsl #6", - "lsr w25, w21, #4", - "and w25, w25, #0x1", - "cmp x25, #0x0 (0)", - "csel x25, x23, x20, eq", - "orr w22, w22, w25, lsl #8", - "lsr w25, w21, #5", - "and w25, w25, #0x1", - "cmp x25, #0x0 (0)", - "csel x25, x23, x20, eq", - "orr w22, w22, w25, lsl #10", - "lsr w25, w21, #6", - "and w25, w25, #0x1", - "cmp x25, #0x0 (0)", - "csel x25, x23, x20, eq", - "orr w22, w22, w25, lsl #12", - "lsr w21, w21, #7", - "and w21, w21, #0x1", - "cmp x21, #0x0 (0)", - "csel x21, x23, x20, eq", - "orr w21, w22, w21, lsl #14", - "str w21, [x4, #8]", - "str w20, [x4, #12]", - "str w20, [x4, #16]", - "str w20, [x4, #20]", - "str w20, [x4, #24]", - "msr nzcv, x24" + "csel x23, x24, x21, eq", + "orr w22, w30, w23, lsl #14", + "str w22, [x20, #8]", + "str w21, [x20, #12]", + "str w21, [x20, #16]", + "str w21, [x20, #20]", + "str w21, [x20, #24]", + "msr nzcv, x25" ] }, "fnstcw [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": [ "0xd9 !11b /7" ], "ExpectedArm64ASM": [ "ldrh w20, [x28, #1024]", - "strh w20, [x4]" + "mov x21, x4", + "strh w20, [x21]" ] }, "fld st0": { @@ -4056,15 +4070,15 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -4080,14 +4094,14 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -4102,15 +4116,15 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -4125,15 +4139,15 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -4148,15 +4162,15 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -4171,15 +4185,15 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -4194,15 +4208,15 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -4217,15 +4231,15 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -4240,14 +4254,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -4259,14 +4273,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -4278,14 +4292,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -4297,14 +4311,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -4316,14 +4330,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -4335,14 +4349,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -4354,14 +4368,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -4373,14 +4387,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -4392,7 +4406,7 @@ "ExpectedArm64ASM": [] }, "fchs": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 11, "Comment": [ "0xd9 11b 0xe0 /4" ], @@ -4403,14 +4417,15 @@ "mov w21, #0x0", "mov w22, #0x8000", "fmov d3, x21", - "mov v3.d[1], x22", - "eor v2.16b, v2.16b, v3.16b", + "mov v4.16b, v3.16b", + "mov v4.d[1], x22", + "eor v3.16b, v2.16b, v4.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q3, [x0, #768]" ] }, "fabs": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 11, "Comment": [ "0xd9 11b 0xe1 /4" ], @@ -4421,10 +4436,11 @@ "mov x21, #0xffffffffffffffff", "mov w22, #0x7fff", "fmov d3, x21", - "mov v3.d[1], x22", - "and v2.16b, v2.16b, v3.16b", + "mov v4.16b, v3.16b", + "mov v4.d[1], x22", + "and v3.16b, v2.16b, v4.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q3, [x0, #768]" ] }, "ftst": { @@ -4468,13 +4484,13 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", "strb w20, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]" + "strb w24, [x28, #746]", + "strb w22, [x28, #750]" ] }, "fxam": { @@ -4487,11 +4503,11 @@ "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", "mov x21, v2.d[1]", - "ubfx x21, x21, #15, #1", - "strb w21, [x28, #745]", + "ubfx x22, x21, #15, #1", + "strb w22, [x28, #745]", "ldrb w21, [x28, #1026]", - "lsr w20, w21, w20", - "and w20, w20, #0x1", + "lsr w22, w21, w20", + "and w20, w22, #0x1", "mrs x21, nzcv", "cmp x20, #0x1 (1)", "cset x22, ne", @@ -4509,11 +4525,11 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "ldr q2, [x28, #2304]", @@ -4529,11 +4545,11 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "ldr q2, [x28, #2320]", @@ -4549,11 +4565,11 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "ldr q2, [x28, #2336]", @@ -4569,11 +4585,11 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "ldr q2, [x28, #2352]", @@ -4589,11 +4605,11 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "ldr q2, [x28, #2368]", @@ -4609,11 +4625,11 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "ldr q2, [x28, #2384]", @@ -4629,11 +4645,11 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "movi v2.2d, #0x0", @@ -4675,11 +4691,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q3, [x0, #768]" ] }, "fyl2x": { @@ -4691,15 +4707,15 @@ "ldrb w20, [x28, #747]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "strb w21, [x28, #747]", + "and w22, w21, #0x7", + "strb w22, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", @@ -4728,11 +4744,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fptan": { @@ -4744,12 +4760,12 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "sub w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w22", - "orr w21, w23, w21", + "and w23, w22, #0x7", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w23", + "orr w21, w22, w24", "strb w21, [x28, #1026]", - "strb w22, [x28, #747]", + "strb w23, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", "mrs x0, nzcv", @@ -4777,16 +4793,16 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldr q3, [x28, #2304]", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "ldr q2, [x28, #2304]", "mov w21, #0x0", "strb w21, [x28, #746]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "add x0, x28, x22, lsl #4", - "str q3, [x0, #768]" + "str q3, [x0, #768]", + "add x0, x28, x23, lsl #4", + "str q2, [x0, #768]" ] }, "fpatan": { @@ -4798,15 +4814,15 @@ "ldrb w20, [x28, #747]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "strb w21, [x28, #747]", + "and w22, w21, #0x7", + "strb w22, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", @@ -4835,11 +4851,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fxtract": { @@ -4851,12 +4867,12 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "sub w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w22", - "orr w21, w23, w21", + "and w23, w22, #0x7", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w23", + "orr w21, w22, w24", "strb w21, [x28, #1026]", - "strb w22, [x28, #747]", + "strb w23, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", "mrs x0, nzcv", @@ -4912,13 +4928,13 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x22, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x23, lsl #4", + "str q4, [x0, #768]" ] }, "fprem1": { @@ -4929,10 +4945,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", @@ -4961,13 +4977,13 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "mov w21, #0x0", "strb w21, [x28, #746]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdecstp": { @@ -4977,8 +4993,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -4989,8 +5005,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -5002,10 +5018,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", @@ -5034,13 +5050,13 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "mov w21, #0x0", "strb w21, [x28, #746]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fyl2xp1": { @@ -5052,15 +5068,15 @@ "ldrb w20, [x28, #747]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "strb w21, [x28, #747]", + "and w22, w21, #0x7", + "strb w22, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "ldr q4, [x28, #2304]", "mrs x0, nzcv", @@ -5090,9 +5106,9 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v5.16b, v5.16b, v5.16b", + "mov v5.d[0], x0", + "mov v5.h[4], w1", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -5105,8 +5121,8 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v2.d[0]", - "umov w2, v2.h[4]", + "mov x1, v5.d[0]", + "umov w2, v5.h[4]", "mov x3, v3.d[0]", "umov w4, v3.h[4]", "ldr x5, [x28, #1440]", @@ -5123,7 +5139,7 @@ "eor v2.16b, v2.16b, v2.16b", "mov v2.d[0], x0", "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -5161,11 +5177,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q3, [x0, #768]" ] }, "fsincos": { @@ -5177,12 +5193,12 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "sub w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w22", - "orr w21, w23, w21", + "and w23, w22, #0x7", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w23", + "orr w21, w22, w24", "strb w21, [x28, #1026]", - "strb w22, [x28, #747]", + "strb w23, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", "mrs x0, nzcv", @@ -5238,15 +5254,15 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "mov w21, #0x0", "strb w21, [x28, #746]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x22, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x23, lsl #4", + "str q4, [x0, #768]" ] }, "frndint": { @@ -5283,11 +5299,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q3, [x0, #768]" ] }, "fscale": { @@ -5298,10 +5314,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", @@ -5330,11 +5346,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fsin": { @@ -5371,13 +5387,13 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "mov w21, #0x0", "strb w21, [x28, #746]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q3, [x0, #768]" ] }, "fcos": { @@ -5414,23 +5430,24 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "mov w21, #0x0", "strb w21, [x28, #746]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q3, [x0, #768]" ] }, "fiadd dword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xda !11b /0" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr w21, [x4]", + "mov x21, x4", + "ldr w22, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -5442,7 +5459,7 @@ "st1 {v2.2d, v3.2d}, [x0], #32", "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", - "mov w1, w21", + "mov w1, w22", "ldrh w0, [x28, #1024]", "ldr x2, [x28, #1320]", "blr x2", @@ -5487,21 +5504,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fimul dword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xda !11b /1" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr w21, [x4]", + "mov x21, x4", + "ldr w22, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -5513,7 +5531,7 @@ "st1 {v2.2d, v3.2d}, [x0], #32", "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", - "mov w1, w21", + "mov w1, w22", "ldrh w0, [x28, #1024]", "ldr x2, [x28, #1320]", "blr x2", @@ -5558,21 +5576,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "ficom dword [rax]": { - "ExpectedInstructionCount": 69, + "ExpectedInstructionCount": 70, "Comment": [ "0xda !11b /2" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr w21, [x4]", + "mov x21, x4", + "ldr w22, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -5584,7 +5603,7 @@ "st1 {v2.2d, v3.2d}, [x0], #32", "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", - "mov w1, w21", + "mov w1, w22", "ldrh w0, [x28, #1024]", "ldr x2, [x28, #1320]", "blr x2", @@ -5632,24 +5651,25 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "ficomp dword [rax]": { - "ExpectedInstructionCount": 77, + "ExpectedInstructionCount": 78, "Comment": [ "0xda !11b /3" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr w21, [x4]", + "mov x21, x4", + "ldr w22, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -5661,7 +5681,7 @@ "st1 {v2.2d, v3.2d}, [x0], #32", "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", - "mov w1, w21", + "mov w1, w22", "ldrh w0, [x28, #1024]", "ldr x2, [x28, #1320]", "blr x2", @@ -5709,32 +5729,33 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", - "mov w22, #0x0", - "strb w22, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", + "mov w21, #0x0", + "strb w21, [x28, #745]", + "strb w24, [x28, #746]", + "strb w22, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fisub dword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xda !11b /4" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr w21, [x4]", + "mov x21, x4", + "ldr w22, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -5746,7 +5767,7 @@ "st1 {v2.2d, v3.2d}, [x0], #32", "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", - "mov w1, w21", + "mov w1, w22", "ldrh w0, [x28, #1024]", "ldr x2, [x28, #1320]", "blr x2", @@ -5791,21 +5812,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fisubr dword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xda !11b /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr w21, [x4]", + "mov x21, x4", + "ldr w22, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -5817,7 +5839,7 @@ "st1 {v2.2d, v3.2d}, [x0], #32", "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", - "mov w1, w21", + "mov w1, w22", "ldrh w0, [x28, #1024]", "ldr x2, [x28, #1320]", "blr x2", @@ -5862,21 +5884,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fidiv dword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xda !11b /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr w21, [x4]", + "mov x21, x4", + "ldr w22, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -5888,7 +5911,7 @@ "st1 {v2.2d, v3.2d}, [x0], #32", "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", - "mov w1, w21", + "mov w1, w22", "ldrh w0, [x28, #1024]", "ldr x2, [x28, #1320]", "blr x2", @@ -5933,21 +5956,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fidivr dword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xda !11b /7" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr w21, [x4]", + "mov x21, x4", + "ldr w22, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -5959,7 +5983,7 @@ "st1 {v2.2d, v3.2d}, [x0], #32", "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", - "mov w1, w21", + "mov w1, w22", "ldrh w0, [x28, #1024]", "ldr x2, [x28, #1320]", "blr x2", @@ -6004,15 +6028,15 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fcmovb st0, st0": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc0 /0" ], @@ -6021,18 +6045,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovb st0, st1": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc1 /0" ], @@ -6041,18 +6066,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovb st0, st2": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc2 /0" ], @@ -6061,18 +6087,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovb st0, st3": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc3 /0" ], @@ -6081,18 +6108,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovb st0, st4": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc4 /0" ], @@ -6101,18 +6129,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovb st0, st5": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc5 /0" ], @@ -6121,18 +6150,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovb st0, st6": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc6 /0" ], @@ -6141,18 +6171,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovb st0, st7": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc7 /0" ], @@ -6161,18 +6192,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmove st0, st0": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc8 /1" ], @@ -6181,18 +6213,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmove st0, st1": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc9 /1" ], @@ -6201,18 +6234,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmove st0, st2": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xca /1" ], @@ -6221,18 +6255,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmove st0, st3": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xcb /1" ], @@ -6241,18 +6276,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmove st0, st4": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xcc /1" ], @@ -6261,18 +6297,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmove st0, st5": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xcd /1" ], @@ -6281,18 +6318,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmove st0, st6": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xce /1" ], @@ -6301,18 +6339,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmove st0, st7": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xcf /1" ], @@ -6321,398 +6360,423 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovbe st0, st0": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xda 11b 0xd0 /0" ], "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "csetm x21, hs", - "csel x20, x20, x21, eq", - "dup v2.2d, x20", + "csel x22, x20, x21, eq", + "dup v2.2d, x22", "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovbe st0, st1": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xda 11b 0xd1 /0" ], "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "csetm x21, hs", - "csel x20, x20, x21, eq", - "dup v2.2d, x20", + "csel x22, x20, x21, eq", + "dup v2.2d, x22", "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovbe st0, st2": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xda 11b 0xd2 /0" ], "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "csetm x21, hs", - "csel x20, x20, x21, eq", - "dup v2.2d, x20", + "csel x22, x20, x21, eq", + "dup v2.2d, x22", "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovbe st0, st3": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xda 11b 0xd3 /0" ], "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "csetm x21, hs", - "csel x20, x20, x21, eq", - "dup v2.2d, x20", + "csel x22, x20, x21, eq", + "dup v2.2d, x22", "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovbe st0, st4": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xda 11b 0xd4 /0" ], "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "csetm x21, hs", - "csel x20, x20, x21, eq", - "dup v2.2d, x20", + "csel x22, x20, x21, eq", + "dup v2.2d, x22", "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovbe st0, st5": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xda 11b 0xd5 /0" ], "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "csetm x21, hs", - "csel x20, x20, x21, eq", - "dup v2.2d, x20", + "csel x22, x20, x21, eq", + "dup v2.2d, x22", "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovbe st0, st6": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xda 11b 0xd6 /0" ], "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "csetm x21, hs", - "csel x20, x20, x21, eq", - "dup v2.2d, x20", + "csel x22, x20, x21, eq", + "dup v2.2d, x22", "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovbe st0, st7": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xda 11b 0xd7 /0" ], "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "csetm x21, hs", - "csel x20, x20, x21, eq", - "dup v2.2d, x20", + "csel x22, x20, x21, eq", + "dup v2.2d, x22", "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovu st0, st0": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xda 11b 0xd8 /1" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x0 (0)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x0 (0)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovu st0, st1": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xda 11b 0xd9 /1" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x1 (1)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovu st0, st2": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xda 11b 0xda /1" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x2 (2)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x2 (2)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovu st0, st3": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xda 11b 0xdb /1" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x3 (3)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x3 (3)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovu st0, st4": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xda 11b 0xdc /1" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x4 (4)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x4 (4)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovu st0, st5": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xda 11b 0xdd /1" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x5 (5)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x5 (5)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovu st0, st6": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xda 11b 0xde /1" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x6 (6)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x6 (6)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovu st0, st7": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xda 11b 0xdf /1" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x7 (7)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x7 (7)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fucompp": { @@ -6724,8 +6788,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -6759,73 +6823,78 @@ "mov x22, x0", "ubfx x23, x22, #1, #1", "ubfx x24, x22, #0, #1", - "ubfx x22, x22, #2, #1", - "orr w23, w23, w22", - "orr w24, w24, w22", - "strb w23, [x28, #744]", - "mov w23, #0x0", - "strb w23, [x28, #745]", - "strb w22, [x28, #746]", - "strb w24, [x28, #750]", + "ubfx x25, x22, #2, #1", + "orr w22, w23, w25", + "orr w23, w24, w25", + "strb w22, [x28, #744]", + "mov w22, #0x0", + "strb w22, [x28, #745]", + "strb w25, [x28, #746]", + "strb w23, [x28, #750]", "ldrb w22, [x28, #1026]", "lsl w23, w21, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "bic w24, w22, w23", + "strb w24, [x28, #1026]", + "add w22, w20, #0x1 (1)", + "and w20, w22, #0x7", + "lsl w22, w21, w20", + "bic w21, w24, w22", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fild dword [rax]": { - "ExpectedInstructionCount": 35, + "ExpectedInstructionCount": 40, "Comment": [ "0xdf !11b /5" ], "ExpectedArm64ASM": [ + "sub sp, sp, #0x20 (32)", "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "ldr w21, [x4]", - "mov w22, #0x0", - "sxtw x21, w21", + "mov x21, x4", + "ldr w22, [x21]", + "mov w21, #0x0", + "sxtw x23, w22", "mrs x22, nzcv", - "cmp x21, #0x0 (0)", - "mov w23, #0x8000", - "csel x23, x23, xzr, lt", - "cneg x21, x21, mi", - "mov w24, #0x3f", + "cmp x23, #0x0 (0)", + "mov w24, #0x8000", + "csel x25, x24, xzr, lt", + "cneg x24, x23, mi", + "mov w23, #0x3f", "mov x0, #0x3f", - "clz x25, x21", - "sub x25, x0, x25", - "sub x24, x24, x25", - "lsl x25, x21, x24", + "clz x30, x24", + "sub x30, x0, x30", + "sub x18, x23, x30", + "lsl x23, x24, x18", "mov w30, #0x403e", - "sub x24, x30, x24", - "mov w30, #0x0", - "cmp x21, #0x0 (0)", - "csel x21, x30, x24, eq", - "orr x21, x23, x21", - "fmov d2, x25", + "str w22, [sp]", + "sub x22, x30, x18", + "cmp x24, #0x0 (0)", + "csel x30, x21, x22, eq", + "orr x21, x25, x30", + "fmov d2, x23", "fmov d3, x21", - "mov v2.d[1], v3.d[0]", + "mov v4.16b, v2.16b", + "mov v4.d[1], v3.d[0]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x22" + "str q4, [x0, #768]", + "ldr w20, [sp]", + "msr nzcv, x20", + "add sp, sp, #0x20 (32)" ] }, "fisttp dword [rax]": { - "ExpectedInstructionCount": 38, + "ExpectedInstructionCount": 39, "Comment": [ "0xdb !11b /1" ], @@ -6859,19 +6928,20 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", "mov w21, w0", - "str w21, [x4]", + "mov x22, x4", + "str w21, [x22]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fist dword [rax]": { - "ExpectedInstructionCount": 30, + "ExpectedInstructionCount": 31, "Comment": [ "0xdb !11b /2" ], @@ -6905,11 +6975,12 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", "mov w20, w0", - "str w20, [x4]" + "mov x21, x4", + "str w20, [x21]" ] }, "fistp dword [rax]": { - "ExpectedInstructionCount": 38, + "ExpectedInstructionCount": 39, "Comment": [ "0xdf !11b /7" ], @@ -6943,31 +7014,33 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", "mov w21, w0", - "str w21, [x4]", + "mov x22, x4", + "str w21, [x22]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fld tword [rax]": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb !11b /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr q2, [x4]", + "mov x21, x4", + "ldr q2, [x21]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -6975,7 +7048,7 @@ ] }, "fstp tword [rax]": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xdb !11b /7" ], @@ -6983,21 +7056,22 @@ "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "str d2, [x4]", - "mov x21, v2.d[1]", - "strh w21, [x4, #8]", + "mov x21, x4", + "str d2, [x21]", + "mov x22, v2.d[1]", + "strh w22, [x21, #8]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fcmovnb st0, st0": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc0 /0" ], @@ -7006,18 +7080,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnb st0, st1": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc1 /0" ], @@ -7026,18 +7101,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnb st0, st2": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc2 /0" ], @@ -7046,18 +7122,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnb st0, st3": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc3 /0" ], @@ -7066,18 +7143,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnb st0, st4": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc4 /0" ], @@ -7086,18 +7164,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnb st0, st5": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc5 /0" ], @@ -7106,18 +7185,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnb st0, st6": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc6 /0" ], @@ -7126,18 +7206,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnb st0, st7": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc7 /0" ], @@ -7146,18 +7227,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovne st0, st0": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc8 /1" ], @@ -7166,18 +7248,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovne st0, st1": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc9 /1" ], @@ -7186,18 +7269,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovne st0, st2": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xca /1" ], @@ -7206,18 +7290,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovne st0, st3": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xcb /1" ], @@ -7226,18 +7311,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovne st0, st4": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xcc /1" ], @@ -7246,18 +7332,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovne st0, st5": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xcd /1" ], @@ -7266,18 +7353,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovne st0, st6": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xce /1" ], @@ -7286,18 +7374,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovne st0, st7": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xcf /1" ], @@ -7306,390 +7395,415 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnbe st0, st0": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb 11b 0xd0 /2" ], "ExpectedArm64ASM": [ "csetm x20, lo", - "csel x20, x20, xzr, ne", - "dup v2.2d, x20", + "csel x21, x20, xzr, ne", + "dup v2.2d, x21", "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnbe st0, st1": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb 11b 0xd1 /2" ], "ExpectedArm64ASM": [ "csetm x20, lo", - "csel x20, x20, xzr, ne", - "dup v2.2d, x20", + "csel x21, x20, xzr, ne", + "dup v2.2d, x21", "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnbe st0, st2": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb 11b 0xd2 /2" ], "ExpectedArm64ASM": [ "csetm x20, lo", - "csel x20, x20, xzr, ne", - "dup v2.2d, x20", + "csel x21, x20, xzr, ne", + "dup v2.2d, x21", "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnbe st0, st3": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb 11b 0xd3 /2" ], "ExpectedArm64ASM": [ "csetm x20, lo", - "csel x20, x20, xzr, ne", - "dup v2.2d, x20", + "csel x21, x20, xzr, ne", + "dup v2.2d, x21", "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnbe st0, st4": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb 11b 0xd4 /2" ], "ExpectedArm64ASM": [ "csetm x20, lo", - "csel x20, x20, xzr, ne", - "dup v2.2d, x20", + "csel x21, x20, xzr, ne", + "dup v2.2d, x21", "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnbe st0, st5": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb 11b 0xd5 /2" ], "ExpectedArm64ASM": [ "csetm x20, lo", - "csel x20, x20, xzr, ne", - "dup v2.2d, x20", + "csel x21, x20, xzr, ne", + "dup v2.2d, x21", "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnbe st0, st6": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb 11b 0xd6 /2" ], "ExpectedArm64ASM": [ "csetm x20, lo", - "csel x20, x20, xzr, ne", - "dup v2.2d, x20", + "csel x21, x20, xzr, ne", + "dup v2.2d, x21", "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnbe st0, st7": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb 11b 0xd7 /2" ], "ExpectedArm64ASM": [ "csetm x20, lo", - "csel x20, x20, xzr, ne", - "dup v2.2d, x20", + "csel x21, x20, xzr, ne", + "dup v2.2d, x21", "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnu st0, st0": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xdb 11b 0xd8 /3" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x0 (0)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x0 (0)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovnu st0, st1": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xdb 11b 0xd9 /3" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x1 (1)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovnu st0, st2": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xdb 11b 0xda /3" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x2 (2)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x2 (2)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovnu st0, st3": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xdb 11b 0xdb /3" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x3 (3)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x3 (3)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovnu st0, st4": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xdb 11b 0xdc /3" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x4 (4)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x4 (4)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovnu st0, st5": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xdb 11b 0xdd /3" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x5 (5)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x5 (5)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovnu st0, st6": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xdb 11b 0xde /3" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x6 (6)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x6 (6)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovnu st0, st7": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xdb 11b 0xdf /3" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x7 (7)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x7 (7)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fnclex": { @@ -7717,15 +7831,15 @@ ] }, "fucomi st0, st0": { - "ExpectedInstructionCount": 43, + "ExpectedInstructionCount": 44, "Comment": [ "0xdb 11b 0xe8 /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -7759,24 +7873,25 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "rmif x21, #63, #nzCv", - "rmif x22, #62, #nZcv", - "eor w26, w20, #0x1" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "rmif x20, #63, #nzCv", + "rmif x21, #62, #nZcv", + "eor w20, w23, #0x1", + "mov x26, x20" ] }, "fucomi st0, st1": { - "ExpectedInstructionCount": 43, + "ExpectedInstructionCount": 44, "Comment": [ "0xdb 11b 0xe9 /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -7810,24 +7925,25 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "rmif x21, #63, #nzCv", - "rmif x22, #62, #nZcv", - "eor w26, w20, #0x1" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "rmif x20, #63, #nzCv", + "rmif x21, #62, #nZcv", + "eor w20, w23, #0x1", + "mov x26, x20" ] }, "fucomi st0, st2": { - "ExpectedInstructionCount": 43, + "ExpectedInstructionCount": 44, "Comment": [ "0xdb 11b 0xea /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -7861,24 +7977,25 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "rmif x21, #63, #nzCv", - "rmif x22, #62, #nZcv", - "eor w26, w20, #0x1" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "rmif x20, #63, #nzCv", + "rmif x21, #62, #nZcv", + "eor w20, w23, #0x1", + "mov x26, x20" ] }, "fucomi st0, st3": { - "ExpectedInstructionCount": 43, + "ExpectedInstructionCount": 44, "Comment": [ "0xdb 11b 0xeb /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -7912,24 +8029,25 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "rmif x21, #63, #nzCv", - "rmif x22, #62, #nZcv", - "eor w26, w20, #0x1" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "rmif x20, #63, #nzCv", + "rmif x21, #62, #nZcv", + "eor w20, w23, #0x1", + "mov x26, x20" ] }, "fucomi st0, st4": { - "ExpectedInstructionCount": 43, + "ExpectedInstructionCount": 44, "Comment": [ "0xdb 11b 0xec /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -7963,24 +8081,25 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "rmif x21, #63, #nzCv", - "rmif x22, #62, #nZcv", - "eor w26, w20, #0x1" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "rmif x20, #63, #nzCv", + "rmif x21, #62, #nZcv", + "eor w20, w23, #0x1", + "mov x26, x20" ] }, "fucomi st0, st5": { - "ExpectedInstructionCount": 43, + "ExpectedInstructionCount": 44, "Comment": [ "0xdb 11b 0xed /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -8014,24 +8133,25 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "rmif x21, #63, #nzCv", - "rmif x22, #62, #nZcv", - "eor w26, w20, #0x1" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "rmif x20, #63, #nzCv", + "rmif x21, #62, #nZcv", + "eor w20, w23, #0x1", + "mov x26, x20" ] }, "fucomi st0, st6": { - "ExpectedInstructionCount": 43, + "ExpectedInstructionCount": 44, "Comment": [ "0xdb 11b 0xee /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -8065,24 +8185,25 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "rmif x21, #63, #nzCv", - "rmif x22, #62, #nZcv", - "eor w26, w20, #0x1" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "rmif x20, #63, #nzCv", + "rmif x21, #62, #nZcv", + "eor w20, w23, #0x1", + "mov x26, x20" ] }, "fucomi st0, st7": { - "ExpectedInstructionCount": 43, + "ExpectedInstructionCount": 44, "Comment": [ "0xdb 11b 0xef /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -8116,24 +8237,25 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "rmif x21, #63, #nzCv", - "rmif x22, #62, #nZcv", - "eor w26, w20, #0x1" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "rmif x20, #63, #nzCv", + "rmif x21, #62, #nZcv", + "eor w20, w23, #0x1", + "mov x26, x20" ] }, "fcomi st0, st0": { - "ExpectedInstructionCount": 43, + "ExpectedInstructionCount": 44, "Comment": [ "0xdb 11b 0xf0 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -8167,24 +8289,25 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "rmif x21, #63, #nzCv", - "rmif x22, #62, #nZcv", - "eor w26, w20, #0x1" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "rmif x20, #63, #nzCv", + "rmif x21, #62, #nZcv", + "eor w20, w23, #0x1", + "mov x26, x20" ] }, "fcomi st0, st1": { - "ExpectedInstructionCount": 43, + "ExpectedInstructionCount": 44, "Comment": [ "0xdb 11b 0xf1 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -8218,24 +8341,25 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "rmif x21, #63, #nzCv", - "rmif x22, #62, #nZcv", - "eor w26, w20, #0x1" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "rmif x20, #63, #nzCv", + "rmif x21, #62, #nZcv", + "eor w20, w23, #0x1", + "mov x26, x20" ] }, "fcomi st0, st2": { - "ExpectedInstructionCount": 43, + "ExpectedInstructionCount": 44, "Comment": [ "0xdb 11b 0xf2 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -8269,24 +8393,25 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "rmif x21, #63, #nzCv", - "rmif x22, #62, #nZcv", - "eor w26, w20, #0x1" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "rmif x20, #63, #nzCv", + "rmif x21, #62, #nZcv", + "eor w20, w23, #0x1", + "mov x26, x20" ] }, "fcomi st0, st3": { - "ExpectedInstructionCount": 43, + "ExpectedInstructionCount": 44, "Comment": [ "0xdb 11b 0xf3 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -8320,24 +8445,25 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "rmif x21, #63, #nzCv", - "rmif x22, #62, #nZcv", - "eor w26, w20, #0x1" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "rmif x20, #63, #nzCv", + "rmif x21, #62, #nZcv", + "eor w20, w23, #0x1", + "mov x26, x20" ] }, "fcomi st0, st4": { - "ExpectedInstructionCount": 43, + "ExpectedInstructionCount": 44, "Comment": [ "0xdb 11b 0xf4 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -8371,24 +8497,25 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "rmif x21, #63, #nzCv", - "rmif x22, #62, #nZcv", - "eor w26, w20, #0x1" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "rmif x20, #63, #nzCv", + "rmif x21, #62, #nZcv", + "eor w20, w23, #0x1", + "mov x26, x20" ] }, "fcomi st0, st5": { - "ExpectedInstructionCount": 43, + "ExpectedInstructionCount": 44, "Comment": [ "0xdb 11b 0xf5 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -8422,24 +8549,25 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "rmif x21, #63, #nzCv", - "rmif x22, #62, #nZcv", - "eor w26, w20, #0x1" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "rmif x20, #63, #nzCv", + "rmif x21, #62, #nZcv", + "eor w20, w23, #0x1", + "mov x26, x20" ] }, "fcomi st0, st6": { - "ExpectedInstructionCount": 43, + "ExpectedInstructionCount": 44, "Comment": [ "0xdb 11b 0xf6 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -8473,24 +8601,25 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "rmif x21, #63, #nzCv", - "rmif x22, #62, #nZcv", - "eor w26, w20, #0x1" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "rmif x20, #63, #nzCv", + "rmif x21, #62, #nZcv", + "eor w20, w23, #0x1", + "mov x26, x20" ] }, "fcomi st0, st7": { - "ExpectedInstructionCount": 43, + "ExpectedInstructionCount": 44, "Comment": [ "0xdb 11b 0xf7 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -8524,22 +8653,24 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "rmif x21, #63, #nzCv", - "rmif x22, #62, #nZcv", - "eor w26, w20, #0x1" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "rmif x20, #63, #nzCv", + "rmif x21, #62, #nZcv", + "eor w20, w23, #0x1", + "mov x26, x20" ] }, "fadd qword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xdc !11b /0" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -8564,11 +8695,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "ldr q3, [x0, #768]", + "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -8581,10 +8712,10 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", - "mov x3, v2.d[0]", - "umov w4, v2.h[4]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "mov x3, v3.d[0]", + "umov w4, v3.h[4]", "ldr x5, [x28, #1408]", "blr x5", "ldr w4, [x28, #728]", @@ -8596,21 +8727,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fmul qword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xdc !11b /1" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -8635,11 +8767,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "ldr q3, [x0, #768]", + "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -8652,10 +8784,10 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", - "mov x3, v2.d[0]", - "umov w4, v2.h[4]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "mov x3, v3.d[0]", + "umov w4, v3.h[4]", "ldr x5, [x28, #1424]", "blr x5", "ldr w4, [x28, #728]", @@ -8667,21 +8799,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fcom qword [rax]": { - "ExpectedInstructionCount": 69, + "ExpectedInstructionCount": 70, "Comment": [ "0xdc !11b /2" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -8706,11 +8839,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "ldr q3, [x0, #768]", + "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -8723,10 +8856,10 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", - "mov x3, v2.d[0]", - "umov w4, v2.h[4]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "mov x3, v3.d[0]", + "umov w4, v3.h[4]", "ldr x5, [x28, #1304]", "blr x5", "ldr w4, [x28, #728]", @@ -8741,24 +8874,25 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "fcomp qword [rax]": { - "ExpectedInstructionCount": 77, + "ExpectedInstructionCount": 78, "Comment": [ "0xdc !11b /3" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -8783,11 +8917,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "ldr q3, [x0, #768]", + "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -8800,10 +8934,10 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", - "mov x3, v2.d[0]", - "umov w4, v2.h[4]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "mov x3, v3.d[0]", + "umov w4, v3.h[4]", "ldr x5, [x28, #1304]", "blr x5", "ldr w4, [x28, #728]", @@ -8818,32 +8952,33 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", - "mov w22, #0x0", - "strb w22, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", + "mov w21, #0x0", + "strb w21, [x28, #745]", + "strb w24, [x28, #746]", + "strb w22, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fsub qword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xdc !11b /4" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -8868,11 +9003,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "ldr q3, [x0, #768]", + "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -8885,10 +9020,10 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", - "mov x3, v2.d[0]", - "umov w4, v2.h[4]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "mov x3, v3.d[0]", + "umov w4, v3.h[4]", "ldr x5, [x28, #1416]", "blr x5", "ldr w4, [x28, #728]", @@ -8900,21 +9035,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fsubr qword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xdc !11b /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -8939,11 +9075,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "ldr q3, [x0, #768]", + "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -8956,10 +9092,10 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v2.d[0]", - "umov w2, v2.h[4]", - "mov x3, v3.d[0]", - "umov w4, v3.h[4]", + "mov x1, v3.d[0]", + "umov w2, v3.h[4]", + "mov x3, v2.d[0]", + "umov w4, v2.h[4]", "ldr x5, [x28, #1416]", "blr x5", "ldr w4, [x28, #728]", @@ -8971,21 +9107,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdiv qword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xdc !11b /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -9010,11 +9147,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "ldr q3, [x0, #768]", + "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -9027,10 +9164,10 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", - "mov x3, v2.d[0]", - "umov w4, v2.h[4]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "mov x3, v3.d[0]", + "umov w4, v3.h[4]", "ldr x5, [x28, #1432]", "blr x5", "ldr w4, [x28, #728]", @@ -9042,21 +9179,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdivr qword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xdc !11b /7" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -9081,11 +9219,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "ldr q3, [x0, #768]", + "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -9098,10 +9236,10 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v2.d[0]", - "umov w2, v2.h[4]", - "mov x3, v3.d[0]", - "umov w4, v3.h[4]", + "mov x1, v3.d[0]", + "umov w2, v3.h[4]", + "mov x3, v2.d[0]", + "umov w4, v2.h[4]", "ldr x5, [x28, #1432]", "blr x5", "ldr w4, [x28, #728]", @@ -9113,11 +9251,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "db 0xdc, 0xc0": { @@ -9130,8 +9268,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9162,11 +9300,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fadd st1, st0": { @@ -9177,8 +9315,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9209,11 +9347,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fadd st2, st0": { @@ -9224,8 +9362,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9256,11 +9394,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fadd st3, st0": { @@ -9271,8 +9409,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9303,11 +9441,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fadd st4, st0": { @@ -9318,8 +9456,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9350,11 +9488,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fadd st5, st0": { @@ -9365,8 +9503,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9397,11 +9535,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fadd st6, st0": { @@ -9412,8 +9550,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9444,11 +9582,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fadd st7, st0": { @@ -9459,8 +9597,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9491,11 +9629,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "db 0xdc, 0xc8": { @@ -9508,8 +9646,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9540,11 +9678,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fmul st1, st0": { @@ -9555,8 +9693,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9587,11 +9725,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fmul st2, st0": { @@ -9602,8 +9740,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9634,11 +9772,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fmul st3, st0": { @@ -9649,8 +9787,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9681,11 +9819,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fmul st4, st0": { @@ -9696,8 +9834,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9728,11 +9866,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fmul st5, st0": { @@ -9743,8 +9881,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9775,11 +9913,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fmul st6, st0": { @@ -9790,8 +9928,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9822,11 +9960,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fmul st7, st0": { @@ -9837,8 +9975,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9869,11 +10007,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "db 0xdc, 0xe0": { @@ -9886,8 +10024,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9918,11 +10056,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubr st1, st0": { @@ -9933,8 +10071,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9965,11 +10103,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubr st2, st0": { @@ -9980,8 +10118,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10012,11 +10150,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubr st3, st0": { @@ -10027,8 +10165,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10059,11 +10197,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubr st4, st0": { @@ -10074,8 +10212,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10106,11 +10244,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubr st5, st0": { @@ -10121,8 +10259,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10153,11 +10291,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubr st6, st0": { @@ -10168,8 +10306,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10200,11 +10338,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubr st7, st0": { @@ -10215,8 +10353,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10247,11 +10385,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "db 0xdc, 0xe8": { @@ -10264,8 +10402,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10296,11 +10434,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsub st1, st0": { @@ -10311,8 +10449,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10343,11 +10481,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsub st2, st0": { @@ -10358,8 +10496,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10390,11 +10528,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsub st3, st0": { @@ -10405,8 +10543,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10437,11 +10575,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsub st4, st0": { @@ -10452,8 +10590,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10484,11 +10622,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsub st5, st0": { @@ -10499,8 +10637,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10531,11 +10669,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsub st6, st0": { @@ -10546,8 +10684,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10578,11 +10716,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsub st7, st0": { @@ -10593,8 +10731,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10625,11 +10763,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "db 0xdc, 0xf0": { @@ -10642,8 +10780,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10674,11 +10812,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivr st1, st0": { @@ -10689,8 +10827,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10721,11 +10859,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivr st2, st0": { @@ -10736,8 +10874,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10768,11 +10906,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivr st3, st0": { @@ -10783,8 +10921,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10815,11 +10953,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivr st4, st0": { @@ -10830,8 +10968,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10862,11 +11000,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivr st5, st0": { @@ -10877,8 +11015,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10909,11 +11047,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivr st6, st0": { @@ -10924,8 +11062,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10956,11 +11094,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivr st7, st0": { @@ -10971,8 +11109,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -11003,11 +11141,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "db 0xdc, 0xf8": { @@ -11020,8 +11158,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -11052,11 +11190,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdiv st1, st0": { @@ -11067,8 +11205,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -11099,11 +11237,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdiv st2, st0": { @@ -11114,8 +11252,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -11146,11 +11284,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdiv st3, st0": { @@ -11161,8 +11299,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -11193,11 +11331,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdiv st4, st0": { @@ -11208,8 +11346,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -11240,11 +11378,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdiv st5, st0": { @@ -11255,8 +11393,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -11287,11 +11425,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdiv st6, st0": { @@ -11302,8 +11440,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -11334,11 +11472,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdiv st7, st0": { @@ -11349,8 +11487,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -11381,21 +11519,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fld qword [rax]": { - "ExpectedInstructionCount": 39, + "ExpectedInstructionCount": 40, "Comment": [ "0xdd !11b /0" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -11420,23 +11559,23 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q3, [x0, #768]" ] }, "fisttp qword [rax]": { - "ExpectedInstructionCount": 38, + "ExpectedInstructionCount": 39, "Comment": [ "0xdd !11b /1" ], @@ -11470,19 +11609,20 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", "mov x21, x0", - "str x21, [x4]", + "mov x22, x4", + "str x21, [x22]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fst qword [rax]": { - "ExpectedInstructionCount": 30, + "ExpectedInstructionCount": 31, "Comment": [ "0xdd !11b /2" ], @@ -11515,12 +11655,13 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "mov v2.8b, v0.8b", - "str d2, [x4]" + "mov v3.8b, v0.8b", + "mov x20, x4", + "str d3, [x20]" ] }, "fstp qword [rax]": { - "ExpectedInstructionCount": 38, + "ExpectedInstructionCount": 39, "Comment": [ "0xdd !11b /3" ], @@ -11553,278 +11694,289 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "mov v2.8b, v0.8b", - "str d2, [x4]", + "mov v3.8b, v0.8b", + "mov x21, x4", + "str d3, [x21]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "frstor [rax]": { - "ExpectedInstructionCount": 107, + "ExpectedInstructionCount": 110, "Comment": [ "0xdd !11b /4" ], "ExpectedArm64ASM": [ - "ldrh w20, [x4]", - "strh w20, [x28, #1024]", - "ldr w20, [x4, #4]", - "ubfx w21, w20, #11, #3", - "strb w21, [x28, #747]", - "ubfx w22, w20, #8, #1", - "ubfx w23, w20, #9, #1", - "ubfx w24, w20, #10, #1", - "ubfx w20, w20, #14, #1", - "strb w22, [x28, #744]", - "strb w23, [x28, #745]", - "strb w24, [x28, #746]", - "strb w20, [x28, #750]", - "ldr w20, [x4, #8]", - "ubfx w22, w20, #0, #2", - "mrs x23, nzcv", - "cmp x22, #0x3 (3)", - "cset x22, ne", - "ubfx w24, w20, #2, #2", - "cmp x24, #0x3 (3)", - "cset x24, ne", - "orr w22, w22, w24, lsl #1", - "ubfx w24, w20, #4, #2", - "cmp x24, #0x3 (3)", - "cset x24, ne", - "orr w22, w22, w24, lsl #2", - "ubfx w24, w20, #6, #2", - "cmp x24, #0x3 (3)", - "cset x24, ne", - "orr w22, w22, w24, lsl #3", - "ubfx w24, w20, #8, #2", - "cmp x24, #0x3 (3)", - "cset x24, ne", - "orr w22, w22, w24, lsl #4", - "ubfx w24, w20, #10, #2", - "cmp x24, #0x3 (3)", - "cset x24, ne", - "orr w22, w22, w24, lsl #5", - "ubfx w24, w20, #12, #2", - "cmp x24, #0x3 (3)", - "cset x24, ne", - "orr w22, w22, w24, lsl #6", - "ubfx w20, w20, #14, #2", - "cmp x20, #0x3 (3)", - "cset x20, ne", - "orr w20, w22, w20, lsl #7", - "strb w20, [x28, #1026]", - "add x20, x4, #0x1c (28)", - "mov x22, #0xffffffffffffffff", - "mov w24, #0xffff", - "fmov d2, x22", - "mov v2.d[1], x24", - "ldur q3, [x4, #28]", - "and v3.16b, v3.16b, v2.16b", - "add x0, x28, x21, lsl #4", - "str q3, [x0, #768]", - "add x22, x20, #0xa (10)", - "add w21, w21, #0x1 (1)", - "and w21, w21, #0x7", - "ldur q3, [x20, #10]", - "and v3.16b, v3.16b, v2.16b", - "add x0, x28, x21, lsl #4", - "str q3, [x0, #768]", - "add x20, x22, #0xa (10)", - "add w21, w21, #0x1 (1)", - "and w21, w21, #0x7", - "ldur q3, [x22, #10]", - "and v3.16b, v3.16b, v2.16b", - "add x0, x28, x21, lsl #4", - "str q3, [x0, #768]", - "add x22, x20, #0xa (10)", - "add w21, w21, #0x1 (1)", - "and w21, w21, #0x7", - "ldur q3, [x20, #10]", - "and v3.16b, v3.16b, v2.16b", - "add x0, x28, x21, lsl #4", - "str q3, [x0, #768]", - "add x20, x22, #0xa (10)", - "add w21, w21, #0x1 (1)", - "and w21, w21, #0x7", - "ldur q3, [x22, #10]", - "and v3.16b, v3.16b, v2.16b", - "add x0, x28, x21, lsl #4", - "str q3, [x0, #768]", - "add x22, x20, #0xa (10)", - "add w21, w21, #0x1 (1)", - "and w21, w21, #0x7", - "ldur q3, [x20, #10]", - "and v3.16b, v3.16b, v2.16b", - "add x0, x28, x21, lsl #4", - "str q3, [x0, #768]", - "add x20, x22, #0xa (10)", - "add w21, w21, #0x1 (1)", - "and w21, w21, #0x7", - "ldur q3, [x22, #10]", - "and v2.16b, v3.16b, v2.16b", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]", - "add x22, x20, #0xa (10)", - "add w21, w21, #0x1 (1)", - "and w21, w21, #0x7", - "ldur d2, [x20, #10]", - "ldr h3, [x22, #8]", - "mov v2.h[4], v3.h[0]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x23" + "mov x20, x4", + "ldrh w21, [x20]", + "strh w21, [x28, #1024]", + "ldr w21, [x20, #4]", + "ubfx w22, w21, #11, #3", + "strb w22, [x28, #747]", + "ubfx w23, w21, #8, #1", + "ubfx w24, w21, #9, #1", + "ubfx w25, w21, #10, #1", + "ubfx w30, w21, #14, #1", + "strb w23, [x28, #744]", + "strb w24, [x28, #745]", + "strb w25, [x28, #746]", + "strb w30, [x28, #750]", + "ldr w21, [x20, #8]", + "ubfx w23, w21, #0, #2", + "mrs x24, nzcv", + "cmp x23, #0x3 (3)", + "cset x25, ne", + "ubfx w23, w21, #2, #2", + "cmp x23, #0x3 (3)", + "cset x30, ne", + "orr w23, w25, w30, lsl #1", + "ubfx w25, w21, #4, #2", + "cmp x25, #0x3 (3)", + "cset x30, ne", + "orr w25, w23, w30, lsl #2", + "ubfx w23, w21, #6, #2", + "cmp x23, #0x3 (3)", + "cset x30, ne", + "orr w23, w25, w30, lsl #3", + "ubfx w25, w21, #8, #2", + "cmp x25, #0x3 (3)", + "cset x30, ne", + "orr w25, w23, w30, lsl #4", + "ubfx w23, w21, #10, #2", + "cmp x23, #0x3 (3)", + "cset x30, ne", + "orr w23, w25, w30, lsl #5", + "ubfx w25, w21, #12, #2", + "cmp x25, #0x3 (3)", + "cset x30, ne", + "orr w25, w23, w30, lsl #6", + "ubfx w23, w21, #14, #2", + "cmp x23, #0x3 (3)", + "cset x21, ne", + "orr w23, w25, w21, lsl #7", + "strb w23, [x28, #1026]", + "add x21, x20, #0x1c (28)", + "mov x23, #0xffffffffffffffff", + "mov w25, #0xffff", + "fmov d2, x23", + "mov v3.16b, v2.16b", + "mov v3.d[1], x25", + "ldur q2, [x20, #28]", + "and v4.16b, v2.16b, v3.16b", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]", + "add x20, x21, #0xa (10)", + "add w23, w22, #0x1 (1)", + "and w22, w23, #0x7", + "ldur q2, [x21, #10]", + "and v4.16b, v2.16b, v3.16b", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]", + "add x21, x20, #0xa (10)", + "add w23, w22, #0x1 (1)", + "and w22, w23, #0x7", + "ldur q2, [x20, #10]", + "and v4.16b, v2.16b, v3.16b", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]", + "add x20, x21, #0xa (10)", + "add w23, w22, #0x1 (1)", + "and w22, w23, #0x7", + "ldur q2, [x21, #10]", + "and v4.16b, v2.16b, v3.16b", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]", + "add x21, x20, #0xa (10)", + "add w23, w22, #0x1 (1)", + "and w22, w23, #0x7", + "ldur q2, [x20, #10]", + "and v4.16b, v2.16b, v3.16b", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]", + "add x20, x21, #0xa (10)", + "add w23, w22, #0x1 (1)", + "and w22, w23, #0x7", + "ldur q2, [x21, #10]", + "and v4.16b, v2.16b, v3.16b", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]", + "add x21, x20, #0xa (10)", + "add w23, w22, #0x1 (1)", + "and w22, w23, #0x7", + "ldur q2, [x20, #10]", + "and v4.16b, v2.16b, v3.16b", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]", + "add x20, x21, #0xa (10)", + "add w23, w22, #0x1 (1)", + "and w22, w23, #0x7", + "ldur d2, [x21, #10]", + "ldr h3, [x20, #8]", + "mov v4.16b, v2.16b", + "mov v4.h[4], v3.h[0]", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]", + "msr nzcv, x24" ] }, "fnsave [rax]": { - "ExpectedInstructionCount": 119, + "ExpectedInstructionCount": 124, "Comment": [ "0xdd !11b /6" ], "ExpectedArm64ASM": [ - "ldrb w20, [x28, #747]", - "ldrh w21, [x28, #1024]", - "str w21, [x4]", - "mov w21, #0x0", - "mov x22, x21", - "bfi x22, x20, #11, #3", - "ldrb w23, [x28, #744]", - "ldrb w24, [x28, #745]", - "ldrb w25, [x28, #746]", - "ldrb w30, [x28, #750]", - "orr x22, x22, x23, lsl #8", - "orr x22, x22, x24, lsl #9", - "orr x22, x22, x25, lsl #10", - "orr x22, x22, x30, lsl #14", - "str w22, [x4, #4]", - "ldrb w22, [x28, #1026]", - "and w23, w22, #0x1", + "sub sp, sp, #0x20 (32)", + "mov x20, x4", + "ldrb w21, [x28, #747]", + "ldrh w22, [x28, #1024]", + "str w22, [x20]", + "mov w22, #0x0", + "mov x23, x22", + "bfi x23, x21, #11, #3", + "ldrb w24, [x28, #744]", + "ldrb w25, [x28, #745]", + "ldrb w30, [x28, #746]", + "ldrb w18, [x28, #750]", + "strb w21, [sp]", + "orr x21, x23, x24, lsl #8", + "orr x23, x21, x25, lsl #9", + "orr x21, x23, x30, lsl #10", + "orr x23, x21, x18, lsl #14", + "str w23, [x20, #4]", + "ldrb w21, [x28, #1026]", + "and w23, w21, #0x1", "mov w24, #0x3", "mrs x25, nzcv", "cmp x23, #0x0 (0)", - "csel x23, x24, x21, eq", - "orr w23, w21, w23", - "lsr w30, w22, #1", - "and w30, w30, #0x1", - "cmp x30, #0x0 (0)", - "csel x30, x24, x21, eq", - "orr w23, w23, w30, lsl #2", - "lsr w30, w22, #2", - "and w30, w30, #0x1", - "cmp x30, #0x0 (0)", - "csel x30, x24, x21, eq", - "orr w23, w23, w30, lsl #4", - "lsr w30, w22, #3", - "and w30, w30, #0x1", - "cmp x30, #0x0 (0)", - "csel x30, x24, x21, eq", - "orr w23, w23, w30, lsl #6", - "lsr w30, w22, #4", - "and w30, w30, #0x1", + "csel x30, x24, x22, eq", + "orr w23, w22, w30", + "lsr w30, w21, #1", + "and w18, w30, #0x1", + "cmp x18, #0x0 (0)", + "csel x30, x24, x22, eq", + "orr w18, w23, w30, lsl #2", + "lsr w23, w21, #2", + "and w30, w23, #0x1", "cmp x30, #0x0 (0)", - "csel x30, x24, x21, eq", - "orr w23, w23, w30, lsl #8", - "lsr w30, w22, #5", - "and w30, w30, #0x1", + "csel x23, x24, x22, eq", + "orr w30, w18, w23, lsl #4", + "lsr w23, w21, #3", + "and w18, w23, #0x1", + "cmp x18, #0x0 (0)", + "csel x23, x24, x22, eq", + "orr w18, w30, w23, lsl #6", + "lsr w23, w21, #4", + "and w30, w23, #0x1", "cmp x30, #0x0 (0)", - "csel x30, x24, x21, eq", - "orr w23, w23, w30, lsl #10", - "lsr w30, w22, #6", - "and w30, w30, #0x1", + "csel x23, x24, x22, eq", + "orr w30, w18, w23, lsl #8", + "lsr w23, w21, #5", + "and w18, w23, #0x1", + "cmp x18, #0x0 (0)", + "csel x23, x24, x22, eq", + "orr w18, w30, w23, lsl #10", + "lsr w23, w21, #6", + "and w30, w23, #0x1", "cmp x30, #0x0 (0)", - "csel x30, x24, x21, eq", - "orr w23, w23, w30, lsl #12", - "lsr w22, w22, #7", - "and w22, w22, #0x1", - "cmp x22, #0x0 (0)", - "csel x22, x24, x21, eq", - "orr w22, w23, w22, lsl #14", - "str w22, [x4, #8]", - "str w21, [x4, #12]", - "str w21, [x4, #16]", - "str w21, [x4, #20]", - "str w21, [x4, #24]", - "add x22, x4, #0x1c (28)", - "add x0, x28, x20, lsl #4", - "ldr q2, [x0, #768]", - "stur q2, [x4, #28]", - "add x23, x22, #0xa (10)", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "add x0, x28, x20, lsl #4", - "ldr q2, [x0, #768]", - "stur q2, [x22, #10]", - "add x22, x23, #0xa (10)", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "add x0, x28, x20, lsl #4", - "ldr q2, [x0, #768]", - "stur q2, [x23, #10]", - "add x23, x22, #0xa (10)", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "add x0, x28, x20, lsl #4", - "ldr q2, [x0, #768]", - "stur q2, [x22, #10]", - "add x22, x23, #0xa (10)", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "add x0, x28, x20, lsl #4", - "ldr q2, [x0, #768]", - "stur q2, [x23, #10]", - "add x23, x22, #0xa (10)", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "add x0, x28, x20, lsl #4", - "ldr q2, [x0, #768]", - "stur q2, [x22, #10]", - "add x22, x23, #0xa (10)", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "add x0, x28, x20, lsl #4", - "ldr q2, [x0, #768]", - "stur q2, [x23, #10]", - "add x23, x22, #0xa (10)", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "add x0, x28, x20, lsl #4", - "ldr q2, [x0, #768]", - "stur d2, [x22, #10]", - "dup v2.8h, v2.h[4]", - "str h2, [x23, #8]", + "csel x23, x24, x22, eq", + "orr w30, w18, w23, lsl #12", + "lsr w23, w21, #7", + "and w21, w23, #0x1", + "cmp x21, #0x0 (0)", + "csel x23, x24, x22, eq", + "orr w21, w30, w23, lsl #14", + "str w21, [x20, #8]", + "str w22, [x20, #12]", + "str w22, [x20, #16]", + "str w22, [x20, #20]", + "str w22, [x20, #24]", + "add x21, x20, #0x1c (28)", + "ldrb w23, [sp]", + "add x0, x28, x23, lsl #4", + "ldr q2, [x0, #768]", + "stur q2, [x20, #28]", + "add x20, x21, #0xa (10)", + "add w24, w23, #0x1 (1)", + "and w23, w24, #0x7", + "add x0, x28, x23, lsl #4", + "ldr q2, [x0, #768]", + "stur q2, [x21, #10]", + "add x21, x20, #0xa (10)", + "add w24, w23, #0x1 (1)", + "and w23, w24, #0x7", + "add x0, x28, x23, lsl #4", + "ldr q2, [x0, #768]", + "stur q2, [x20, #10]", + "add x20, x21, #0xa (10)", + "add w24, w23, #0x1 (1)", + "and w23, w24, #0x7", + "add x0, x28, x23, lsl #4", + "ldr q2, [x0, #768]", + "stur q2, [x21, #10]", + "add x21, x20, #0xa (10)", + "add w24, w23, #0x1 (1)", + "and w23, w24, #0x7", + "add x0, x28, x23, lsl #4", + "ldr q2, [x0, #768]", + "stur q2, [x20, #10]", + "add x20, x21, #0xa (10)", + "add w24, w23, #0x1 (1)", + "and w23, w24, #0x7", + "add x0, x28, x23, lsl #4", + "ldr q2, [x0, #768]", + "stur q2, [x21, #10]", + "add x21, x20, #0xa (10)", + "add w24, w23, #0x1 (1)", + "and w23, w24, #0x7", + "add x0, x28, x23, lsl #4", + "ldr q2, [x0, #768]", + "stur q2, [x20, #10]", + "add x20, x21, #0xa (10)", + "add w24, w23, #0x1 (1)", + "and w23, w24, #0x7", + "add x0, x28, x23, lsl #4", + "ldr q2, [x0, #768]", + "stur d2, [x21, #10]", + "dup v3.8h, v2.h[4]", + "str h3, [x20, #8]", "mov w20, #0x37f", "strh w20, [x28, #1024]", - "strb w21, [x28, #747]", - "strb w21, [x28, #744]", - "strb w21, [x28, #745]", - "strb w21, [x28, #746]", - "strb w21, [x28, #750]", - "strb w21, [x28, #1026]", - "msr nzcv, x25" + "strb w22, [x28, #747]", + "strb w22, [x28, #744]", + "strb w22, [x28, #745]", + "strb w22, [x28, #746]", + "strb w22, [x28, #750]", + "strb w22, [x28, #1026]", + "msr nzcv, x25", + "add sp, sp, #0x20 (32)" ] }, "fnstsw [rax]": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 14, "Comment": [ "0xdd !11b /7" ], "ExpectedArm64ASM": [ "mov w20, #0x0", "ldrb w21, [x28, #747]", - "bfi x20, x21, #11, #3", - "ldrb w21, [x28, #744]", - "ldrb w22, [x28, #745]", + "mov x22, x20", + "bfi x22, x21, #11, #3", + "ldrb w20, [x28, #744]", + "ldrb w21, [x28, #745]", "ldrb w23, [x28, #746]", "ldrb w24, [x28, #750]", - "orr x20, x20, x21, lsl #8", - "orr x20, x20, x22, lsl #9", - "orr x20, x20, x23, lsl #10", - "orr x20, x20, x24, lsl #14", - "strh w20, [x4]" + "orr x25, x22, x20, lsl #8", + "orr x20, x25, x21, lsl #9", + "orr x21, x20, x23, lsl #10", + "orr x20, x21, x24, lsl #14", + "mov x21, x4", + "strh w20, [x21]" ] }, "ffree st0": { @@ -11834,12 +11986,12 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x0 (0)", - "and w20, w20, #0x7", + "add w21, w20, #0x0 (0)", + "and w20, w21, #0x7", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w20, w22, w20", - "bic w20, w21, w20", + "lsl w23, w22, w20", + "bic w20, w21, w23", "strb w20, [x28, #1026]" ] }, @@ -11851,11 +12003,11 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w20, w21, w20", - "bic w20, w22, w20", + "lsl w23, w21, w20", + "bic w20, w22, w23", "strb w20, [x28, #1026]" ] }, @@ -11866,12 +12018,12 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x2 (2)", - "and w20, w20, #0x7", + "add w21, w20, #0x2 (2)", + "and w20, w21, #0x7", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w20, w22, w20", - "bic w20, w21, w20", + "lsl w23, w22, w20", + "bic w20, w21, w23", "strb w20, [x28, #1026]" ] }, @@ -11882,12 +12034,12 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x3 (3)", - "and w20, w20, #0x7", + "add w21, w20, #0x3 (3)", + "and w20, w21, #0x7", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w20, w22, w20", - "bic w20, w21, w20", + "lsl w23, w22, w20", + "bic w20, w21, w23", "strb w20, [x28, #1026]" ] }, @@ -11898,12 +12050,12 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x4 (4)", - "and w20, w20, #0x7", + "add w21, w20, #0x4 (4)", + "and w20, w21, #0x7", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w20, w22, w20", - "bic w20, w21, w20", + "lsl w23, w22, w20", + "bic w20, w21, w23", "strb w20, [x28, #1026]" ] }, @@ -11914,12 +12066,12 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x5 (5)", - "and w20, w20, #0x7", + "add w21, w20, #0x5 (5)", + "and w20, w21, #0x7", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w20, w22, w20", - "bic w20, w21, w20", + "lsl w23, w22, w20", + "bic w20, w21, w23", "strb w20, [x28, #1026]" ] }, @@ -11930,12 +12082,12 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x6 (6)", - "and w20, w20, #0x7", + "add w21, w20, #0x6 (6)", + "and w20, w21, #0x7", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w20, w22, w20", - "bic w20, w21, w20", + "lsl w23, w22, w20", + "bic w20, w21, w23", "strb w20, [x28, #1026]" ] }, @@ -11946,12 +12098,12 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x7 (7)", - "and w20, w20, #0x7", + "add w21, w20, #0x7 (7)", + "and w20, w21, #0x7", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w20, w22, w20", - "bic w20, w21, w20", + "lsl w23, w22, w20", + "bic w20, w21, w23", "strb w20, [x28, #1026]" ] }, @@ -11963,10 +12115,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -11978,10 +12130,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -11993,10 +12145,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -12008,10 +12160,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -12023,10 +12175,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -12038,10 +12190,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -12053,10 +12205,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -12068,10 +12220,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -12083,18 +12235,18 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -12107,17 +12259,17 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", + "and w23, w22, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "str q2, [x0, #768]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -12129,18 +12281,18 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -12152,18 +12304,18 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -12175,18 +12327,18 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -12198,18 +12350,18 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -12221,18 +12373,18 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -12244,18 +12396,18 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -12268,8 +12420,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x0", "add w22, w20, #0x0 (0)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -12303,13 +12455,13 @@ "mov x20, x0", "ubfx x22, x20, #1, #1", "ubfx x23, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w22, w22, w20", - "orr w23, w23, w20", - "strb w22, [x28, #744]", + "ubfx x24, x20, #2, #1", + "orr w20, w22, w24", + "orr w22, w23, w24", + "strb w20, [x28, #744]", "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w23, [x28, #750]" + "strb w24, [x28, #746]", + "strb w22, [x28, #750]" ] }, "fucom st1": { @@ -12320,8 +12472,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -12355,14 +12507,14 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "fucom st2": { @@ -12373,8 +12525,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -12408,14 +12560,14 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "fucom st3": { @@ -12426,8 +12578,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -12461,14 +12613,14 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "fucom st4": { @@ -12479,8 +12631,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -12514,14 +12666,14 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "fucom st5": { @@ -12532,8 +12684,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -12567,14 +12719,14 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "fucom st6": { @@ -12585,8 +12737,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -12620,14 +12772,14 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "fucom st7": { @@ -12638,8 +12790,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -12673,14 +12825,14 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "fucomp st0": { @@ -12692,8 +12844,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x0", "add w22, w20, #0x0 (0)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -12727,20 +12879,20 @@ "mov x22, x0", "ubfx x23, x22, #1, #1", "ubfx x24, x22, #0, #1", - "ubfx x22, x22, #2, #1", - "orr w23, w23, w22", - "orr w24, w24, w22", - "strb w23, [x28, #744]", + "ubfx x25, x22, #2, #1", + "orr w22, w23, w25", + "orr w23, w24, w25", + "strb w22, [x28, #744]", "strb w21, [x28, #745]", - "strb w22, [x28, #746]", - "strb w24, [x28, #750]", + "strb w25, [x28, #746]", + "strb w23, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -12753,8 +12905,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -12788,20 +12940,20 @@ "mov x22, x0", "ubfx x23, x22, #1, #1", "ubfx x24, x22, #0, #1", - "ubfx x22, x22, #2, #1", - "orr w23, w23, w22", - "orr w24, w24, w22", - "strb w23, [x28, #744]", - "mov w23, #0x0", - "strb w23, [x28, #745]", - "strb w22, [x28, #746]", - "strb w24, [x28, #750]", + "ubfx x25, x22, #2, #1", + "orr w22, w23, w25", + "orr w23, w24, w25", + "strb w22, [x28, #744]", + "mov w22, #0x0", + "strb w22, [x28, #745]", + "strb w25, [x28, #746]", + "strb w23, [x28, #750]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -12813,8 +12965,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -12848,21 +13000,21 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", - "mov w22, #0x0", - "strb w22, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", + "mov w21, #0x0", + "strb w21, [x28, #745]", + "strb w24, [x28, #746]", + "strb w22, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -12874,8 +13026,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -12909,21 +13061,21 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", - "mov w22, #0x0", - "strb w22, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", + "mov w21, #0x0", + "strb w21, [x28, #745]", + "strb w24, [x28, #746]", + "strb w22, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -12935,8 +13087,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -12970,21 +13122,21 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", - "mov w22, #0x0", - "strb w22, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", + "mov w21, #0x0", + "strb w21, [x28, #745]", + "strb w24, [x28, #746]", + "strb w22, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -12996,8 +13148,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -13031,21 +13183,21 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", - "mov w22, #0x0", - "strb w22, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", + "mov w21, #0x0", + "strb w21, [x28, #745]", + "strb w24, [x28, #746]", + "strb w22, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -13057,8 +13209,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -13092,21 +13244,21 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", - "mov w22, #0x0", - "strb w22, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", + "mov w21, #0x0", + "strb w21, [x28, #745]", + "strb w24, [x28, #746]", + "strb w22, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -13118,8 +13270,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -13153,32 +13305,33 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", - "mov w22, #0x0", - "strb w22, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", + "mov w21, #0x0", + "strb w21, [x28, #745]", + "strb w24, [x28, #746]", + "strb w22, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fiadd word [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xde !11b /0" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldrh w21, [x4]", + "mov x21, x4", + "ldrh w22, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -13190,7 +13343,7 @@ "st1 {v2.2d, v3.2d}, [x0], #32", "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", - "sxth w1, w21", + "sxth w1, w22", "ldrh w0, [x28, #1024]", "ldr x2, [x28, #1312]", "blr x2", @@ -13235,21 +13388,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fimul word [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xde !11b /1" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldrh w21, [x4]", + "mov x21, x4", + "ldrh w22, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -13261,7 +13415,7 @@ "st1 {v2.2d, v3.2d}, [x0], #32", "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", - "sxth w1, w21", + "sxth w1, w22", "ldrh w0, [x28, #1024]", "ldr x2, [x28, #1312]", "blr x2", @@ -13306,21 +13460,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "ficom word [rax]": { - "ExpectedInstructionCount": 69, + "ExpectedInstructionCount": 70, "Comment": [ "0xde !11b /2" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldrh w21, [x4]", + "mov x21, x4", + "ldrh w22, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -13332,7 +13487,7 @@ "st1 {v2.2d, v3.2d}, [x0], #32", "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", - "sxth w1, w21", + "sxth w1, w22", "ldrh w0, [x28, #1024]", "ldr x2, [x28, #1312]", "blr x2", @@ -13380,24 +13535,25 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "ficomp word [rax]": { - "ExpectedInstructionCount": 77, + "ExpectedInstructionCount": 78, "Comment": [ "0xde !11b /3" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldrh w21, [x4]", + "mov x21, x4", + "ldrh w22, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -13409,7 +13565,7 @@ "st1 {v2.2d, v3.2d}, [x0], #32", "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", - "sxth w1, w21", + "sxth w1, w22", "ldrh w0, [x28, #1024]", "ldr x2, [x28, #1312]", "blr x2", @@ -13457,32 +13613,33 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", - "mov w22, #0x0", - "strb w22, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", + "mov w21, #0x0", + "strb w21, [x28, #745]", + "strb w24, [x28, #746]", + "strb w22, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fisub word [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xde !11b /4" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldrh w21, [x4]", + "mov x21, x4", + "ldrh w22, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -13494,7 +13651,7 @@ "st1 {v2.2d, v3.2d}, [x0], #32", "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", - "sxth w1, w21", + "sxth w1, w22", "ldrh w0, [x28, #1024]", "ldr x2, [x28, #1312]", "blr x2", @@ -13539,21 +13696,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fisubr word [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xde !11b /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldrh w21, [x4]", + "mov x21, x4", + "ldrh w22, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -13565,7 +13723,7 @@ "st1 {v2.2d, v3.2d}, [x0], #32", "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", - "sxth w1, w21", + "sxth w1, w22", "ldrh w0, [x28, #1024]", "ldr x2, [x28, #1312]", "blr x2", @@ -13610,21 +13768,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fidiv word [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xde !11b /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldrh w21, [x4]", + "mov x21, x4", + "ldrh w22, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -13636,7 +13795,7 @@ "st1 {v2.2d, v3.2d}, [x0], #32", "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", - "sxth w1, w21", + "sxth w1, w22", "ldrh w0, [x28, #1024]", "ldr x2, [x28, #1312]", "blr x2", @@ -13681,21 +13840,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fidivr word [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xde !11b /7" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldrh w21, [x4]", + "mov x21, x4", + "ldrh w22, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -13707,7 +13867,7 @@ "st1 {v2.2d, v3.2d}, [x0], #32", "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", - "sxth w1, w21", + "sxth w1, w22", "ldrh w0, [x28, #1024]", "ldr x2, [x28, #1312]", "blr x2", @@ -13752,11 +13912,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "faddp st0": { @@ -13767,8 +13927,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -13799,19 +13959,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "faddp st1": { @@ -13823,8 +13983,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -13855,18 +14015,18 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w20", + "bic w21, w22, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x22, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x23, lsl #4", + "str q4, [x0, #768]" ] }, "faddp st2": { @@ -13877,8 +14037,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -13909,19 +14069,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "faddp st3": { @@ -13932,8 +14092,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -13964,19 +14124,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "faddp st4": { @@ -13987,8 +14147,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14019,19 +14179,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "faddp st5": { @@ -14042,8 +14202,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14074,19 +14234,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "faddp st6": { @@ -14097,8 +14257,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14129,19 +14289,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "faddp st7": { @@ -14152,8 +14312,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14184,19 +14344,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fmulp st0": { @@ -14207,8 +14367,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14239,19 +14399,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fmulp st1": { @@ -14263,8 +14423,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14295,18 +14455,18 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w20", + "bic w21, w22, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x22, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x23, lsl #4", + "str q4, [x0, #768]" ] }, "fmulp st2": { @@ -14317,8 +14477,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14349,19 +14509,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fmulp st3": { @@ -14372,8 +14532,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14404,19 +14564,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fmulp st4": { @@ -14427,8 +14587,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14459,19 +14619,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fmulp st5": { @@ -14482,8 +14642,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14514,19 +14674,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fmulp st6": { @@ -14537,8 +14697,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14569,19 +14729,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fmulp st7": { @@ -14592,8 +14752,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14624,19 +14784,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fcompp": { @@ -14648,8 +14808,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14683,25 +14843,25 @@ "mov x22, x0", "ubfx x23, x22, #1, #1", "ubfx x24, x22, #0, #1", - "ubfx x22, x22, #2, #1", - "orr w23, w23, w22", - "orr w24, w24, w22", - "strb w23, [x28, #744]", - "mov w23, #0x0", - "strb w23, [x28, #745]", - "strb w22, [x28, #746]", - "strb w24, [x28, #750]", + "ubfx x25, x22, #2, #1", + "orr w22, w23, w25", + "orr w23, w24, w25", + "strb w22, [x28, #744]", + "mov w22, #0x0", + "strb w22, [x28, #745]", + "strb w25, [x28, #746]", + "strb w23, [x28, #750]", "ldrb w22, [x28, #1026]", "lsl w23, w21, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "bic w24, w22, w23", + "strb w24, [x28, #1026]", + "add w22, w20, #0x1 (1)", + "and w20, w22, #0x7", + "lsl w22, w21, w20", + "bic w21, w24, w22", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -14715,8 +14875,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14747,19 +14907,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubrp st1, st0": { @@ -14771,8 +14931,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14803,18 +14963,18 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w20", + "bic w21, w22, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x22, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x23, lsl #4", + "str q4, [x0, #768]" ] }, "fsubrp st2, st0": { @@ -14825,8 +14985,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14857,19 +15017,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubrp st3, st0": { @@ -14880,8 +15040,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14912,19 +15072,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubrp st4, st0": { @@ -14935,8 +15095,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14967,19 +15127,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubrp st5, st0": { @@ -14990,8 +15150,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15022,19 +15182,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubrp st6, st0": { @@ -15045,8 +15205,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15077,19 +15237,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubrp st7, st0": { @@ -15100,8 +15260,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15132,19 +15292,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "db 0xde, 0xe8": { @@ -15157,8 +15317,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15189,19 +15349,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubp st1, st0": { @@ -15213,8 +15373,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15245,18 +15405,18 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w20", + "bic w21, w22, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x22, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x23, lsl #4", + "str q4, [x0, #768]" ] }, "fsubp st2, st0": { @@ -15267,8 +15427,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15299,19 +15459,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubp st3, st0": { @@ -15322,8 +15482,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15354,19 +15514,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubp st4, st0": { @@ -15377,8 +15537,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15409,19 +15569,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubp st5, st0": { @@ -15432,8 +15592,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15464,19 +15624,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubp st6, st0": { @@ -15487,8 +15647,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15519,19 +15679,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubp st7, st0": { @@ -15542,8 +15702,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15574,19 +15734,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "db 0xde, 0xf0": { @@ -15599,8 +15759,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15631,19 +15791,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivrp st1, st0": { @@ -15655,8 +15815,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15687,18 +15847,18 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w20", + "bic w21, w22, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x22, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x23, lsl #4", + "str q4, [x0, #768]" ] }, "fdivrp st2, st0": { @@ -15709,8 +15869,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15741,19 +15901,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivrp st3, st0": { @@ -15764,8 +15924,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15796,19 +15956,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivrp st4, st0": { @@ -15819,8 +15979,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15851,19 +16011,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivrp st5, st0": { @@ -15874,8 +16034,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15906,19 +16066,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivrp st6, st0": { @@ -15929,8 +16089,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15961,19 +16121,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivrp st7, st0": { @@ -15984,8 +16144,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -16016,19 +16176,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "db 0xde, 0xf8": { @@ -16041,8 +16201,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -16073,19 +16233,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivp st1, st0": { @@ -16097,8 +16257,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -16129,18 +16289,18 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w20", + "bic w21, w22, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x22, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x23, lsl #4", + "str q4, [x0, #768]" ] }, "fdivp st2, st0": { @@ -16151,8 +16311,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -16183,19 +16343,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivp st3, st0": { @@ -16206,8 +16366,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -16238,19 +16398,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivp st4, st0": { @@ -16261,8 +16421,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -16293,19 +16453,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivp st5, st0": { @@ -16316,8 +16476,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -16348,19 +16508,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivp st6, st0": { @@ -16371,8 +16531,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -16403,19 +16563,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivp st7, st0": { @@ -16426,8 +16586,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -16458,66 +16618,71 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fild word [rax]": { - "ExpectedInstructionCount": 35, + "ExpectedInstructionCount": 40, "Comment": [ "0xdf !11b /0" ], "ExpectedArm64ASM": [ + "sub sp, sp, #0x20 (32)", "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "ldrh w21, [x4]", - "mov w22, #0x0", - "sxth x21, w21", + "mov x21, x4", + "ldrh w22, [x21]", + "mov w21, #0x0", + "sxth x23, w22", "mrs x22, nzcv", - "cmp x21, #0x0 (0)", - "mov w23, #0x8000", - "csel x23, x23, xzr, lt", - "cneg x21, x21, mi", - "mov w24, #0x3f", + "cmp x23, #0x0 (0)", + "mov w24, #0x8000", + "csel x25, x24, xzr, lt", + "cneg x24, x23, mi", + "mov w23, #0x3f", "mov x0, #0x3f", - "clz x25, x21", - "sub x25, x0, x25", - "sub x24, x24, x25", - "lsl x25, x21, x24", + "clz x30, x24", + "sub x30, x0, x30", + "sub x18, x23, x30", + "lsl x23, x24, x18", "mov w30, #0x403e", - "sub x24, x30, x24", - "mov w30, #0x0", - "cmp x21, #0x0 (0)", - "csel x21, x30, x24, eq", - "orr x21, x23, x21", - "fmov d2, x25", + "str w22, [sp]", + "sub x22, x30, x18", + "cmp x24, #0x0 (0)", + "csel x30, x21, x22, eq", + "orr x21, x25, x30", + "fmov d2, x23", "fmov d3, x21", - "mov v2.d[1], v3.d[0]", + "mov v4.16b, v2.16b", + "mov v4.d[1], v3.d[0]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x22" + "str q4, [x0, #768]", + "ldr w20, [sp]", + "msr nzcv, x20", + "add sp, sp, #0x20 (32)" ] }, "fisttp word [rax]": { - "ExpectedInstructionCount": 38, + "ExpectedInstructionCount": 39, "Comment": [ "0xdf !11b /1" ], @@ -16551,19 +16716,20 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", "sxth x21, w0", - "strh w21, [x4]", + "mov x22, x4", + "strh w21, [x22]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fist word [rax]": { - "ExpectedInstructionCount": 30, + "ExpectedInstructionCount": 31, "Comment": [ "0xdf !11b /2" ], @@ -16597,11 +16763,12 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", "sxth x20, w0", - "strh w20, [x4]" + "mov x21, x4", + "strh w20, [x21]" ] }, "fistp word [rax]": { - "ExpectedInstructionCount": 38, + "ExpectedInstructionCount": 39, "Comment": [ "0xdf !11b /3" ], @@ -16635,33 +16802,35 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", "sxth x21, w0", - "strh w21, [x4]", + "mov x22, x4", + "strh w21, [x22]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fbld tword [rax]": { - "ExpectedInstructionCount": 40, + "ExpectedInstructionCount": 41, "Comment": [ "0xdf !11b /4" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "ldr q2, [x4]", + "mov x21, x4", + "ldr q2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -16687,15 +16856,15 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q3, [x0, #768]" ] }, "fbstp tword [rax]": { - "ExpectedInstructionCount": 42, + "ExpectedInstructionCount": 43, "Comment": [ "0xdf !11b /6" ], @@ -16728,19 +16897,20 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "str d2, [x4]", - "mov x21, v2.d[1]", - "strh w21, [x4, #8]", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "mov x21, x4", + "str d3, [x21]", + "mov x22, v3.d[1]", + "strh w22, [x21, #8]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -16751,8 +16921,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -16763,8 +16933,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -16775,8 +16945,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -16787,8 +16957,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -16799,8 +16969,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -16811,8 +16981,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -16823,8 +16993,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -16835,41 +17005,45 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fnstsw ax": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 16, "Comment": [ "0xdf 11b 0xe0 /4" ], "ExpectedArm64ASM": [ "mov w20, #0x0", "ldrb w21, [x28, #747]", - "bfi x20, x21, #11, #3", - "ldrb w21, [x28, #744]", - "ldrb w22, [x28, #745]", + "mov x22, x20", + "bfi x22, x21, #11, #3", + "ldrb w20, [x28, #744]", + "ldrb w21, [x28, #745]", "ldrb w23, [x28, #746]", "ldrb w24, [x28, #750]", - "orr x20, x20, x21, lsl #8", - "orr x20, x20, x22, lsl #9", - "orr x20, x20, x23, lsl #10", - "orr x20, x20, x24, lsl #14", - "bfxil x4, x20, #0, #16" + "orr x25, x22, x20, lsl #8", + "orr x20, x25, x21, lsl #9", + "orr x21, x20, x23, lsl #10", + "orr x20, x21, x24, lsl #14", + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x4, x22" ] }, "fucomip st0": { - "ExpectedInstructionCount": 51, + "ExpectedInstructionCount": 52, "Comment": [ "0xdf 11b 0xe8 /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -16903,24 +17077,25 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "rmif x22, #63, #nzCv", - "rmif x23, #62, #nZcv", - "mov w22, #0x1", - "eor w26, w21, #0x1", - "ldrb w21, [x28, #1026]", - "lsl w22, w22, w20", - "bic w21, w21, w22", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "rmif x21, #63, #nzCv", + "rmif x22, #62, #nZcv", + "mov w21, #0x1", + "eor w22, w24, #0x1", + "mov x26, x22", + "ldrb w22, [x28, #1026]", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fucomip st1": { - "ExpectedInstructionCount": 51, + "ExpectedInstructionCount": 52, "Comment": [ "0xdf 11b 0xe9 /5" ], @@ -16928,8 +17103,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -16963,31 +17138,32 @@ "mov x22, x0", "ubfx x23, x22, #1, #1", "ubfx x24, x22, #0, #1", - "ubfx x22, x22, #2, #1", - "orr w23, w23, w22", - "orr w24, w24, w22", - "rmif x23, #63, #nzCv", - "rmif x24, #62, #nZcv", - "eor w26, w22, #0x1", + "ubfx x25, x22, #2, #1", + "orr w22, w23, w25", + "orr w23, w24, w25", + "rmif x22, #63, #nzCv", + "rmif x23, #62, #nZcv", + "eor w22, w25, #0x1", + "mov x26, x22", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fucomip st2": { - "ExpectedInstructionCount": 51, + "ExpectedInstructionCount": 52, "Comment": [ "0xdf 11b 0xea /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -17021,32 +17197,33 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "rmif x22, #63, #nzCv", - "rmif x23, #62, #nZcv", - "mov w22, #0x1", - "eor w26, w21, #0x1", - "ldrb w21, [x28, #1026]", - "lsl w22, w22, w20", - "bic w21, w21, w22", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "rmif x21, #63, #nzCv", + "rmif x22, #62, #nZcv", + "mov w21, #0x1", + "eor w22, w24, #0x1", + "mov x26, x22", + "ldrb w22, [x28, #1026]", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fucomip st3": { - "ExpectedInstructionCount": 51, + "ExpectedInstructionCount": 52, "Comment": [ "0xdf 11b 0xeb /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -17080,32 +17257,33 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "rmif x22, #63, #nzCv", - "rmif x23, #62, #nZcv", - "mov w22, #0x1", - "eor w26, w21, #0x1", - "ldrb w21, [x28, #1026]", - "lsl w22, w22, w20", - "bic w21, w21, w22", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "rmif x21, #63, #nzCv", + "rmif x22, #62, #nZcv", + "mov w21, #0x1", + "eor w22, w24, #0x1", + "mov x26, x22", + "ldrb w22, [x28, #1026]", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fucomip st4": { - "ExpectedInstructionCount": 51, + "ExpectedInstructionCount": 52, "Comment": [ "0xdf 11b 0xec /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -17139,32 +17317,33 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "rmif x22, #63, #nzCv", - "rmif x23, #62, #nZcv", - "mov w22, #0x1", - "eor w26, w21, #0x1", - "ldrb w21, [x28, #1026]", - "lsl w22, w22, w20", - "bic w21, w21, w22", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "rmif x21, #63, #nzCv", + "rmif x22, #62, #nZcv", + "mov w21, #0x1", + "eor w22, w24, #0x1", + "mov x26, x22", + "ldrb w22, [x28, #1026]", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fucomip st5": { - "ExpectedInstructionCount": 51, + "ExpectedInstructionCount": 52, "Comment": [ "0xdf 11b 0xed /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -17198,32 +17377,33 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "rmif x22, #63, #nzCv", - "rmif x23, #62, #nZcv", - "mov w22, #0x1", - "eor w26, w21, #0x1", - "ldrb w21, [x28, #1026]", - "lsl w22, w22, w20", - "bic w21, w21, w22", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "rmif x21, #63, #nzCv", + "rmif x22, #62, #nZcv", + "mov w21, #0x1", + "eor w22, w24, #0x1", + "mov x26, x22", + "ldrb w22, [x28, #1026]", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fucomip st6": { - "ExpectedInstructionCount": 51, + "ExpectedInstructionCount": 52, "Comment": [ "0xdf 11b 0xee /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -17257,32 +17437,33 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "rmif x22, #63, #nzCv", - "rmif x23, #62, #nZcv", - "mov w22, #0x1", - "eor w26, w21, #0x1", - "ldrb w21, [x28, #1026]", - "lsl w22, w22, w20", - "bic w21, w21, w22", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "rmif x21, #63, #nzCv", + "rmif x22, #62, #nZcv", + "mov w21, #0x1", + "eor w22, w24, #0x1", + "mov x26, x22", + "ldrb w22, [x28, #1026]", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fucomip st7": { - "ExpectedInstructionCount": 51, + "ExpectedInstructionCount": 52, "Comment": [ "0xdf 11b 0xef /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -17316,32 +17497,33 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "rmif x22, #63, #nzCv", - "rmif x23, #62, #nZcv", - "mov w22, #0x1", - "eor w26, w21, #0x1", - "ldrb w21, [x28, #1026]", - "lsl w22, w22, w20", - "bic w21, w21, w22", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "rmif x21, #63, #nzCv", + "rmif x22, #62, #nZcv", + "mov w21, #0x1", + "eor w22, w24, #0x1", + "mov x26, x22", + "ldrb w22, [x28, #1026]", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fcomip st0": { - "ExpectedInstructionCount": 51, + "ExpectedInstructionCount": 52, "Comment": [ "0xdf 11b 0xf0 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -17375,24 +17557,25 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "rmif x22, #63, #nzCv", - "rmif x23, #62, #nZcv", - "mov w22, #0x1", - "eor w26, w21, #0x1", - "ldrb w21, [x28, #1026]", - "lsl w22, w22, w20", - "bic w21, w21, w22", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "rmif x21, #63, #nzCv", + "rmif x22, #62, #nZcv", + "mov w21, #0x1", + "eor w22, w24, #0x1", + "mov x26, x22", + "ldrb w22, [x28, #1026]", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fcomip st1": { - "ExpectedInstructionCount": 51, + "ExpectedInstructionCount": 52, "Comment": [ "0xdf 11b 0xf1 /6" ], @@ -17400,8 +17583,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -17435,31 +17618,32 @@ "mov x22, x0", "ubfx x23, x22, #1, #1", "ubfx x24, x22, #0, #1", - "ubfx x22, x22, #2, #1", - "orr w23, w23, w22", - "orr w24, w24, w22", - "rmif x23, #63, #nzCv", - "rmif x24, #62, #nZcv", - "eor w26, w22, #0x1", + "ubfx x25, x22, #2, #1", + "orr w22, w23, w25", + "orr w23, w24, w25", + "rmif x22, #63, #nzCv", + "rmif x23, #62, #nZcv", + "eor w22, w25, #0x1", + "mov x26, x22", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fcomip st2": { - "ExpectedInstructionCount": 51, + "ExpectedInstructionCount": 52, "Comment": [ "0xdf 11b 0xf2 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -17493,32 +17677,33 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "rmif x22, #63, #nzCv", - "rmif x23, #62, #nZcv", - "mov w22, #0x1", - "eor w26, w21, #0x1", - "ldrb w21, [x28, #1026]", - "lsl w22, w22, w20", - "bic w21, w21, w22", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "rmif x21, #63, #nzCv", + "rmif x22, #62, #nZcv", + "mov w21, #0x1", + "eor w22, w24, #0x1", + "mov x26, x22", + "ldrb w22, [x28, #1026]", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fcomip st3": { - "ExpectedInstructionCount": 51, + "ExpectedInstructionCount": 52, "Comment": [ "0xdf 11b 0xf3 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -17552,32 +17737,33 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "rmif x22, #63, #nzCv", - "rmif x23, #62, #nZcv", - "mov w22, #0x1", - "eor w26, w21, #0x1", - "ldrb w21, [x28, #1026]", - "lsl w22, w22, w20", - "bic w21, w21, w22", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "rmif x21, #63, #nzCv", + "rmif x22, #62, #nZcv", + "mov w21, #0x1", + "eor w22, w24, #0x1", + "mov x26, x22", + "ldrb w22, [x28, #1026]", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fcomip st4": { - "ExpectedInstructionCount": 51, + "ExpectedInstructionCount": 52, "Comment": [ "0xdf 11b 0xf4 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -17611,32 +17797,33 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "rmif x22, #63, #nzCv", - "rmif x23, #62, #nZcv", - "mov w22, #0x1", - "eor w26, w21, #0x1", - "ldrb w21, [x28, #1026]", - "lsl w22, w22, w20", - "bic w21, w21, w22", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "rmif x21, #63, #nzCv", + "rmif x22, #62, #nZcv", + "mov w21, #0x1", + "eor w22, w24, #0x1", + "mov x26, x22", + "ldrb w22, [x28, #1026]", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fcomip st5": { - "ExpectedInstructionCount": 51, + "ExpectedInstructionCount": 52, "Comment": [ "0xdf 11b 0xf5 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -17670,32 +17857,33 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "rmif x22, #63, #nzCv", - "rmif x23, #62, #nZcv", - "mov w22, #0x1", - "eor w26, w21, #0x1", - "ldrb w21, [x28, #1026]", - "lsl w22, w22, w20", - "bic w21, w21, w22", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "rmif x21, #63, #nzCv", + "rmif x22, #62, #nZcv", + "mov w21, #0x1", + "eor w22, w24, #0x1", + "mov x26, x22", + "ldrb w22, [x28, #1026]", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fcomip st6": { - "ExpectedInstructionCount": 51, + "ExpectedInstructionCount": 52, "Comment": [ "0xdf 11b 0xf6 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -17729,32 +17917,33 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "rmif x22, #63, #nzCv", - "rmif x23, #62, #nZcv", - "mov w22, #0x1", - "eor w26, w21, #0x1", - "ldrb w21, [x28, #1026]", - "lsl w22, w22, w20", - "bic w21, w21, w22", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "rmif x21, #63, #nzCv", + "rmif x22, #62, #nZcv", + "mov w21, #0x1", + "eor w22, w24, #0x1", + "mov x26, x22", + "ldrb w22, [x28, #1026]", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fcomip st7": { - "ExpectedInstructionCount": 51, + "ExpectedInstructionCount": 52, "Comment": [ "0xdf 11b 0xf7 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -17788,19 +17977,20 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "rmif x22, #63, #nzCv", - "rmif x23, #62, #nZcv", - "mov w22, #0x1", - "eor w26, w21, #0x1", - "ldrb w21, [x28, #1026]", - "lsl w22, w22, w20", - "bic w21, w21, w22", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "rmif x21, #63, #nzCv", + "rmif x22, #62, #nZcv", + "mov w21, #0x1", + "eor w22, w24, #0x1", + "mov x26, x22", + "ldrb w22, [x28, #1026]", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] } diff --git a/unittests/InstructionCountCI/FlagM/x87_f64.json b/unittests/InstructionCountCI/FlagM/x87_f64.json index 8a8148774d..4feb9ac59a 100644 --- a/unittests/InstructionCountCI/FlagM/x87_f64.json +++ b/unittests/InstructionCountCI/FlagM/x87_f64.json @@ -16,49 +16,52 @@ }, "Instructions": { "fadd dword [rax]": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 9, "Comment": [ "0xd8 !11b /0" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", - "fcvt d2, s2", + "mov x21, x4", + "ldr s2, [x21]", + "fcvt d3, s2", "add x0, x28, x20, lsl #4", - "ldr d3, [x0, #768]", - "fadd d2, d3, d2", + "ldr d2, [x0, #768]", + "fadd d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fmul dword [rax]": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 9, "Comment": [ "0xd8 !11b /1" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", - "fcvt d2, s2", + "mov x21, x4", + "ldr s2, [x21]", + "fcvt d3, s2", "add x0, x28, x20, lsl #4", - "ldr d3, [x0, #768]", - "fmul d2, d3, d2", + "ldr d2, [x0, #768]", + "fmul d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fcom dword [rax]": { - "ExpectedInstructionCount": 15, + "ExpectedInstructionCount": 16, "Comment": [ "0xd8 !11b /2" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", - "fcvt d2, s2", + "mov x21, x4", + "ldr s2, [x21]", + "fcvt d3, s2", "add x0, x28, x20, lsl #4", - "ldr d3, [x0, #768]", - "fcmp d3, d2", + "ldr d2, [x0, #768]", + "fcmp d2, d3", "mov w20, #0x0", "cset w21, vs", "axflag", @@ -71,17 +74,18 @@ ] }, "fcomp dword [rax]": { - "ExpectedInstructionCount": 23, + "ExpectedInstructionCount": 24, "Comment": [ "0xd8 !11b /3" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", - "fcvt d2, s2", + "mov x21, x4", + "ldr s2, [x21]", + "fcvt d3, s2", "add x0, x28, x20, lsl #4", - "ldr d3, [x0, #768]", - "fcmp d3, d2", + "ldr d2, [x0, #768]", + "fcmp d2, d3", "mov w21, #0x1", "mov w22, #0x0", "cset w23, vs", @@ -93,76 +97,80 @@ "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fsub dword [rax]": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 9, "Comment": [ "0xd8 !11b /4" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", - "fcvt d2, s2", + "mov x21, x4", + "ldr s2, [x21]", + "fcvt d3, s2", "add x0, x28, x20, lsl #4", - "ldr d3, [x0, #768]", - "fsub d2, d3, d2", + "ldr d2, [x0, #768]", + "fsub d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fsubr dword [rax]": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 9, "Comment": [ "0xd8 !11b /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", - "fcvt d2, s2", + "mov x21, x4", + "ldr s2, [x21]", + "fcvt d3, s2", "add x0, x28, x20, lsl #4", - "ldr d3, [x0, #768]", - "fsub d2, d2, d3", + "ldr d2, [x0, #768]", + "fsub d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdiv dword [rax]": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 9, "Comment": [ "0xd8 !11b /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", - "fcvt d2, s2", + "mov x21, x4", + "ldr s2, [x21]", + "fcvt d3, s2", "add x0, x28, x20, lsl #4", - "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", + "ldr d2, [x0, #768]", + "fdiv d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdivr dword [rax]": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 9, "Comment": [ "0xd8 !11b /7" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", - "fcvt d2, s2", + "mov x21, x4", + "ldr s2, [x21]", + "fcvt d3, s2", "add x0, x28, x20, lsl #4", - "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", + "ldr d2, [x0, #768]", + "fdiv d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fadd st0, st0": { @@ -173,14 +181,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fadd d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fadd st0, st1": { @@ -191,14 +199,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", + "fadd d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fadd st0, st2": { @@ -209,14 +217,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", + "fadd d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fadd st0, st3": { @@ -227,14 +235,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", + "fadd d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fadd st0, st4": { @@ -245,14 +253,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", + "fadd d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fadd st0, st5": { @@ -263,14 +271,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", + "fadd d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fadd st0, st6": { @@ -281,14 +289,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", + "fadd d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fadd st0, st7": { @@ -299,14 +307,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", + "fadd d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fmul st0, st0": { @@ -317,14 +325,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fmul d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fmul st0, st1": { @@ -335,14 +343,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", + "fmul d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fmul st0, st2": { @@ -353,14 +361,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", + "fmul d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fmul st0, st3": { @@ -371,14 +379,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", + "fmul d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fmul st0, st4": { @@ -389,14 +397,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", + "fmul d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fmul st0, st5": { @@ -407,14 +415,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", + "fmul d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fmul st0, st6": { @@ -425,14 +433,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", + "fmul d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fmul st0, st7": { @@ -443,14 +451,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", + "fmul d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fcom st0, st0": { @@ -462,8 +470,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x0", "add w22, w20, #0x0 (0)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -486,8 +494,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -511,8 +519,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -536,8 +544,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -561,8 +569,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -586,8 +594,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -611,8 +619,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -636,8 +644,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -662,8 +670,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x0", "add w22, w20, #0x0 (0)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -678,11 +686,11 @@ "strb w21, [x28, #745]", "strb w23, [x28, #746]", "ldrb w21, [x28, #1026]", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -695,8 +703,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -711,11 +719,11 @@ "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -727,8 +735,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -744,11 +752,11 @@ "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -760,8 +768,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -777,11 +785,11 @@ "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -793,8 +801,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -810,11 +818,11 @@ "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -826,8 +834,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -843,11 +851,11 @@ "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -859,8 +867,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -876,11 +884,11 @@ "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -892,8 +900,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -909,11 +917,11 @@ "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -925,14 +933,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsub st0, st1": { @@ -943,14 +951,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", + "fsub d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fsub st0, st2": { @@ -961,14 +969,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", + "fsub d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fsub st0, st3": { @@ -979,14 +987,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", + "fsub d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fsub st0, st4": { @@ -997,14 +1005,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", + "fsub d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fsub st0, st5": { @@ -1015,14 +1023,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", + "fsub d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fsub st0, st6": { @@ -1033,14 +1041,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", + "fsub d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fsub st0, st7": { @@ -1051,14 +1059,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", + "fsub d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fsubr st0, st0": { @@ -1069,14 +1077,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubr st0, st1": { @@ -1087,14 +1095,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", + "fsub d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fsubr st0, st2": { @@ -1105,14 +1113,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", + "fsub d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fsubr st0, st3": { @@ -1123,14 +1131,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", + "fsub d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fsubr st0, st4": { @@ -1141,14 +1149,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", + "fsub d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fsubr st0, st5": { @@ -1159,14 +1167,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", + "fsub d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fsubr st0, st6": { @@ -1177,14 +1185,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", + "fsub d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fsubr st0, st7": { @@ -1195,14 +1203,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", + "fsub d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdiv st0, st0": { @@ -1213,14 +1221,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdiv st0, st1": { @@ -1231,14 +1239,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", + "fdiv d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdiv st0, st2": { @@ -1249,14 +1257,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", + "fdiv d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdiv st0, st3": { @@ -1267,14 +1275,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", + "fdiv d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdiv st0, st4": { @@ -1285,14 +1293,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", + "fdiv d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdiv st0, st5": { @@ -1303,14 +1311,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", + "fdiv d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdiv st0, st6": { @@ -1321,14 +1329,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", + "fdiv d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdiv st0, st7": { @@ -1339,14 +1347,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", + "fdiv d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdivr st0, st0": { @@ -1357,14 +1365,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivr st0, st1": { @@ -1375,14 +1383,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", + "fdiv d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdivr st0, st2": { @@ -1393,14 +1401,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", + "fdiv d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdivr st0, st3": { @@ -1411,14 +1419,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", + "fdiv d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdivr st0, st4": { @@ -1429,14 +1437,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", + "fdiv d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdivr st0, st5": { @@ -1447,14 +1455,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", + "fdiv d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdivr st0, st6": { @@ -1465,14 +1473,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", + "fdiv d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdivr st0, st7": { @@ -1483,39 +1491,40 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", + "fdiv d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fld dword [rax]": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xd9 !11b /0" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", - "fcvt d2, s2", + "mov x21, x4", + "ldr s2, [x21]", + "fcvt d3, s2", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d3, [x0, #768]" ] }, "fst dword [rax]": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": [ "0xd9 !11b /2" ], @@ -1523,12 +1532,13 @@ "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", - "fcvt s2, d2", - "str s2, [x4]" + "fcvt s3, d2", + "mov x20, x4", + "str s3, [x20]" ] }, "fstp dword [rax]": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xd9 !11b /3" ], @@ -1536,98 +1546,101 @@ "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", - "fcvt s2, d2", - "str s2, [x4]", + "fcvt s3, d2", + "mov x21, x4", + "str s3, [x21]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fldenv [rax]": { - "ExpectedInstructionCount": 56, + "ExpectedInstructionCount": 57, "Comment": [ "0xd9 !11b /4" ], "ExpectedArm64ASM": [ - "ldrh w20, [x4]", - "ubfx w21, w20, #10, #3", - "rbit w1, w21", + "mov x20, x4", + "ldrh w21, [x20]", + "ubfx w22, w21, #10, #3", + "rbit w1, w22", "lsr w1, w1, #30", "mrs x0, fpcr", "bfi x0, x1, #22, #2", - "lsr x1, x21, #2", + "lsr x1, x22, #2", "bfi x0, x1, #24, #1", "msr fpcr, x0", - "strh w20, [x28, #1024]", - "ldr w20, [x4, #4]", - "ubfx w21, w20, #11, #3", - "strb w21, [x28, #747]", - "ubfx w21, w20, #8, #1", - "ubfx w22, w20, #9, #1", - "ubfx w23, w20, #10, #1", - "ubfx w20, w20, #14, #1", - "strb w21, [x28, #744]", - "strb w22, [x28, #745]", - "strb w23, [x28, #746]", - "strb w20, [x28, #750]", - "ldr w20, [x4, #8]", - "ubfx w21, w20, #0, #2", + "strh w21, [x28, #1024]", + "ldr w21, [x20, #4]", + "ubfx w22, w21, #11, #3", + "strb w22, [x28, #747]", + "ubfx w22, w21, #8, #1", + "ubfx w23, w21, #9, #1", + "ubfx w24, w21, #10, #1", + "ubfx w25, w21, #14, #1", + "strb w22, [x28, #744]", + "strb w23, [x28, #745]", + "strb w24, [x28, #746]", + "strb w25, [x28, #750]", + "ldr w21, [x20, #8]", + "ubfx w20, w21, #0, #2", "mrs x22, nzcv", - "cmp x21, #0x3 (3)", - "cset x21, ne", - "ubfx w23, w20, #2, #2", - "cmp x23, #0x3 (3)", - "cset x23, ne", - "orr w21, w21, w23, lsl #1", - "ubfx w23, w20, #4, #2", - "cmp x23, #0x3 (3)", - "cset x23, ne", - "orr w21, w21, w23, lsl #2", - "ubfx w23, w20, #6, #2", - "cmp x23, #0x3 (3)", + "cmp x20, #0x3 (3)", "cset x23, ne", - "orr w21, w21, w23, lsl #3", - "ubfx w23, w20, #8, #2", + "ubfx w20, w21, #2, #2", + "cmp x20, #0x3 (3)", + "cset x24, ne", + "orr w20, w23, w24, lsl #1", + "ubfx w23, w21, #4, #2", "cmp x23, #0x3 (3)", - "cset x23, ne", - "orr w21, w21, w23, lsl #4", - "ubfx w23, w20, #10, #2", + "cset x24, ne", + "orr w23, w20, w24, lsl #2", + "ubfx w20, w21, #6, #2", + "cmp x20, #0x3 (3)", + "cset x24, ne", + "orr w20, w23, w24, lsl #3", + "ubfx w23, w21, #8, #2", "cmp x23, #0x3 (3)", - "cset x23, ne", - "orr w21, w21, w23, lsl #5", - "ubfx w23, w20, #12, #2", + "cset x24, ne", + "orr w23, w20, w24, lsl #4", + "ubfx w20, w21, #10, #2", + "cmp x20, #0x3 (3)", + "cset x24, ne", + "orr w20, w23, w24, lsl #5", + "ubfx w23, w21, #12, #2", "cmp x23, #0x3 (3)", - "cset x23, ne", - "orr w21, w21, w23, lsl #6", - "ubfx w20, w20, #14, #2", + "cset x24, ne", + "orr w23, w20, w24, lsl #6", + "ubfx w20, w21, #14, #2", "cmp x20, #0x3 (3)", - "cset x20, ne", - "orr w20, w21, w20, lsl #7", + "cset x21, ne", + "orr w20, w23, w21, lsl #7", "strb w20, [x28, #1026]", "msr nzcv, x22" ] }, "fldcw [rax]": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 11, "Comment": [ "0xd9 !11b /5" ], "ExpectedArm64ASM": [ - "ldrh w20, [x4]", - "ubfx w21, w20, #10, #3", - "rbit w1, w21", + "mov x20, x4", + "ldrh w21, [x20]", + "ubfx w20, w21, #10, #3", + "rbit w1, w20", "lsr w1, w1, #30", "mrs x0, fpcr", "bfi x0, x1, #22, #2", - "lsr x1, x21, #2", + "lsr x1, x20, #2", "bfi x0, x1, #24, #1", "msr fpcr, x0", - "strh w20, [x28, #1024]" + "strh w21, [x28, #1024]" ] }, "fnstenv [rax]": { @@ -1636,80 +1649,81 @@ "0xd9 !11b /6" ], "ExpectedArm64ASM": [ - "ldrh w20, [x28, #1024]", - "str w20, [x4]", - "mov w20, #0x0", - "ldrb w21, [x28, #747]", - "mov x0, x20", - "bfi x0, x21, #11, #3", - "mov x21, x0", + "mov x20, x4", + "ldrh w21, [x28, #1024]", + "str w21, [x20]", + "mov w21, #0x0", + "ldrb w22, [x28, #747]", + "mov x23, x21", + "bfi x23, x22, #11, #3", "ldrb w22, [x28, #744]", - "ldrb w23, [x28, #745]", - "ldrb w24, [x28, #746]", - "ldrb w25, [x28, #750]", - "orr x21, x21, x22, lsl #8", - "orr x21, x21, x23, lsl #9", - "orr x21, x21, x24, lsl #10", - "orr x21, x21, x25, lsl #14", - "str w21, [x4, #4]", - "ldrb w21, [x28, #1026]", - "and w22, w21, #0x1", - "mov w23, #0x3", - "mrs x24, nzcv", + "ldrb w24, [x28, #745]", + "ldrb w25, [x28, #746]", + "ldrb w30, [x28, #750]", + "orr x18, x23, x22, lsl #8", + "orr x22, x18, x24, lsl #9", + "orr x23, x22, x25, lsl #10", + "orr x22, x23, x30, lsl #14", + "str w22, [x20, #4]", + "ldrb w22, [x28, #1026]", + "and w23, w22, #0x1", + "mov w24, #0x3", + "mrs x25, nzcv", + "cmp x23, #0x0 (0)", + "csel x30, x24, x21, eq", + "orr w23, w21, w30", + "lsr w30, w22, #1", + "and w18, w30, #0x1", + "cmp x18, #0x0 (0)", + "csel x30, x24, x21, eq", + "orr w18, w23, w30, lsl #2", + "lsr w23, w22, #2", + "and w30, w23, #0x1", + "cmp x30, #0x0 (0)", + "csel x23, x24, x21, eq", + "orr w30, w18, w23, lsl #4", + "lsr w23, w22, #3", + "and w18, w23, #0x1", + "cmp x18, #0x0 (0)", + "csel x23, x24, x21, eq", + "orr w18, w30, w23, lsl #6", + "lsr w23, w22, #4", + "and w30, w23, #0x1", + "cmp x30, #0x0 (0)", + "csel x23, x24, x21, eq", + "orr w30, w18, w23, lsl #8", + "lsr w23, w22, #5", + "and w18, w23, #0x1", + "cmp x18, #0x0 (0)", + "csel x23, x24, x21, eq", + "orr w18, w30, w23, lsl #10", + "lsr w23, w22, #6", + "and w30, w23, #0x1", + "cmp x30, #0x0 (0)", + "csel x23, x24, x21, eq", + "orr w30, w18, w23, lsl #12", + "lsr w23, w22, #7", + "and w22, w23, #0x1", "cmp x22, #0x0 (0)", - "csel x22, x23, x20, eq", - "orr w22, w20, w22", - "lsr w25, w21, #1", - "and w25, w25, #0x1", - "cmp x25, #0x0 (0)", - "csel x25, x23, x20, eq", - "orr w22, w22, w25, lsl #2", - "lsr w25, w21, #2", - "and w25, w25, #0x1", - "cmp x25, #0x0 (0)", - "csel x25, x23, x20, eq", - "orr w22, w22, w25, lsl #4", - "lsr w25, w21, #3", - "and w25, w25, #0x1", - "cmp x25, #0x0 (0)", - "csel x25, x23, x20, eq", - "orr w22, w22, w25, lsl #6", - "lsr w25, w21, #4", - "and w25, w25, #0x1", - "cmp x25, #0x0 (0)", - "csel x25, x23, x20, eq", - "orr w22, w22, w25, lsl #8", - "lsr w25, w21, #5", - "and w25, w25, #0x1", - "cmp x25, #0x0 (0)", - "csel x25, x23, x20, eq", - "orr w22, w22, w25, lsl #10", - "lsr w25, w21, #6", - "and w25, w25, #0x1", - "cmp x25, #0x0 (0)", - "csel x25, x23, x20, eq", - "orr w22, w22, w25, lsl #12", - "lsr w21, w21, #7", - "and w21, w21, #0x1", - "cmp x21, #0x0 (0)", - "csel x21, x23, x20, eq", - "orr w21, w22, w21, lsl #14", - "str w21, [x4, #8]", - "str w20, [x4, #12]", - "str w20, [x4, #16]", - "str w20, [x4, #20]", - "str w20, [x4, #24]", - "msr nzcv, x24" + "csel x23, x24, x21, eq", + "orr w22, w30, w23, lsl #14", + "str w22, [x20, #8]", + "str w21, [x20, #12]", + "str w21, [x20, #16]", + "str w21, [x20, #20]", + "str w21, [x20, #24]", + "msr nzcv, x25" ] }, "fnstcw [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": [ "0xd9 !11b /7" ], "ExpectedArm64ASM": [ "ldrh w20, [x28, #1024]", - "strh w20, [x4]" + "mov x21, x4", + "strh w20, [x21]" ] }, "fld st0": { @@ -1720,15 +1734,15 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -1744,14 +1758,14 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -1766,15 +1780,15 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -1789,15 +1803,15 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -1812,15 +1826,15 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -1835,15 +1849,15 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -1858,15 +1872,15 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -1881,15 +1895,15 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -1904,14 +1918,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -1923,14 +1937,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -1942,14 +1956,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -1961,14 +1975,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -1980,14 +1994,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -1999,14 +2013,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -2018,14 +2032,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -2037,14 +2051,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -2064,9 +2078,9 @@ "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", - "fneg v2.2d, v2.2d", + "fneg v3.2d, v2.2d", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d3, [x0, #768]" ] }, "fabs": { @@ -2078,9 +2092,9 @@ "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", - "fabs d2, d2", + "fabs d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d3, [x0, #768]" ] }, "ftst": { @@ -2115,19 +2129,19 @@ "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", "mov x21, v2.d[0]", - "lsr x21, x21, #63", - "strb w21, [x28, #745]", + "lsr x22, x21, #63", + "strb w22, [x28, #745]", "ldrb w21, [x28, #1026]", - "lsr w20, w21, w20", - "mov w21, #0x1", - "and w20, w20, #0x1", + "lsr w22, w21, w20", + "mov w20, #0x1", + "and w21, w22, #0x1", "mov w22, #0x0", "mrs x23, nzcv", - "cmp x20, #0x1 (1)", - "csel x21, x22, x21, eq", - "strb w21, [x28, #744]", - "strb w20, [x28, #746]", - "strb w21, [x28, #750]", + "cmp x21, #0x1 (1)", + "csel x24, x22, x20, eq", + "strb w24, [x28, #744]", + "strb w21, [x28, #746]", + "strb w24, [x28, #750]", "msr nzcv, x23" ] }, @@ -2139,11 +2153,11 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "mov x21, #0x3ff0000000000000", @@ -2160,11 +2174,11 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "mov x21, #0xa372", @@ -2184,11 +2198,11 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "mov x21, #0x82fe", @@ -2208,11 +2222,11 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "mov x21, #0x2d18", @@ -2232,11 +2246,11 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "mov x21, #0x79ff", @@ -2256,11 +2270,11 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "mov x21, #0x39ef", @@ -2280,11 +2294,11 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "mov w21, #0x0", @@ -2352,9 +2366,9 @@ "ldp x19, x29, [x28, #120]", "ldr x26, [x28, #752]", "ldr x27, [x28, #760]", - "mov v2.8b, v0.8b", + "mov v3.8b, v0.8b", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d3, [x0, #768]" ] }, "fyl2x": { @@ -2366,15 +2380,15 @@ "ldrb w20, [x28, #747]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "strb w21, [x28, #747]", + "and w22, w21, #0x7", + "strb w22, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr d3, [x0, #768]", "mov v0.8b, v2.8b", "mov v1.8b, v3.8b", @@ -2427,9 +2441,9 @@ "ldp x19, x29, [x28, #120]", "ldr x26, [x28, #752]", "ldr x27, [x28, #760]", - "mov v2.8b, v0.8b", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "mov v4.8b, v0.8b", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fptan": { @@ -2441,12 +2455,12 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "sub w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w22", - "orr w21, w23, w21", + "and w23, w22, #0x7", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w23", + "orr w21, w22, w24", "strb w21, [x28, #1026]", - "strb w22, [x28, #747]", + "strb w23, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", "mrs x0, nzcv", @@ -2499,15 +2513,15 @@ "ldp x19, x29, [x28, #120]", "ldr x26, [x28, #752]", "ldr x27, [x28, #760]", - "mov v2.8b, v0.8b", + "mov v3.8b, v0.8b", "mov x21, #0x3ff0000000000000", - "fmov d3, x21", + "fmov d2, x21", "mov w21, #0x0", "strb w21, [x28, #746]", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]", - "add x0, x28, x22, lsl #4", - "str d3, [x0, #768]" + "str d3, [x0, #768]", + "add x0, x28, x23, lsl #4", + "str d2, [x0, #768]" ] }, "fpatan": { @@ -2519,15 +2533,15 @@ "ldrb w20, [x28, #747]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "strb w21, [x28, #747]", + "and w22, w21, #0x7", + "strb w22, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr d3, [x0, #768]", "mov v0.8b, v3.8b", "mov v1.8b, v2.8b", @@ -2580,9 +2594,9 @@ "ldp x19, x29, [x28, #120]", "ldr x26, [x28, #752]", "ldr x27, [x28, #760]", - "mov v2.8b, v0.8b", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "mov v4.8b, v0.8b", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fxtract": { @@ -2594,25 +2608,25 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "sub w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w22", - "orr w21, w23, w21", + "and w23, w22, #0x7", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w23", + "orr w21, w22, w24", "strb w21, [x28, #1026]", - "strb w22, [x28, #747]", + "strb w23, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", "mov x21, v2.d[0]", - "and x23, x21, #0x7ff0000000000000", - "lsr x23, x23, #52", - "sub x23, x23, #0x3ff (1023)", - "scvtf d2, x23", - "and x21, x21, #0x800fffffffffffff", - "orr x21, x21, #0x3ff0000000000000", + "and x22, x21, #0x7ff0000000000000", + "lsr x24, x22, #52", + "sub x22, x24, #0x3ff (1023)", + "scvtf d2, x22", + "and x22, x21, #0x800fffffffffffff", + "orr x21, x22, #0x3ff0000000000000", "fmov d3, x21", "add x0, x28, x20, lsl #4", "str d2, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "str d3, [x0, #768]" ] }, @@ -2624,10 +2638,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr d3, [x0, #768]", "mov v0.8b, v2.8b", "mov v1.8b, v3.8b", @@ -2680,11 +2694,11 @@ "ldp x19, x29, [x28, #120]", "ldr x26, [x28, #752]", "ldr x27, [x28, #760]", - "mov v2.8b, v0.8b", + "mov v4.8b, v0.8b", "mov w21, #0x0", "strb w21, [x28, #746]", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdecstp": { @@ -2694,8 +2708,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -2706,8 +2720,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -2719,10 +2733,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr d3, [x0, #768]", "mov v0.8b, v2.8b", "mov v1.8b, v3.8b", @@ -2775,11 +2789,11 @@ "ldp x19, x29, [x28, #120]", "ldr x26, [x28, #752]", "ldr x27, [x28, #760]", - "mov v2.8b, v0.8b", + "mov v4.8b, v0.8b", "mov w21, #0x0", "strb w21, [x28, #746]", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fyl2xp1": { @@ -2791,20 +2805,20 @@ "ldrb w20, [x28, #747]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "strb w21, [x28, #747]", + "and w22, w21, #0x7", + "strb w22, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr d3, [x0, #768]", "mov x20, #0x3ff0000000000000", "fmov d4, x20", - "fadd d2, d2, d4", - "mov v0.8b, v2.8b", + "fadd d5, d2, d4", + "mov v0.8b, v5.8b", "mov v1.8b, v3.8b", "mrs x0, nzcv", "str w0, [x28, #728]", @@ -2856,7 +2870,7 @@ "ldr x26, [x28, #752]", "ldr x27, [x28, #760]", "mov v2.8b, v0.8b", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str d2, [x0, #768]" ] }, @@ -2869,9 +2883,9 @@ "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", - "fsqrt d2, d2", + "fsqrt d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d3, [x0, #768]" ] }, "fsincos": { @@ -2883,12 +2897,12 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "sub w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w22", - "orr w21, w23, w21", + "and w23, w22, #0x7", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w23", + "orr w21, w22, w24", "strb w21, [x28, #1026]", - "strb w22, [x28, #747]", + "strb w23, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", "mrs x0, nzcv", @@ -2992,13 +3006,13 @@ "ldp x19, x29, [x28, #120]", "ldr x26, [x28, #752]", "ldr x27, [x28, #760]", - "mov v2.8b, v0.8b", + "mov v4.8b, v0.8b", "mov w21, #0x0", "strb w21, [x28, #746]", "add x0, x28, x20, lsl #4", "str d3, [x0, #768]", - "add x0, x28, x22, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x23, lsl #4", + "str d4, [x0, #768]" ] }, "frndint": { @@ -3010,9 +3024,9 @@ "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", - "frinti d2, d2", + "frinti d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d3, [x0, #768]" ] }, "fscale": { @@ -3023,10 +3037,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr d3, [x0, #768]", "mov v0.8b, v2.8b", "mov v1.8b, v3.8b", @@ -3079,9 +3093,9 @@ "ldp x19, x29, [x28, #120]", "ldr x26, [x28, #752]", "ldr x27, [x28, #760]", - "mov v2.8b, v0.8b", + "mov v4.8b, v0.8b", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fsin": { @@ -3143,11 +3157,11 @@ "ldp x19, x29, [x28, #120]", "ldr x26, [x28, #752]", "ldr x27, [x28, #760]", - "mov v2.8b, v0.8b", + "mov v3.8b, v0.8b", "mov w21, #0x0", "strb w21, [x28, #746]", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d3, [x0, #768]" ] }, "fcos": { @@ -3209,54 +3223,57 @@ "ldp x19, x29, [x28, #120]", "ldr x26, [x28, #752]", "ldr x27, [x28, #760]", - "mov v2.8b, v0.8b", + "mov v3.8b, v0.8b", "mov w21, #0x0", "strb w21, [x28, #746]", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d3, [x0, #768]" ] }, "fiadd dword [rax]": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 9, "Comment": [ "0xda !11b /0" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr w21, [x4]", - "scvtf d2, w21", + "mov x21, x4", + "ldr w22, [x21]", + "scvtf d2, w22", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", + "fadd d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fimul dword [rax]": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 9, "Comment": [ "0xda !11b /1" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr w21, [x4]", - "scvtf d2, w21", + "mov x21, x4", + "ldr w22, [x21]", + "scvtf d2, w22", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", + "fmul d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "ficom dword [rax]": { - "ExpectedInstructionCount": 15, + "ExpectedInstructionCount": 16, "Comment": [ "0xda !11b /2" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr w21, [x4]", - "scvtf d2, w21", + "mov x21, x4", + "ldr w22, [x21]", + "scvtf d2, w22", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", @@ -3272,14 +3289,15 @@ ] }, "ficomp dword [rax]": { - "ExpectedInstructionCount": 23, + "ExpectedInstructionCount": 24, "Comment": [ "0xda !11b /3" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr w21, [x4]", - "scvtf d2, w21", + "mov x21, x4", + "ldr w22, [x21]", + "scvtf d2, w22", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", @@ -3294,80 +3312,84 @@ "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fisub dword [rax]": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 9, "Comment": [ "0xda !11b /4" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr w21, [x4]", - "scvtf d2, w21", + "mov x21, x4", + "ldr w22, [x21]", + "scvtf d2, w22", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", + "fsub d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fisubr dword [rax]": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 9, "Comment": [ "0xda !11b /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr w21, [x4]", - "scvtf d2, w21", + "mov x21, x4", + "ldr w22, [x21]", + "scvtf d2, w22", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", + "fsub d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fidiv dword [rax]": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 9, "Comment": [ "0xda !11b /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr w21, [x4]", - "scvtf d2, w21", + "mov x21, x4", + "ldr w22, [x21]", + "scvtf d2, w22", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", + "fdiv d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fidivr dword [rax]": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 9, "Comment": [ "0xda !11b /7" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr w21, [x4]", - "scvtf d2, w21", + "mov x21, x4", + "ldr w22, [x21]", + "scvtf d2, w22", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", + "fdiv d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fcmovb st0, st0": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc0 /0" ], @@ -3376,18 +3398,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovb st0, st1": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc1 /0" ], @@ -3396,18 +3419,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovb st0, st2": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc2 /0" ], @@ -3416,18 +3440,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovb st0, st3": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc3 /0" ], @@ -3436,18 +3461,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovb st0, st4": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc4 /0" ], @@ -3456,18 +3482,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovb st0, st5": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc5 /0" ], @@ -3476,18 +3503,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovb st0, st6": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc6 /0" ], @@ -3496,18 +3524,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovb st0, st7": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc7 /0" ], @@ -3516,18 +3545,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmove st0, st0": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc8 /1" ], @@ -3536,18 +3566,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmove st0, st1": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc9 /1" ], @@ -3556,18 +3587,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmove st0, st2": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xca /1" ], @@ -3576,18 +3608,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmove st0, st3": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xcb /1" ], @@ -3596,18 +3629,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmove st0, st4": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xcc /1" ], @@ -3616,18 +3650,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmove st0, st5": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xcd /1" ], @@ -3636,18 +3671,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmove st0, st6": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xce /1" ], @@ -3656,18 +3692,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmove st0, st7": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xcf /1" ], @@ -3676,398 +3713,423 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovbe st0, st0": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xda 11b 0xd0 /0" ], "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "csetm x21, hs", - "csel x20, x20, x21, eq", - "dup v2.2d, x20", + "csel x22, x20, x21, eq", + "dup v2.2d, x22", "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovbe st0, st1": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xda 11b 0xd1 /0" ], "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "csetm x21, hs", - "csel x20, x20, x21, eq", - "dup v2.2d, x20", + "csel x22, x20, x21, eq", + "dup v2.2d, x22", "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovbe st0, st2": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xda 11b 0xd2 /0" ], "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "csetm x21, hs", - "csel x20, x20, x21, eq", - "dup v2.2d, x20", + "csel x22, x20, x21, eq", + "dup v2.2d, x22", "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovbe st0, st3": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xda 11b 0xd3 /0" ], "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "csetm x21, hs", - "csel x20, x20, x21, eq", - "dup v2.2d, x20", + "csel x22, x20, x21, eq", + "dup v2.2d, x22", "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovbe st0, st4": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xda 11b 0xd4 /0" ], "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "csetm x21, hs", - "csel x20, x20, x21, eq", - "dup v2.2d, x20", + "csel x22, x20, x21, eq", + "dup v2.2d, x22", "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovbe st0, st5": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xda 11b 0xd5 /0" ], "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "csetm x21, hs", - "csel x20, x20, x21, eq", - "dup v2.2d, x20", + "csel x22, x20, x21, eq", + "dup v2.2d, x22", "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovbe st0, st6": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xda 11b 0xd6 /0" ], "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "csetm x21, hs", - "csel x20, x20, x21, eq", - "dup v2.2d, x20", + "csel x22, x20, x21, eq", + "dup v2.2d, x22", "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovbe st0, st7": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xda 11b 0xd7 /0" ], "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "csetm x21, hs", - "csel x20, x20, x21, eq", - "dup v2.2d, x20", + "csel x22, x20, x21, eq", + "dup v2.2d, x22", "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovu st0, st0": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xda 11b 0xd8 /1" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x0 (0)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x0 (0)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovu st0, st1": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xda 11b 0xd9 /1" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x1 (1)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovu st0, st2": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xda 11b 0xda /1" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x2 (2)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x2 (2)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovu st0, st3": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xda 11b 0xdb /1" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x3 (3)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x3 (3)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovu st0, st4": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xda 11b 0xdc /1" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x4 (4)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x4 (4)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovu st0, st5": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xda 11b 0xdd /1" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x5 (5)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x5 (5)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovu st0, st6": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xda 11b 0xde /1" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x6 (6)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x6 (6)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovu st0, st7": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xda 11b 0xdf /1" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x7 (7)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x7 (7)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fucompp": { @@ -4079,8 +4141,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -4096,41 +4158,42 @@ "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", "lsl w23, w21, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "bic w24, w22, w23", + "strb w24, [x28, #1026]", + "add w22, w20, #0x1 (1)", + "and w20, w22, #0x7", + "lsl w22, w21, w20", + "bic w21, w24, w22", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fild dword [rax]": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdf !11b /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "ldr w21, [x4]", - "scvtf d2, w21", + "mov x21, x4", + "ldr w22, [x21]", + "scvtf d2, w22", "add x0, x28, x20, lsl #4", "str d2, [x0, #768]" ] }, "fisttp dword [rax]": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb !11b /1" ], @@ -4139,19 +4202,20 @@ "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", "fcvtzs w21, d2", - "str w21, [x4]", + "mov x22, x4", + "str w21, [x22]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fist dword [rax]": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 7, "Comment": [ "0xdb !11b /2" ], @@ -4161,11 +4225,12 @@ "ldr d2, [x0, #768]", "frinti d0, d2", "fcvtzs w20, d0", - "str w20, [x4]" + "mov x21, x4", + "str w20, [x21]" ] }, "fistp dword [rax]": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xdf !11b /7" ], @@ -4175,25 +4240,27 @@ "ldr d2, [x0, #768]", "frinti d0, d2", "fcvtzs w21, d0", - "str w21, [x4]", + "mov x22, x4", + "str w21, [x22]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fld tword [rax]": { - "ExpectedInstructionCount": 38, + "ExpectedInstructionCount": 39, "Comment": [ "0xdb !11b /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr q2, [x4]", + "mov x21, x4", + "ldr q2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -4219,21 +4286,21 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "mov v2.8b, v0.8b", + "mov v3.8b, v0.8b", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d3, [x0, #768]" ] }, "fstp tword [rax]": { - "ExpectedInstructionCount": 41, + "ExpectedInstructionCount": 42, "Comment": [ "0xdb !11b /7" ], @@ -4265,24 +4332,25 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "str d2, [x4]", - "mov x21, v2.d[1]", - "strh w21, [x4, #8]", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "mov x21, x4", + "str d3, [x21]", + "mov x22, v3.d[1]", + "strh w22, [x21, #8]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fcmovnb st0, st0": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc0 /0" ], @@ -4291,18 +4359,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnb st0, st1": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc1 /0" ], @@ -4311,18 +4380,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnb st0, st2": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc2 /0" ], @@ -4331,18 +4401,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnb st0, st3": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc3 /0" ], @@ -4351,18 +4422,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnb st0, st4": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc4 /0" ], @@ -4371,18 +4443,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnb st0, st5": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc5 /0" ], @@ -4391,18 +4464,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnb st0, st6": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc6 /0" ], @@ -4411,18 +4485,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnb st0, st7": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc7 /0" ], @@ -4431,18 +4506,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovne st0, st0": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc8 /1" ], @@ -4451,18 +4527,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovne st0, st1": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc9 /1" ], @@ -4471,18 +4548,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovne st0, st2": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xca /1" ], @@ -4491,18 +4569,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovne st0, st3": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xcb /1" ], @@ -4511,18 +4590,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovne st0, st4": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xcc /1" ], @@ -4531,18 +4611,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovne st0, st5": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xcd /1" ], @@ -4551,18 +4632,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovne st0, st6": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xce /1" ], @@ -4571,18 +4653,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovne st0, st7": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xcf /1" ], @@ -4591,390 +4674,415 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnbe st0, st0": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb 11b 0xd0 /2" ], "ExpectedArm64ASM": [ "csetm x20, lo", - "csel x20, x20, xzr, ne", - "dup v2.2d, x20", + "csel x21, x20, xzr, ne", + "dup v2.2d, x21", "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnbe st0, st1": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb 11b 0xd1 /2" ], "ExpectedArm64ASM": [ "csetm x20, lo", - "csel x20, x20, xzr, ne", - "dup v2.2d, x20", + "csel x21, x20, xzr, ne", + "dup v2.2d, x21", "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnbe st0, st2": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb 11b 0xd2 /2" ], "ExpectedArm64ASM": [ "csetm x20, lo", - "csel x20, x20, xzr, ne", - "dup v2.2d, x20", + "csel x21, x20, xzr, ne", + "dup v2.2d, x21", "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnbe st0, st3": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb 11b 0xd3 /2" ], "ExpectedArm64ASM": [ "csetm x20, lo", - "csel x20, x20, xzr, ne", - "dup v2.2d, x20", + "csel x21, x20, xzr, ne", + "dup v2.2d, x21", "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnbe st0, st4": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb 11b 0xd4 /2" ], "ExpectedArm64ASM": [ "csetm x20, lo", - "csel x20, x20, xzr, ne", - "dup v2.2d, x20", + "csel x21, x20, xzr, ne", + "dup v2.2d, x21", "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnbe st0, st5": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb 11b 0xd5 /2" ], "ExpectedArm64ASM": [ "csetm x20, lo", - "csel x20, x20, xzr, ne", - "dup v2.2d, x20", + "csel x21, x20, xzr, ne", + "dup v2.2d, x21", "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnbe st0, st6": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb 11b 0xd6 /2" ], "ExpectedArm64ASM": [ "csetm x20, lo", - "csel x20, x20, xzr, ne", - "dup v2.2d, x20", + "csel x21, x20, xzr, ne", + "dup v2.2d, x21", "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnbe st0, st7": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb 11b 0xd7 /2" ], "ExpectedArm64ASM": [ "csetm x20, lo", - "csel x20, x20, xzr, ne", - "dup v2.2d, x20", + "csel x21, x20, xzr, ne", + "dup v2.2d, x21", "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnu st0, st0": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xdb 11b 0xd8 /3" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x0 (0)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x0 (0)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovnu st0, st1": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xdb 11b 0xd9 /3" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x1 (1)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovnu st0, st2": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xdb 11b 0xda /3" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x2 (2)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x2 (2)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovnu st0, st3": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xdb 11b 0xdb /3" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x3 (3)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x3 (3)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovnu st0, st4": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xdb 11b 0xdc /3" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x4 (4)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x4 (4)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovnu st0, st5": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xdb 11b 0xdd /3" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x5 (5)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x5 (5)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovnu st0, st6": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xdb 11b 0xde /3" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x6 (6)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x6 (6)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovnu st0, st7": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xdb 11b 0xdf /3" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x7 (7)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x7 (7)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fnclex": { @@ -5009,347 +5117,366 @@ ] }, "fucomi st0, st0": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 12, "Comment": [ "0xdb 11b 0xe8 /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", - "cset w26, vc", + "cset w20, vc", + "mov x26, x20", "axflag", "cfinv" ] }, "fucomi st0, st1": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 12, "Comment": [ "0xdb 11b 0xe9 /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", - "cset w26, vc", + "cset w20, vc", + "mov x26, x20", "axflag", "cfinv" ] }, "fucomi st0, st2": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 12, "Comment": [ "0xdb 11b 0xea /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", - "cset w26, vc", + "cset w20, vc", + "mov x26, x20", "axflag", "cfinv" ] }, "fucomi st0, st3": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 12, "Comment": [ "0xdb 11b 0xeb /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", - "cset w26, vc", + "cset w20, vc", + "mov x26, x20", "axflag", "cfinv" ] }, "fucomi st0, st4": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 12, "Comment": [ "0xdb 11b 0xec /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", - "cset w26, vc", + "cset w20, vc", + "mov x26, x20", "axflag", "cfinv" ] }, "fucomi st0, st5": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 12, "Comment": [ "0xdb 11b 0xed /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", - "cset w26, vc", + "cset w20, vc", + "mov x26, x20", "axflag", "cfinv" ] }, "fucomi st0, st6": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 12, "Comment": [ "0xdb 11b 0xee /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", - "cset w26, vc", + "cset w20, vc", + "mov x26, x20", "axflag", "cfinv" ] }, "fucomi st0, st7": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 12, "Comment": [ "0xdb 11b 0xef /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", - "cset w26, vc", + "cset w20, vc", + "mov x26, x20", "axflag", "cfinv" ] }, "fcomi st0, st0": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 12, "Comment": [ "0xdb 11b 0xf0 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", - "cset w26, vc", + "cset w20, vc", + "mov x26, x20", "axflag", "cfinv" ] }, "fcomi st0, st1": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 12, "Comment": [ "0xdb 11b 0xf1 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", - "cset w26, vc", + "cset w20, vc", + "mov x26, x20", "axflag", "cfinv" ] }, "fcomi st0, st2": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 12, "Comment": [ "0xdb 11b 0xf2 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", - "cset w26, vc", + "cset w20, vc", + "mov x26, x20", "axflag", "cfinv" ] }, "fcomi st0, st3": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 12, "Comment": [ "0xdb 11b 0xf3 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", - "cset w26, vc", + "cset w20, vc", + "mov x26, x20", "axflag", "cfinv" ] }, "fcomi st0, st4": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 12, "Comment": [ "0xdb 11b 0xf4 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", - "cset w26, vc", + "cset w20, vc", + "mov x26, x20", "axflag", "cfinv" ] }, "fcomi st0, st5": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 12, "Comment": [ "0xdb 11b 0xf5 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", - "cset w26, vc", + "cset w20, vc", + "mov x26, x20", "axflag", "cfinv" ] }, "fcomi st0, st6": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 12, "Comment": [ "0xdb 11b 0xf6 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", - "cset w26, vc", + "cset w20, vc", + "mov x26, x20", "axflag", "cfinv" ] }, "fcomi st0, st7": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 12, "Comment": [ "0xdb 11b 0xf7 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", - "cset w26, vc", + "cset w20, vc", + "mov x26, x20", "axflag", "cfinv" ] }, "fadd qword [rax]": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 8, "Comment": [ "0xdc !11b /0" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", + "fadd d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fmul qword [rax]": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 8, "Comment": [ "0xdc !11b /1" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", + "fmul d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fcom qword [rax]": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xdc !11b /2" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", @@ -5365,13 +5492,14 @@ ] }, "fcomp qword [rax]": { - "ExpectedInstructionCount": 22, + "ExpectedInstructionCount": 23, "Comment": [ "0xdc !11b /3" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", @@ -5386,72 +5514,76 @@ "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fsub qword [rax]": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 8, "Comment": [ "0xdc !11b /4" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", + "fsub d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fsubr qword [rax]": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 8, "Comment": [ "0xdc !11b /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", + "fsub d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdiv qword [rax]": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 8, "Comment": [ "0xdc !11b /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", + "fdiv d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdivr qword [rax]": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 8, "Comment": [ "0xdc !11b /7" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", + "fdiv d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "db 0xdc, 0xc0": { @@ -5464,14 +5596,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fadd d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fadd st1, st0": { @@ -5482,14 +5614,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fadd d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fadd st2, st0": { @@ -5500,14 +5632,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fadd d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fadd st3, st0": { @@ -5518,14 +5650,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fadd d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fadd st4, st0": { @@ -5536,14 +5668,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fadd d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fadd st5, st0": { @@ -5554,14 +5686,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fadd d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fadd st6, st0": { @@ -5572,14 +5704,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fadd d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fadd st7, st0": { @@ -5590,14 +5722,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fadd d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "db 0xdc, 0xc8": { @@ -5610,14 +5742,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fmul d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fmul st1, st0": { @@ -5628,14 +5760,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fmul d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fmul st2, st0": { @@ -5646,14 +5778,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fmul d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fmul st3, st0": { @@ -5664,14 +5796,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fmul d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fmul st4, st0": { @@ -5682,14 +5814,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fmul d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fmul st5, st0": { @@ -5700,14 +5832,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fmul d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fmul st6, st0": { @@ -5718,14 +5850,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fmul d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fmul st7, st0": { @@ -5736,14 +5868,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fmul d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "db 0xdc, 0xe0": { @@ -5756,14 +5888,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubr st1, st0": { @@ -5774,14 +5906,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubr st2, st0": { @@ -5792,14 +5924,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubr st3, st0": { @@ -5810,14 +5942,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubr st4, st0": { @@ -5828,14 +5960,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubr st5, st0": { @@ -5846,14 +5978,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubr st6, st0": { @@ -5864,14 +5996,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubr st7, st0": { @@ -5882,14 +6014,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "db 0xdc, 0xe8": { @@ -5902,14 +6034,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsub st1, st0": { @@ -5920,14 +6052,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsub st2, st0": { @@ -5938,14 +6070,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsub st3, st0": { @@ -5956,14 +6088,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsub st4, st0": { @@ -5974,14 +6106,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsub st5, st0": { @@ -5992,14 +6124,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsub st6, st0": { @@ -6010,14 +6142,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsub st7, st0": { @@ -6028,14 +6160,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "db 0xdc, 0xf0": { @@ -6048,14 +6180,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivr st1, st0": { @@ -6066,14 +6198,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivr st2, st0": { @@ -6084,14 +6216,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivr st3, st0": { @@ -6102,14 +6234,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivr st4, st0": { @@ -6120,14 +6252,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivr st5, st0": { @@ -6138,14 +6270,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivr st6, st0": { @@ -6156,14 +6288,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivr st7, st0": { @@ -6174,14 +6306,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "db 0xdc, 0xf8": { @@ -6194,14 +6326,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdiv st1, st0": { @@ -6212,14 +6344,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdiv st2, st0": { @@ -6230,14 +6362,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdiv st3, st0": { @@ -6248,14 +6380,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdiv st4, st0": { @@ -6266,14 +6398,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdiv st5, st0": { @@ -6284,14 +6416,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdiv st6, st0": { @@ -6302,14 +6434,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdiv st7, st0": { @@ -6320,30 +6452,31 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fld qword [rax]": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdd !11b /0" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -6351,7 +6484,7 @@ ] }, "fisttp qword [rax]": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdd !11b /1" ], @@ -6360,19 +6493,20 @@ "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", "fcvtzs x21, d2", - "str x21, [x4]", + "mov x22, x4", + "str x21, [x22]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fst qword [rax]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": [ "0xdd !11b /2" ], @@ -6380,11 +6514,12 @@ "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", - "str d2, [x4]" + "mov x20, x4", + "str d2, [x20]" ] }, "fstp qword [rax]": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdd !11b /3" ], @@ -6392,87 +6527,90 @@ "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", - "str d2, [x4]", + "mov x21, x4", + "str d2, [x21]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "frstor [rax]": { - "ExpectedInstructionCount": 325, + "ExpectedInstructionCount": 328, "Comment": [ "0xdd !11b /4" ], "ExpectedArm64ASM": [ - "ldrh w20, [x4]", - "lsr w21, w20, #10", - "and w21, w21, #0x3", - "rbit w1, w21", + "mov x20, x4", + "ldrh w21, [x20]", + "lsr w22, w21, #10", + "and w23, w22, #0x3", + "rbit w1, w23", "lsr w1, w1, #30", "mrs x0, fpcr", "bfi x0, x1, #22, #2", - "lsr x1, x21, #2", + "lsr x1, x23, #2", "bfi x0, x1, #24, #1", "msr fpcr, x0", - "strh w20, [x28, #1024]", - "strh w20, [x28, #1024]", - "ldr w20, [x4, #4]", - "ubfx w21, w20, #11, #3", - "strb w21, [x28, #747]", - "ubfx w22, w20, #8, #1", - "ubfx w23, w20, #9, #1", - "ubfx w24, w20, #10, #1", - "ubfx w20, w20, #14, #1", - "strb w22, [x28, #744]", - "strb w23, [x28, #745]", - "strb w24, [x28, #746]", - "strb w20, [x28, #750]", - "ldr w20, [x4, #8]", - "ubfx w22, w20, #0, #2", - "mrs x23, nzcv", - "cmp x22, #0x3 (3)", - "cset x22, ne", - "ubfx w24, w20, #2, #2", - "cmp x24, #0x3 (3)", - "cset x24, ne", - "orr w22, w22, w24, lsl #1", - "ubfx w24, w20, #4, #2", - "cmp x24, #0x3 (3)", - "cset x24, ne", - "orr w22, w22, w24, lsl #2", - "ubfx w24, w20, #6, #2", - "cmp x24, #0x3 (3)", - "cset x24, ne", - "orr w22, w22, w24, lsl #3", - "ubfx w24, w20, #8, #2", - "cmp x24, #0x3 (3)", - "cset x24, ne", - "orr w22, w22, w24, lsl #4", - "ubfx w24, w20, #10, #2", - "cmp x24, #0x3 (3)", - "cset x24, ne", - "orr w22, w22, w24, lsl #5", - "ubfx w24, w20, #12, #2", - "cmp x24, #0x3 (3)", - "cset x24, ne", - "orr w22, w22, w24, lsl #6", - "ubfx w20, w20, #14, #2", - "cmp x20, #0x3 (3)", - "cset x20, ne", - "orr w20, w22, w20, lsl #7", - "strb w20, [x28, #1026]", - "add x20, x4, #0x1c (28)", - "mov x22, #0xffffffffffffffff", - "mov w24, #0xffff", - "fmov d2, x22", - "mov v2.d[1], x24", - "ldur q3, [x4, #28]", - "and v3.16b, v3.16b, v2.16b", + "strh w21, [x28, #1024]", + "strh w21, [x28, #1024]", + "ldr w21, [x20, #4]", + "ubfx w22, w21, #11, #3", + "strb w22, [x28, #747]", + "ubfx w23, w21, #8, #1", + "ubfx w24, w21, #9, #1", + "ubfx w25, w21, #10, #1", + "ubfx w30, w21, #14, #1", + "strb w23, [x28, #744]", + "strb w24, [x28, #745]", + "strb w25, [x28, #746]", + "strb w30, [x28, #750]", + "ldr w21, [x20, #8]", + "ubfx w23, w21, #0, #2", + "mrs x24, nzcv", + "cmp x23, #0x3 (3)", + "cset x25, ne", + "ubfx w23, w21, #2, #2", + "cmp x23, #0x3 (3)", + "cset x30, ne", + "orr w23, w25, w30, lsl #1", + "ubfx w25, w21, #4, #2", + "cmp x25, #0x3 (3)", + "cset x30, ne", + "orr w25, w23, w30, lsl #2", + "ubfx w23, w21, #6, #2", + "cmp x23, #0x3 (3)", + "cset x30, ne", + "orr w23, w25, w30, lsl #3", + "ubfx w25, w21, #8, #2", + "cmp x25, #0x3 (3)", + "cset x30, ne", + "orr w25, w23, w30, lsl #4", + "ubfx w23, w21, #10, #2", + "cmp x23, #0x3 (3)", + "cset x30, ne", + "orr w23, w25, w30, lsl #5", + "ubfx w25, w21, #12, #2", + "cmp x25, #0x3 (3)", + "cset x30, ne", + "orr w25, w23, w30, lsl #6", + "ubfx w23, w21, #14, #2", + "cmp x23, #0x3 (3)", + "cset x21, ne", + "orr w23, w25, w21, lsl #7", + "strb w23, [x28, #1026]", + "add x21, x20, #0x1c (28)", + "mov x23, #0xffffffffffffffff", + "mov w25, #0xffff", + "fmov d2, x23", + "mov v3.16b, v2.16b", + "mov v3.d[1], x25", + "ldur q2, [x20, #28]", + "and v4.16b, v2.16b, v3.16b", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -6485,8 +6623,8 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", + "mov x1, v4.d[0]", + "umov w2, v4.h[4]", "ldr x3, [x28, #1192]", "blr x3", "ldr w4, [x28, #728]", @@ -6498,14 +6636,14 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "mov v3.8b, v0.8b", - "add x0, x28, x21, lsl #4", - "str d3, [x0, #768]", - "add x22, x20, #0xa (10)", - "add w21, w21, #0x1 (1)", - "and w21, w21, #0x7", - "ldur q3, [x20, #10]", - "and v3.16b, v3.16b, v2.16b", + "mov v2.8b, v0.8b", + "add x0, x28, x22, lsl #4", + "str d2, [x0, #768]", + "add x20, x21, #0xa (10)", + "add w23, w22, #0x1 (1)", + "and w22, w23, #0x7", + "ldur q2, [x21, #10]", + "and v4.16b, v2.16b, v3.16b", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -6518,8 +6656,8 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", + "mov x1, v4.d[0]", + "umov w2, v4.h[4]", "ldr x3, [x28, #1192]", "blr x3", "ldr w4, [x28, #728]", @@ -6531,14 +6669,14 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "mov v3.8b, v0.8b", - "add x0, x28, x21, lsl #4", - "str d3, [x0, #768]", - "add x20, x22, #0xa (10)", - "add w21, w21, #0x1 (1)", - "and w21, w21, #0x7", - "ldur q3, [x22, #10]", - "and v3.16b, v3.16b, v2.16b", + "mov v2.8b, v0.8b", + "add x0, x28, x22, lsl #4", + "str d2, [x0, #768]", + "add x21, x20, #0xa (10)", + "add w23, w22, #0x1 (1)", + "and w22, w23, #0x7", + "ldur q2, [x20, #10]", + "and v4.16b, v2.16b, v3.16b", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -6551,8 +6689,8 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", + "mov x1, v4.d[0]", + "umov w2, v4.h[4]", "ldr x3, [x28, #1192]", "blr x3", "ldr w4, [x28, #728]", @@ -6564,14 +6702,14 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "mov v3.8b, v0.8b", - "add x0, x28, x21, lsl #4", - "str d3, [x0, #768]", - "add x22, x20, #0xa (10)", - "add w21, w21, #0x1 (1)", - "and w21, w21, #0x7", - "ldur q3, [x20, #10]", - "and v3.16b, v3.16b, v2.16b", + "mov v2.8b, v0.8b", + "add x0, x28, x22, lsl #4", + "str d2, [x0, #768]", + "add x20, x21, #0xa (10)", + "add w23, w22, #0x1 (1)", + "and w22, w23, #0x7", + "ldur q2, [x21, #10]", + "and v4.16b, v2.16b, v3.16b", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -6584,8 +6722,8 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", + "mov x1, v4.d[0]", + "umov w2, v4.h[4]", "ldr x3, [x28, #1192]", "blr x3", "ldr w4, [x28, #728]", @@ -6597,14 +6735,14 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "mov v3.8b, v0.8b", - "add x0, x28, x21, lsl #4", - "str d3, [x0, #768]", - "add x20, x22, #0xa (10)", - "add w21, w21, #0x1 (1)", - "and w21, w21, #0x7", - "ldur q3, [x22, #10]", - "and v3.16b, v3.16b, v2.16b", + "mov v2.8b, v0.8b", + "add x0, x28, x22, lsl #4", + "str d2, [x0, #768]", + "add x21, x20, #0xa (10)", + "add w23, w22, #0x1 (1)", + "and w22, w23, #0x7", + "ldur q2, [x20, #10]", + "and v4.16b, v2.16b, v3.16b", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -6617,8 +6755,8 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", + "mov x1, v4.d[0]", + "umov w2, v4.h[4]", "ldr x3, [x28, #1192]", "blr x3", "ldr w4, [x28, #728]", @@ -6630,14 +6768,14 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "mov v3.8b, v0.8b", - "add x0, x28, x21, lsl #4", - "str d3, [x0, #768]", - "add x22, x20, #0xa (10)", - "add w21, w21, #0x1 (1)", - "and w21, w21, #0x7", - "ldur q3, [x20, #10]", - "and v3.16b, v3.16b, v2.16b", + "mov v2.8b, v0.8b", + "add x0, x28, x22, lsl #4", + "str d2, [x0, #768]", + "add x20, x21, #0xa (10)", + "add w23, w22, #0x1 (1)", + "and w22, w23, #0x7", + "ldur q2, [x21, #10]", + "and v4.16b, v2.16b, v3.16b", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -6650,8 +6788,8 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", + "mov x1, v4.d[0]", + "umov w2, v4.h[4]", "ldr x3, [x28, #1192]", "blr x3", "ldr w4, [x28, #728]", @@ -6663,14 +6801,14 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "mov v3.8b, v0.8b", - "add x0, x28, x21, lsl #4", - "str d3, [x0, #768]", - "add x20, x22, #0xa (10)", - "add w21, w21, #0x1 (1)", - "and w21, w21, #0x7", - "ldur q3, [x22, #10]", - "and v2.16b, v3.16b, v2.16b", + "mov v2.8b, v0.8b", + "add x0, x28, x22, lsl #4", + "str d2, [x0, #768]", + "add x21, x20, #0xa (10)", + "add w23, w22, #0x1 (1)", + "and w22, w23, #0x7", + "ldur q2, [x20, #10]", + "and v4.16b, v2.16b, v3.16b", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -6683,8 +6821,8 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v2.d[0]", - "umov w2, v2.h[4]", + "mov x1, v4.d[0]", + "umov w2, v4.h[4]", "ldr x3, [x28, #1192]", "blr x3", "ldr w4, [x28, #728]", @@ -6697,14 +6835,15 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", "mov v2.8b, v0.8b", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str d2, [x0, #768]", - "add x22, x20, #0xa (10)", - "add w21, w21, #0x1 (1)", - "and w21, w21, #0x7", - "ldur d2, [x20, #10]", - "ldr h3, [x22, #8]", - "mov v2.h[4], v3.h[0]", + "add x20, x21, #0xa (10)", + "add w23, w22, #0x1 (1)", + "and w22, w23, #0x7", + "ldur d2, [x21, #10]", + "ldr h3, [x20, #8]", + "mov v4.16b, v2.16b", + "mov v4.h[4], v3.h[0]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -6717,8 +6856,8 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v2.d[0]", - "umov w2, v2.h[4]", + "mov x1, v4.d[0]", + "umov w2, v4.h[4]", "ldr x3, [x28, #1192]", "blr x3", "ldr w4, [x28, #728]", @@ -6731,81 +6870,85 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", "mov v2.8b, v0.8b", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str d2, [x0, #768]", - "msr nzcv, x23" + "msr nzcv, x24" ] }, "fnsave [rax]": { - "ExpectedInstructionCount": 335, + "ExpectedInstructionCount": 340, "Comment": [ "0xdd !11b /6" ], "ExpectedArm64ASM": [ - "ldrb w20, [x28, #747]", - "ldrh w21, [x28, #1024]", - "str w21, [x4]", - "mov w21, #0x0", - "mov x22, x21", - "bfi x22, x20, #11, #3", - "ldrb w23, [x28, #744]", - "ldrb w24, [x28, #745]", - "ldrb w25, [x28, #746]", - "ldrb w30, [x28, #750]", - "orr x22, x22, x23, lsl #8", - "orr x22, x22, x24, lsl #9", - "orr x22, x22, x25, lsl #10", - "orr x22, x22, x30, lsl #14", - "str w22, [x4, #4]", - "ldrb w22, [x28, #1026]", - "and w23, w22, #0x1", + "sub sp, sp, #0x20 (32)", + "mov x20, x4", + "ldrb w21, [x28, #747]", + "ldrh w22, [x28, #1024]", + "str w22, [x20]", + "mov w22, #0x0", + "mov x23, x22", + "bfi x23, x21, #11, #3", + "ldrb w24, [x28, #744]", + "ldrb w25, [x28, #745]", + "ldrb w30, [x28, #746]", + "ldrb w18, [x28, #750]", + "strb w21, [sp]", + "orr x21, x23, x24, lsl #8", + "orr x23, x21, x25, lsl #9", + "orr x21, x23, x30, lsl #10", + "orr x23, x21, x18, lsl #14", + "str w23, [x20, #4]", + "ldrb w21, [x28, #1026]", + "and w23, w21, #0x1", "mov w24, #0x3", "mrs x25, nzcv", "cmp x23, #0x0 (0)", - "csel x23, x24, x21, eq", - "orr w23, w21, w23", - "lsr w30, w22, #1", - "and w30, w30, #0x1", + "csel x30, x24, x22, eq", + "orr w23, w22, w30", + "lsr w30, w21, #1", + "and w18, w30, #0x1", + "cmp x18, #0x0 (0)", + "csel x30, x24, x22, eq", + "orr w18, w23, w30, lsl #2", + "lsr w23, w21, #2", + "and w30, w23, #0x1", "cmp x30, #0x0 (0)", - "csel x30, x24, x21, eq", - "orr w23, w23, w30, lsl #2", - "lsr w30, w22, #2", - "and w30, w30, #0x1", - "cmp x30, #0x0 (0)", - "csel x30, x24, x21, eq", - "orr w23, w23, w30, lsl #4", - "lsr w30, w22, #3", - "and w30, w30, #0x1", - "cmp x30, #0x0 (0)", - "csel x30, x24, x21, eq", - "orr w23, w23, w30, lsl #6", - "lsr w30, w22, #4", - "and w30, w30, #0x1", - "cmp x30, #0x0 (0)", - "csel x30, x24, x21, eq", - "orr w23, w23, w30, lsl #8", - "lsr w30, w22, #5", - "and w30, w30, #0x1", + "csel x23, x24, x22, eq", + "orr w30, w18, w23, lsl #4", + "lsr w23, w21, #3", + "and w18, w23, #0x1", + "cmp x18, #0x0 (0)", + "csel x23, x24, x22, eq", + "orr w18, w30, w23, lsl #6", + "lsr w23, w21, #4", + "and w30, w23, #0x1", "cmp x30, #0x0 (0)", - "csel x30, x24, x21, eq", - "orr w23, w23, w30, lsl #10", - "lsr w30, w22, #6", - "and w30, w30, #0x1", + "csel x23, x24, x22, eq", + "orr w30, w18, w23, lsl #8", + "lsr w23, w21, #5", + "and w18, w23, #0x1", + "cmp x18, #0x0 (0)", + "csel x23, x24, x22, eq", + "orr w18, w30, w23, lsl #10", + "lsr w23, w21, #6", + "and w30, w23, #0x1", "cmp x30, #0x0 (0)", - "csel x30, x24, x21, eq", - "orr w23, w23, w30, lsl #12", - "lsr w22, w22, #7", - "and w22, w22, #0x1", - "cmp x22, #0x0 (0)", - "csel x22, x24, x21, eq", - "orr w22, w23, w22, lsl #14", - "str w22, [x4, #8]", - "str w21, [x4, #12]", - "str w21, [x4, #16]", - "str w21, [x4, #20]", - "str w21, [x4, #24]", - "add x22, x4, #0x1c (28)", - "add x0, x28, x20, lsl #4", + "csel x23, x24, x22, eq", + "orr w30, w18, w23, lsl #12", + "lsr w23, w21, #7", + "and w21, w23, #0x1", + "cmp x21, #0x0 (0)", + "csel x23, x24, x22, eq", + "orr w21, w30, w23, lsl #14", + "str w21, [x20, #8]", + "str w22, [x20, #12]", + "str w22, [x20, #16]", + "str w22, [x20, #20]", + "str w22, [x20, #24]", + "add x21, x20, #0x1c (28)", + "ldrb w23, [sp]", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", @@ -6831,14 +6974,14 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "stur q2, [x4, #28]", - "add x23, x22, #0xa (10)", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "add x0, x28, x20, lsl #4", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "stur q3, [x20, #28]", + "add x20, x21, #0xa (10)", + "add w24, w23, #0x1 (1)", + "and w23, w24, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", @@ -6864,14 +7007,14 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "stur q2, [x22, #10]", - "add x22, x23, #0xa (10)", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "add x0, x28, x20, lsl #4", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "stur q3, [x21, #10]", + "add x21, x20, #0xa (10)", + "add w24, w23, #0x1 (1)", + "and w23, w24, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", @@ -6897,14 +7040,14 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "stur q2, [x23, #10]", - "add x23, x22, #0xa (10)", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "add x0, x28, x20, lsl #4", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "stur q3, [x20, #10]", + "add x20, x21, #0xa (10)", + "add w24, w23, #0x1 (1)", + "and w23, w24, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", @@ -6930,14 +7073,14 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "stur q2, [x22, #10]", - "add x22, x23, #0xa (10)", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "add x0, x28, x20, lsl #4", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "stur q3, [x21, #10]", + "add x21, x20, #0xa (10)", + "add w24, w23, #0x1 (1)", + "and w23, w24, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", @@ -6963,14 +7106,14 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "stur q2, [x23, #10]", - "add x23, x22, #0xa (10)", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "add x0, x28, x20, lsl #4", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "stur q3, [x20, #10]", + "add x20, x21, #0xa (10)", + "add w24, w23, #0x1 (1)", + "and w23, w24, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", @@ -6996,14 +7139,14 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "stur q2, [x22, #10]", - "add x22, x23, #0xa (10)", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "add x0, x28, x20, lsl #4", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "stur q3, [x21, #10]", + "add x21, x20, #0xa (10)", + "add w24, w23, #0x1 (1)", + "and w23, w24, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", @@ -7029,14 +7172,14 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "stur q2, [x23, #10]", - "add x23, x22, #0xa (10)", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "add x0, x28, x20, lsl #4", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "stur q3, [x20, #10]", + "add x20, x21, #0xa (10)", + "add w24, w23, #0x1 (1)", + "and w23, w24, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", @@ -7062,41 +7205,44 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "stur d2, [x22, #10]", - "dup v2.8h, v2.h[4]", - "str h2, [x23, #8]", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "stur d3, [x21, #10]", + "dup v2.8h, v3.h[4]", + "str h2, [x20, #8]", "mov w20, #0x37f", "strh w20, [x28, #1024]", - "strb w21, [x28, #747]", - "strb w21, [x28, #744]", - "strb w21, [x28, #745]", - "strb w21, [x28, #746]", - "strb w21, [x28, #750]", - "strb w21, [x28, #1026]", - "msr nzcv, x25" + "strb w22, [x28, #747]", + "strb w22, [x28, #744]", + "strb w22, [x28, #745]", + "strb w22, [x28, #746]", + "strb w22, [x28, #750]", + "strb w22, [x28, #1026]", + "msr nzcv, x25", + "add sp, sp, #0x20 (32)" ] }, "fnstsw [rax]": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 14, "Comment": [ "0xdd !11b /7" ], "ExpectedArm64ASM": [ "mov w20, #0x0", "ldrb w21, [x28, #747]", - "bfi x20, x21, #11, #3", - "ldrb w21, [x28, #744]", - "ldrb w22, [x28, #745]", + "mov x22, x20", + "bfi x22, x21, #11, #3", + "ldrb w20, [x28, #744]", + "ldrb w21, [x28, #745]", "ldrb w23, [x28, #746]", "ldrb w24, [x28, #750]", - "orr x20, x20, x21, lsl #8", - "orr x20, x20, x22, lsl #9", - "orr x20, x20, x23, lsl #10", - "orr x20, x20, x24, lsl #14", - "strh w20, [x4]" + "orr x25, x22, x20, lsl #8", + "orr x20, x25, x21, lsl #9", + "orr x21, x20, x23, lsl #10", + "orr x20, x21, x24, lsl #14", + "mov x21, x4", + "strh w20, [x21]" ] }, "ffree st0": { @@ -7106,12 +7252,12 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x0 (0)", - "and w20, w20, #0x7", + "add w21, w20, #0x0 (0)", + "and w20, w21, #0x7", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w20, w22, w20", - "bic w20, w21, w20", + "lsl w23, w22, w20", + "bic w20, w21, w23", "strb w20, [x28, #1026]" ] }, @@ -7123,11 +7269,11 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w20, w21, w20", - "bic w20, w22, w20", + "lsl w23, w21, w20", + "bic w20, w22, w23", "strb w20, [x28, #1026]" ] }, @@ -7138,12 +7284,12 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x2 (2)", - "and w20, w20, #0x7", + "add w21, w20, #0x2 (2)", + "and w20, w21, #0x7", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w20, w22, w20", - "bic w20, w21, w20", + "lsl w23, w22, w20", + "bic w20, w21, w23", "strb w20, [x28, #1026]" ] }, @@ -7154,12 +7300,12 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x3 (3)", - "and w20, w20, #0x7", + "add w21, w20, #0x3 (3)", + "and w20, w21, #0x7", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w20, w22, w20", - "bic w20, w21, w20", + "lsl w23, w22, w20", + "bic w20, w21, w23", "strb w20, [x28, #1026]" ] }, @@ -7170,12 +7316,12 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x4 (4)", - "and w20, w20, #0x7", + "add w21, w20, #0x4 (4)", + "and w20, w21, #0x7", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w20, w22, w20", - "bic w20, w21, w20", + "lsl w23, w22, w20", + "bic w20, w21, w23", "strb w20, [x28, #1026]" ] }, @@ -7186,12 +7332,12 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x5 (5)", - "and w20, w20, #0x7", + "add w21, w20, #0x5 (5)", + "and w20, w21, #0x7", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w20, w22, w20", - "bic w20, w21, w20", + "lsl w23, w22, w20", + "bic w20, w21, w23", "strb w20, [x28, #1026]" ] }, @@ -7202,12 +7348,12 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x6 (6)", - "and w20, w20, #0x7", + "add w21, w20, #0x6 (6)", + "and w20, w21, #0x7", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w20, w22, w20", - "bic w20, w21, w20", + "lsl w23, w22, w20", + "bic w20, w21, w23", "strb w20, [x28, #1026]" ] }, @@ -7218,12 +7364,12 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x7 (7)", - "and w20, w20, #0x7", + "add w21, w20, #0x7 (7)", + "and w20, w21, #0x7", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w20, w22, w20", - "bic w20, w21, w20", + "lsl w23, w22, w20", + "bic w20, w21, w23", "strb w20, [x28, #1026]" ] }, @@ -7235,10 +7381,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -7250,10 +7396,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -7265,10 +7411,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -7280,10 +7426,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -7295,10 +7441,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -7310,10 +7456,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -7325,10 +7471,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -7340,10 +7486,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -7355,18 +7501,18 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -7379,17 +7525,17 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", + "and w23, w22, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "str q2, [x0, #768]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -7401,18 +7547,18 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -7424,18 +7570,18 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -7447,19 +7593,19 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "strb w20, [x28, #747]" + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", + "strb w20, [x28, #747]" ] }, "fstp st5": { @@ -7470,18 +7616,18 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -7493,18 +7639,18 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -7516,18 +7662,18 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -7540,8 +7686,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x0", "add w22, w20, #0x0 (0)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -7564,8 +7710,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -7589,8 +7735,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -7614,8 +7760,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -7639,8 +7785,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -7664,8 +7810,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -7689,8 +7835,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -7714,8 +7860,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -7740,8 +7886,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x0", "add w22, w20, #0x0 (0)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -7756,11 +7902,11 @@ "strb w21, [x28, #745]", "strb w23, [x28, #746]", "ldrb w21, [x28, #1026]", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -7773,8 +7919,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -7789,11 +7935,11 @@ "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -7805,8 +7951,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -7822,11 +7968,11 @@ "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -7838,8 +7984,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -7855,11 +8001,11 @@ "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -7871,8 +8017,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -7888,11 +8034,11 @@ "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -7904,8 +8050,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -7921,11 +8067,11 @@ "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -7937,8 +8083,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -7954,11 +8100,11 @@ "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -7970,8 +8116,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -7987,57 +8133,60 @@ "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fiadd word [rax]": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 10, "Comment": [ "0xde !11b /0" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldrh w21, [x4]", - "sxth x21, w21", + "mov x21, x4", + "ldrh w22, [x21]", + "sxth x21, w22", "scvtf d2, w21", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", + "fadd d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fimul word [rax]": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 10, "Comment": [ "0xde !11b /1" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldrh w21, [x4]", - "sxth x21, w21", + "mov x21, x4", + "ldrh w22, [x21]", + "sxth x21, w22", "scvtf d2, w21", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", + "fmul d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "ficom word [rax]": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 17, "Comment": [ "0xde !11b /2" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldrh w21, [x4]", - "sxth x21, w21", + "mov x21, x4", + "ldrh w22, [x21]", + "sxth x21, w22", "scvtf d2, w21", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -8054,14 +8203,15 @@ ] }, "ficomp word [rax]": { - "ExpectedInstructionCount": 24, + "ExpectedInstructionCount": 25, "Comment": [ "0xde !11b /3" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldrh w21, [x4]", - "sxth x21, w21", + "mov x21, x4", + "ldrh w22, [x21]", + "sxth x21, w22", "scvtf d2, w21", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -8077,80 +8227,84 @@ "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fisub word [rax]": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 10, "Comment": [ "0xde !11b /4" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldrh w21, [x4]", - "sxth x21, w21", + "mov x21, x4", + "ldrh w22, [x21]", + "sxth x21, w22", "scvtf d2, w21", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", + "fsub d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fisubr word [rax]": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 10, "Comment": [ "0xde !11b /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldrh w21, [x4]", - "sxth x21, w21", + "mov x21, x4", + "ldrh w22, [x21]", + "sxth x21, w22", "scvtf d2, w21", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", + "fsub d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fidiv word [rax]": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 10, "Comment": [ "0xde !11b /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldrh w21, [x4]", - "sxth x21, w21", + "mov x21, x4", + "ldrh w22, [x21]", + "sxth x21, w22", "scvtf d2, w21", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", + "fdiv d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fidivr word [rax]": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 10, "Comment": [ "0xde !11b /7" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldrh w21, [x4]", - "sxth x21, w21", + "mov x21, x4", + "ldrh w22, [x21]", + "sxth x21, w22", "scvtf d2, w21", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", + "fdiv d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "faddp st0": { @@ -8161,22 +8315,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fadd d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "faddp st1": { @@ -8188,21 +8342,21 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "fadd d4, d3, d2", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w20", + "bic w21, w22, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x22, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x23, lsl #4", + "str d4, [x0, #768]" ] }, "faddp st2": { @@ -8213,22 +8367,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fadd d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "faddp st3": { @@ -8239,22 +8393,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fadd d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "faddp st4": { @@ -8265,22 +8419,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fadd d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "faddp st5": { @@ -8291,22 +8445,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fadd d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "faddp st6": { @@ -8317,22 +8471,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fadd d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "faddp st7": { @@ -8343,22 +8497,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fadd d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fmulp st0": { @@ -8369,22 +8523,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fmul d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fmulp st1": { @@ -8396,21 +8550,21 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "fmul d4, d3, d2", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w20", + "bic w21, w22, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x22, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x23, lsl #4", + "str d4, [x0, #768]" ] }, "fmulp st2": { @@ -8421,22 +8575,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fmul d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fmulp st3": { @@ -8447,22 +8601,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fmul d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fmulp st4": { @@ -8473,22 +8627,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fmul d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fmulp st5": { @@ -8499,22 +8653,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fmul d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fmulp st6": { @@ -8525,22 +8679,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fmul d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fmulp st7": { @@ -8551,22 +8705,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fmul d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fcompp": { @@ -8578,8 +8732,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -8595,15 +8749,15 @@ "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", "lsl w23, w21, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "bic w24, w22, w23", + "strb w24, [x28, #1026]", + "add w22, w20, #0x1 (1)", + "and w20, w22, #0x7", + "lsl w22, w21, w20", + "bic w21, w24, w22", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -8617,22 +8771,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fsub d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubrp st1, st0": { @@ -8644,21 +8798,21 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "fsub d4, d3, d2", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w20", + "bic w21, w22, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x22, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x23, lsl #4", + "str d4, [x0, #768]" ] }, "fsubrp st2, st0": { @@ -8669,22 +8823,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fsub d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubrp st3, st0": { @@ -8695,22 +8849,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fsub d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubrp st4, st0": { @@ -8721,22 +8875,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fsub d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubrp st5, st0": { @@ -8747,22 +8901,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fsub d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubrp st6, st0": { @@ -8773,22 +8927,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fsub d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubrp st7, st0": { @@ -8799,22 +8953,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fsub d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "db 0xde, 0xe8": { @@ -8827,22 +8981,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "ldrb w22, [x28, #1026]", + "fsub d4, d2, d3", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubp st1, st0": { @@ -8854,21 +9008,21 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "fsub d4, d2, d3", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w20", + "bic w21, w22, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x22, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x23, lsl #4", + "str d4, [x0, #768]" ] }, "fsubp st2, st0": { @@ -8879,22 +9033,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "ldrb w22, [x28, #1026]", + "fsub d4, d2, d3", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubp st3, st0": { @@ -8905,22 +9059,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "ldrb w22, [x28, #1026]", + "fsub d4, d2, d3", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubp st4, st0": { @@ -8931,22 +9085,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "ldrb w22, [x28, #1026]", + "fsub d4, d2, d3", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubp st5, st0": { @@ -8957,22 +9111,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "ldrb w22, [x28, #1026]", + "fsub d4, d2, d3", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubp st6, st0": { @@ -8983,22 +9137,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "ldrb w22, [x28, #1026]", + "fsub d4, d2, d3", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubp st7, st0": { @@ -9009,22 +9163,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "ldrb w22, [x28, #1026]", + "fsub d4, d2, d3", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "db 0xde, 0xf0": { @@ -9037,22 +9191,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fdiv d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivrp st1, st0": { @@ -9064,21 +9218,21 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "fdiv d4, d3, d2", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w20", + "bic w21, w22, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x22, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x23, lsl #4", + "str d4, [x0, #768]" ] }, "fdivrp st2, st0": { @@ -9089,22 +9243,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fdiv d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivrp st3, st0": { @@ -9115,22 +9269,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fdiv d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivrp st4, st0": { @@ -9141,22 +9295,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fdiv d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivrp st5, st0": { @@ -9167,22 +9321,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fdiv d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivrp st6, st0": { @@ -9193,22 +9347,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fdiv d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivrp st7, st0": { @@ -9219,22 +9373,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fdiv d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "db 0xde, 0xf8": { @@ -9247,22 +9401,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "ldrb w22, [x28, #1026]", + "fdiv d4, d2, d3", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivp st1, st0": { @@ -9274,21 +9428,21 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "fdiv d4, d2, d3", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w20", + "bic w21, w22, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x22, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x23, lsl #4", + "str d4, [x0, #768]" ] }, "fdivp st2, st0": { @@ -9299,22 +9453,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "ldrb w22, [x28, #1026]", + "fdiv d4, d2, d3", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivp st3, st0": { @@ -9325,22 +9479,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "ldrb w22, [x28, #1026]", + "fdiv d4, d2, d3", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivp st4, st0": { @@ -9351,22 +9505,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "ldrb w22, [x28, #1026]", + "fdiv d4, d2, d3", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivp st5, st0": { @@ -9377,22 +9531,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "ldrb w22, [x28, #1026]", + "fdiv d4, d2, d3", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivp st6, st0": { @@ -9403,22 +9557,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "ldrb w22, [x28, #1026]", + "fdiv d4, d2, d3", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivp st7, st0": { @@ -9429,48 +9583,49 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "ldrb w22, [x28, #1026]", + "fdiv d4, d2, d3", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fild word [rax]": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xdf !11b /0" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "ldrh w21, [x4]", - "sxth x21, w21", + "mov x21, x4", + "ldrh w22, [x21]", + "sxth x21, w22", "scvtf d2, x21", "add x0, x28, x20, lsl #4", "str d2, [x0, #768]" ] }, "fisttp word [rax]": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdf !11b /1" ], @@ -9479,19 +9634,20 @@ "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", "fcvtzs x21, d2", - "strh w21, [x4]", + "mov x22, x4", + "strh w21, [x22]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fist word [rax]": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 7, "Comment": [ "0xdf !11b /2" ], @@ -9501,11 +9657,12 @@ "ldr d2, [x0, #768]", "frinti d0, d2", "fcvtzs x20, d0", - "strh w20, [x4]" + "mov x21, x4", + "strh w20, [x21]" ] }, "fistp word [rax]": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xdf !11b /3" ], @@ -9515,33 +9672,35 @@ "ldr d2, [x0, #768]", "frinti d0, d2", "fcvtzs x21, d0", - "strh w21, [x4]", + "mov x22, x4", + "strh w21, [x22]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fbld tword [rax]": { - "ExpectedInstructionCount": 66, + "ExpectedInstructionCount": 67, "Comment": [ "0xdf !11b /4" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "ldr q2, [x4]", + "mov x21, x4", + "ldr q2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -9567,9 +9726,9 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -9582,8 +9741,8 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v2.d[0]", - "umov w2, v2.h[4]", + "mov x1, v3.d[0]", + "umov w2, v3.h[4]", "ldr x3, [x28, #1192]", "blr x3", "ldr w4, [x28, #728]", @@ -9601,7 +9760,7 @@ ] }, "fbstp tword [rax]": { - "ExpectedInstructionCount": 69, + "ExpectedInstructionCount": 70, "Comment": [ "0xdf !11b /6" ], @@ -9633,9 +9792,9 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -9648,8 +9807,8 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v2.d[0]", - "umov w2, v2.h[4]", + "mov x1, v3.d[0]", + "umov w2, v3.h[4]", "ldr x3, [x28, #1392]", "blr x3", "ldr w4, [x28, #728]", @@ -9664,16 +9823,17 @@ "eor v2.16b, v2.16b, v2.16b", "mov v2.d[0], x0", "mov v2.h[4], w1", - "str d2, [x4]", - "mov x21, v2.d[1]", - "strh w21, [x4, #8]", + "mov x21, x4", + "str d2, [x21]", + "mov x22, v2.d[1]", + "strh w22, [x21, #8]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -9684,8 +9844,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -9696,8 +9856,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -9708,8 +9868,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -9720,8 +9880,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -9732,8 +9892,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -9744,8 +9904,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -9756,8 +9916,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -9768,60 +9928,65 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fnstsw ax": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 16, "Comment": [ "0xdf 11b 0xe0 /4" ], "ExpectedArm64ASM": [ "mov w20, #0x0", "ldrb w21, [x28, #747]", - "bfi x20, x21, #11, #3", - "ldrb w21, [x28, #744]", - "ldrb w22, [x28, #745]", + "mov x22, x20", + "bfi x22, x21, #11, #3", + "ldrb w20, [x28, #744]", + "ldrb w21, [x28, #745]", "ldrb w23, [x28, #746]", "ldrb w24, [x28, #750]", - "orr x20, x20, x21, lsl #8", - "orr x20, x20, x22, lsl #9", - "orr x20, x20, x23, lsl #10", - "orr x20, x20, x24, lsl #14", - "bfxil x4, x20, #0, #16" + "orr x25, x22, x20, lsl #8", + "orr x20, x25, x21, lsl #9", + "orr x21, x20, x23, lsl #10", + "orr x20, x21, x24, lsl #14", + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x4, x22" ] }, "fucomip st0": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 20, "Comment": [ "0xdf 11b 0xe8 /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", "mov w21, #0x1", - "cset w26, vc", + "cset w22, vc", + "mov x26, x22", "axflag", "cfinv", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fucomip st1": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 20, "Comment": [ "0xdf 11b 0xe9 /5" ], @@ -9829,215 +9994,223 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", - "cset w26, vc", + "cset w22, vc", + "mov x26, x22", "axflag", "cfinv", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fucomip st2": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 20, "Comment": [ "0xdf 11b 0xea /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", "mov w21, #0x1", - "cset w26, vc", + "cset w22, vc", + "mov x26, x22", "axflag", "cfinv", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fucomip st3": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 20, "Comment": [ "0xdf 11b 0xeb /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", "mov w21, #0x1", - "cset w26, vc", + "cset w22, vc", + "mov x26, x22", "axflag", "cfinv", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fucomip st4": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 20, "Comment": [ "0xdf 11b 0xec /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", "mov w21, #0x1", - "cset w26, vc", + "cset w22, vc", + "mov x26, x22", "axflag", "cfinv", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fucomip st5": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 20, "Comment": [ "0xdf 11b 0xed /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", "mov w21, #0x1", - "cset w26, vc", + "cset w22, vc", + "mov x26, x22", "axflag", "cfinv", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fucomip st6": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 20, "Comment": [ "0xdf 11b 0xee /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", "mov w21, #0x1", - "cset w26, vc", + "cset w22, vc", + "mov x26, x22", "axflag", "cfinv", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fucomip st7": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 20, "Comment": [ "0xdf 11b 0xef /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", "mov w21, #0x1", - "cset w26, vc", + "cset w22, vc", + "mov x26, x22", "axflag", "cfinv", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fcomip st0": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 20, "Comment": [ "0xdf 11b 0xf0 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", "mov w21, #0x1", - "cset w26, vc", + "cset w22, vc", + "mov x26, x22", "axflag", "cfinv", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fcomip st1": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 20, "Comment": [ "0xdf 11b 0xf1 /6" ], @@ -10045,183 +10218,190 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", - "cset w26, vc", + "cset w22, vc", + "mov x26, x22", "axflag", "cfinv", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fcomip st2": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 20, "Comment": [ "0xdf 11b 0xf2 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", "mov w21, #0x1", - "cset w26, vc", + "cset w22, vc", + "mov x26, x22", "axflag", "cfinv", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fcomip st3": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 20, "Comment": [ "0xdf 11b 0xf3 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", "mov w21, #0x1", - "cset w26, vc", + "cset w22, vc", + "mov x26, x22", "axflag", "cfinv", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fcomip st4": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 20, "Comment": [ "0xdf 11b 0xf4 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", "mov w21, #0x1", - "cset w26, vc", + "cset w22, vc", + "mov x26, x22", "axflag", "cfinv", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fcomip st5": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 20, "Comment": [ "0xdf 11b 0xf5 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", "mov w21, #0x1", - "cset w26, vc", + "cset w22, vc", + "mov x26, x22", "axflag", "cfinv", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fcomip st6": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 20, "Comment": [ "0xdf 11b 0xf6 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", "mov w21, #0x1", - "cset w26, vc", + "cset w22, vc", + "mov x26, x22", "axflag", "cfinv", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fcomip st7": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 20, "Comment": [ "0xdf 11b 0xf7 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", "mov w21, #0x1", - "cset w26, vc", + "cset w22, vc", + "mov x26, x22", "axflag", "cfinv", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] } diff --git a/unittests/InstructionCountCI/H0F38.json b/unittests/InstructionCountCI/H0F38.json index 2fb8062559..3219dca384 100644 --- a/unittests/InstructionCountCI/H0F38.json +++ b/unittests/InstructionCountCI/H0F38.json @@ -20,20 +20,23 @@ "ldr d2, [x28, #768]", "ldr d3, [x28, #784]", "movi v4.16b, #0x87", - "and v3.16b, v3.16b, v4.16b", - "tbl v2.8b, {v2.16b}, v3.8b", - "str d2, [x28, #768]" + "and v5.16b, v3.16b, v4.16b", + "tbl v3.8b, {v2.16b}, v5.8b", + "str d3, [x28, #768]" ] }, "pshufb xmm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "0x66 0x0f 0x38 0x00" ], "ExpectedArm64ASM": [ - "movi v2.16b, #0x8f", - "and v2.16b, v17.16b, v2.16b", - "tbl v16.16b, {v16.16b}, v2.16b" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "movi v4.16b, #0x8f", + "and v5.16b, v3.16b, v4.16b", + "tbl v3.16b, {v2.16b}, v5.16b", + "mov v16.16b, v3.16b" ] }, "phaddw mm0, mm1": { @@ -44,17 +47,20 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "addp v2.4h, v3.4h, v2.4h", - "str d2, [x28, #768]" + "addp v4.4h, v3.4h, v2.4h", + "str d4, [x28, #768]" ] }, "phaddw xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "0x66 0x0f 0x38 0x01" ], "ExpectedArm64ASM": [ - "addp v16.8h, v16.8h, v17.8h" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "addp v4.8h, v3.8h, v2.8h", + "mov v16.16b, v4.16b" ] }, "phaddd mm0, mm1": { @@ -65,17 +71,20 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "addp v2.2s, v3.2s, v2.2s", - "str d2, [x28, #768]" + "addp v4.2s, v3.2s, v2.2s", + "str d4, [x28, #768]" ] }, "phaddd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "0x66 0x0f 0x38 0x02" ], "ExpectedArm64ASM": [ - "addp v16.4s, v16.4s, v17.4s" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "addp v4.4s, v3.4s, v2.4s", + "mov v16.16b, v4.16b" ] }, "phaddsw mm0, mm1": { @@ -87,20 +96,23 @@ "ldr d2, [x28, #768]", "ldr d3, [x28, #784]", "uzp1 v4.4h, v2.4h, v3.4h", - "uzp2 v2.4h, v2.4h, v3.4h", - "sqadd v2.8h, v4.8h, v2.8h", + "uzp2 v5.4h, v2.4h, v3.4h", + "sqadd v2.8h, v4.8h, v5.8h", "str d2, [x28, #768]" ] }, "phaddsw xmm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "0x66 0x0f 0x38 0x03" ], "ExpectedArm64ASM": [ - "uzp1 v2.8h, v16.8h, v17.8h", - "uzp2 v3.8h, v16.8h, v17.8h", - "sqadd v16.8h, v2.8h, v3.8h" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "uzp1 v4.8h, v2.8h, v3.8h", + "uzp2 v5.8h, v2.8h, v3.8h", + "sqadd v2.8h, v4.8h, v5.8h", + "mov v16.16b, v2.16b" ] }, "pmaddubsw mm0, mm1": { @@ -111,17 +123,17 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", "ldr d3, [x28, #784]", - "uxtl v2.8h, v2.8b", - "sxtl v3.8h, v3.8b", - "smull v4.4s, v2.4h, v3.4h", - "smull2 v2.4s, v2.8h, v3.8h", - "addp v2.4s, v4.4s, v2.4s", - "sqxtn v2.4h, v2.4s", - "str d2, [x28, #768]" + "uxtl v4.8h, v2.8b", + "sxtl v2.8h, v3.8b", + "smull v3.4s, v4.4h, v2.4h", + "smull2 v5.4s, v4.8h, v2.8h", + "addp v2.4s, v3.4s, v5.4s", + "sqxtn v3.4h, v2.4s", + "str d3, [x28, #768]" ] }, "pmaddubsw xmm0, xmm1": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 12, "Comment": [ "{u,s}xtl{,2} and uzp{1,2} can be more optimal", "Up-front zero extend and sign extend the elements in place", @@ -130,15 +142,18 @@ "0x66 0x0f 0x38 0x04" ], "ExpectedArm64ASM": [ - "uxtl v2.8h, v16.8b", - "sxtl v3.8h, v17.8b", - "mul v2.8h, v2.8h, v3.8h", - "uxtl2 v3.8h, v16.16b", - "sxtl2 v4.8h, v17.16b", - "mul v3.8h, v3.8h, v4.8h", - "uzp1 v4.8h, v2.8h, v3.8h", - "uzp2 v2.8h, v2.8h, v3.8h", - "sqadd v16.8h, v4.8h, v2.8h" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "uxtl v4.8h, v2.8b", + "sxtl v5.8h, v3.8b", + "mul v6.8h, v4.8h, v5.8h", + "uxtl2 v4.8h, v2.16b", + "sxtl2 v2.8h, v3.16b", + "mul v3.8h, v4.8h, v2.8h", + "uzp1 v2.8h, v6.8h, v3.8h", + "uzp2 v4.8h, v6.8h, v3.8h", + "sqadd v3.8h, v2.8h, v4.8h", + "mov v16.16b, v3.16b" ] }, "phsubw mm0, mm1": { @@ -150,20 +165,23 @@ "ldr d2, [x28, #768]", "ldr d3, [x28, #784]", "uzp1 v4.4h, v2.4h, v3.4h", - "uzp2 v2.4h, v2.4h, v3.4h", - "sub v2.8h, v4.8h, v2.8h", + "uzp2 v5.4h, v2.4h, v3.4h", + "sub v2.8h, v4.8h, v5.8h", "str d2, [x28, #768]" ] }, "phsubw xmm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "0x66 0x0f 0x38 0x05" ], "ExpectedArm64ASM": [ - "uzp1 v2.8h, v16.8h, v17.8h", - "uzp2 v3.8h, v16.8h, v17.8h", - "sub v16.8h, v2.8h, v3.8h" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "uzp1 v4.8h, v2.8h, v3.8h", + "uzp2 v5.8h, v2.8h, v3.8h", + "sub v2.8h, v4.8h, v5.8h", + "mov v16.16b, v2.16b" ] }, "phsubd mm0, mm1": { @@ -175,20 +193,23 @@ "ldr d2, [x28, #768]", "ldr d3, [x28, #784]", "uzp1 v4.2s, v2.2s, v3.2s", - "uzp2 v2.2s, v2.2s, v3.2s", - "sub v2.4s, v4.4s, v2.4s", + "uzp2 v5.2s, v2.2s, v3.2s", + "sub v2.4s, v4.4s, v5.4s", "str d2, [x28, #768]" ] }, "phsubd xmm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "0x66 0x0f 0x38 0x06" ], "ExpectedArm64ASM": [ - "uzp1 v2.4s, v16.4s, v17.4s", - "uzp2 v3.4s, v16.4s, v17.4s", - "sub v16.4s, v2.4s, v3.4s" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "uzp1 v4.4s, v2.4s, v3.4s", + "uzp2 v5.4s, v2.4s, v3.4s", + "sub v2.4s, v4.4s, v5.4s", + "mov v16.16b, v2.16b" ] }, "phsubsw mm0, mm1": { @@ -200,20 +221,23 @@ "ldr d2, [x28, #768]", "ldr d3, [x28, #784]", "uzp1 v4.4h, v2.4h, v3.4h", - "uzp2 v2.4h, v2.4h, v3.4h", - "sqsub v2.8h, v4.8h, v2.8h", + "uzp2 v5.4h, v2.4h, v3.4h", + "sqsub v2.8h, v4.8h, v5.8h", "str d2, [x28, #768]" ] }, "phsubsw xmm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "0x66 0x0f 0x38 0x07" ], "ExpectedArm64ASM": [ - "uzp1 v2.8h, v16.8h, v17.8h", - "uzp2 v3.8h, v16.8h, v17.8h", - "sqsub v16.8h, v2.8h, v3.8h" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "uzp1 v4.8h, v2.8h, v3.8h", + "uzp2 v5.8h, v2.8h, v3.8h", + "sqsub v2.8h, v4.8h, v5.8h", + "mov v16.16b, v2.16b" ] }, "psignb mm0, mm1": { @@ -224,21 +248,24 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "sqshl v2.8b, v2.8b, #7", - "srshr v2.8b, v2.8b, #7", - "mul v2.16b, v3.16b, v2.16b", - "str d2, [x28, #768]" + "sqshl v4.8b, v2.8b, #7", + "srshr v2.8b, v4.8b, #7", + "mul v4.16b, v3.16b, v2.16b", + "str d4, [x28, #768]" ] }, "psignb xmm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "0x66 0x0f 0x38 0x08" ], "ExpectedArm64ASM": [ - "sqshl v2.16b, v17.16b, #7", - "srshr v2.16b, v2.16b, #7", - "mul v16.16b, v16.16b, v2.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "sqshl v4.16b, v2.16b, #7", + "srshr v2.16b, v4.16b, #7", + "mul v4.16b, v3.16b, v2.16b", + "mov v16.16b, v4.16b" ] }, "psignw mm0, mm1": { @@ -249,21 +276,24 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "sqshl v2.4h, v2.4h, #15", - "srshr v2.4h, v2.4h, #15", - "mul v2.8h, v3.8h, v2.8h", - "str d2, [x28, #768]" + "sqshl v4.4h, v2.4h, #15", + "srshr v2.4h, v4.4h, #15", + "mul v4.8h, v3.8h, v2.8h", + "str d4, [x28, #768]" ] }, "psignw xmm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "0x66 0x0f 0x38 0x09" ], "ExpectedArm64ASM": [ - "sqshl v2.8h, v17.8h, #15", - "srshr v2.8h, v2.8h, #15", - "mul v16.8h, v16.8h, v2.8h" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "sqshl v4.8h, v2.8h, #15", + "srshr v2.8h, v4.8h, #15", + "mul v4.8h, v3.8h, v2.8h", + "mov v16.16b, v4.16b" ] }, "psignd mm0, mm1": { @@ -274,21 +304,24 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "sqshl v2.2s, v2.2s, #31", - "srshr v2.2s, v2.2s, #31", - "mul v2.4s, v3.4s, v2.4s", - "str d2, [x28, #768]" + "sqshl v4.2s, v2.2s, #31", + "srshr v2.2s, v4.2s, #31", + "mul v4.4s, v3.4s, v2.4s", + "str d4, [x28, #768]" ] }, "psignd xmm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "0x66 0x0f 0x38 0x0a" ], "ExpectedArm64ASM": [ - "sqshl v2.4s, v17.4s, #31", - "srshr v2.4s, v2.4s, #31", - "mul v16.4s, v16.4s, v2.4s" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "sqshl v4.4s, v2.4s, #31", + "srshr v2.4s, v4.4s, #31", + "mul v4.4s, v3.4s, v2.4s", + "mov v16.16b, v4.16b" ] }, "pmulhrsw mm0, mm1": { @@ -300,114 +333,148 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", "ldr d3, [x28, #784]", - "smull v2.4s, v2.4h, v3.4h", - "sshr v2.4s, v2.4s, #14", + "smull v4.4s, v2.4h, v3.4h", + "sshr v2.4s, v4.4s, #14", "movi v3.4s, #0x1, lsl #0", - "add v2.4s, v2.4s, v3.4s", - "shrn v2.4h, v2.4s, #1", + "add v4.4s, v2.4s, v3.4s", + "shrn v2.4h, v4.4s, #1", "str d2, [x28, #768]" ] }, "pmulhrsw xmm0, xmm1": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 14, "Comment": [ "Might be able to use sqdmulh", "0x66 0x0f 0x38 0x0b" ], "ExpectedArm64ASM": [ - "smull v2.4s, v16.4h, v17.4h", - "smull2 v3.4s, v16.8h, v17.8h", - "sshr v2.4s, v2.4s, #14", - "sshr v3.4s, v3.4s, #14", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "smull v4.4s, v2.4h, v3.4h", + "smull2 v5.4s, v2.8h, v3.8h", + "sshr v2.4s, v4.4s, #14", + "sshr v3.4s, v5.4s, #14", "movi v4.4s, #0x1, lsl #0", - "add v2.4s, v2.4s, v4.4s", - "add v3.4s, v3.4s, v4.4s", - "shrn v2.4h, v2.4s, #1", - "mov v0.16b, v2.16b", - "shrn2 v0.8h, v3.4s, #1", - "mov v16.16b, v0.16b" + "add v5.4s, v2.4s, v4.4s", + "add v2.4s, v3.4s, v4.4s", + "shrn v3.4h, v5.4s, #1", + "mov v0.16b, v3.16b", + "shrn2 v0.8h, v2.4s, #1", + "mov v4.16b, v0.16b", + "mov v16.16b, v4.16b" ] }, "pblendvb xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0x66 0x0f 0x38 0x10" ], "ExpectedArm64ASM": [ - "sshr v2.16b, v16.16b, #7", - "bit v16.16b, v17.16b, v2.16b" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "sshr v4.16b, v2.16b, #7", + "mov v5.16b, v4.16b", + "bsl v5.16b, v3.16b, v2.16b", + "mov v16.16b, v5.16b" ] }, "blendvps xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0x66 0x0f 0x38 0x14" ], "ExpectedArm64ASM": [ - "sshr v2.4s, v16.4s, #31", - "bit v16.16b, v17.16b, v2.16b" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "sshr v4.4s, v2.4s, #31", + "mov v5.16b, v4.16b", + "bsl v5.16b, v3.16b, v2.16b", + "mov v16.16b, v5.16b" ] }, "blendvpd xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0x66 0x0f 0x38 0x15" ], "ExpectedArm64ASM": [ - "sshr v2.2d, v16.2d, #63", - "bit v16.16b, v17.16b, v2.16b" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "sshr v4.2d, v2.2d, #63", + "mov v5.16b, v4.16b", + "bsl v5.16b, v3.16b, v2.16b", + "mov v16.16b, v5.16b" ] }, "pblendvb xmm1, xmm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 7, "Comment": [ "0x66 0x0f 0x38 0x10" ], "ExpectedArm64ASM": [ - "sshr v2.16b, v16.16b, #7", - "bit v17.16b, v18.16b, v2.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v18.16b", + "mov v4.16b, v16.16b", + "sshr v5.16b, v4.16b, #7", + "mov v4.16b, v5.16b", + "bsl v4.16b, v3.16b, v2.16b", + "mov v17.16b, v4.16b" ] }, "blendvps xmm1, xmm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 7, "Comment": [ "0x66 0x0f 0x38 0x14" ], "ExpectedArm64ASM": [ - "sshr v2.4s, v16.4s, #31", - "bit v17.16b, v18.16b, v2.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v18.16b", + "mov v4.16b, v16.16b", + "sshr v5.4s, v4.4s, #31", + "mov v4.16b, v5.16b", + "bsl v4.16b, v3.16b, v2.16b", + "mov v17.16b, v4.16b" ] }, "blendvpd xmm1, xmm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 7, "Comment": [ "0x66 0x0f 0x38 0x15" ], "ExpectedArm64ASM": [ - "sshr v2.2d, v16.2d, #63", - "bit v17.16b, v18.16b, v2.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v18.16b", + "mov v4.16b, v16.16b", + "sshr v5.2d, v4.2d, #63", + "mov v4.16b, v5.16b", + "bsl v4.16b, v3.16b, v2.16b", + "mov v17.16b, v4.16b" ] }, "ptest xmm0, xmm1": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 18, "Comment": [ "0x66 0x0f 0x38 0x17" ], "ExpectedArm64ASM": [ - "and v2.16b, v16.16b, v17.16b", - "bic v3.16b, v17.16b, v16.16b", - "umaxv h2, v2.8h", - "umaxv h3, v3.8h", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "and v4.16b, v2.16b, v3.16b", + "bic v5.16b, v3.16b, v2.16b", + "umaxv h2, v4.8h", + "umaxv h3, v5.8h", "umov w20, v2.h[0]", "umov w21, v3.h[0]", - "mov w27, #0x0", - "mov w26, #0x1", + "mov w22, #0x0", + "mov w23, #0x1", "cmp x21, #0x0 (0)", - "cset x21, eq", + "cset x24, eq", "tst w20, w20", "mrs x20, nzcv", - "orr w20, w20, w21, lsl #29", - "msr nzcv, x20" + "orr w21, w20, w24, lsl #29", + "mov x26, x23", + "mov x27, x22", + "msr nzcv, x21" ] }, "pabsb mm0, mm1": { @@ -417,17 +484,19 @@ ], "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", - "abs v2.16b, v2.16b", - "str d2, [x28, #768]" + "abs v3.16b, v2.16b", + "str d3, [x28, #768]" ] }, "pabsb xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "0x66 0x0f 0x38 0x1c" ], "ExpectedArm64ASM": [ - "abs v16.16b, v17.16b" + "mov v2.16b, v17.16b", + "abs v3.16b, v2.16b", + "mov v16.16b, v3.16b" ] }, "pabsw mm0, mm1": { @@ -437,17 +506,19 @@ ], "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", - "abs v2.8h, v2.8h", - "str d2, [x28, #768]" + "abs v3.8h, v2.8h", + "str d3, [x28, #768]" ] }, "pabsw xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "0x66 0x0f 0x38 0x1d" ], "ExpectedArm64ASM": [ - "abs v16.8h, v17.8h" + "mov v2.16b, v17.16b", + "abs v3.8h, v2.8h", + "mov v16.16b, v3.16b" ] }, "pabsd mm0, mm1": { @@ -457,498 +528,583 @@ ], "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", - "abs v2.4s, v2.4s", - "str d2, [x28, #768]" + "abs v3.4s, v2.4s", + "str d3, [x28, #768]" ] }, "pabsd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "0x66 0x0f 0x38 0x1e" ], "ExpectedArm64ASM": [ - "abs v16.4s, v17.4s" + "mov v2.16b, v17.16b", + "abs v3.4s, v2.4s", + "mov v16.16b, v3.16b" ] }, "pmovzxbw xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "0x66 0x0f 0x38 0x30" ], "ExpectedArm64ASM": [ - "uxtl v16.8h, v17.8b" + "mov v2.16b, v17.16b", + "uxtl v3.8h, v2.8b", + "mov v16.16b, v3.16b" ] }, "pmovzxbd xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "0x66 0x0f 0x38 0x31" ], "ExpectedArm64ASM": [ - "uxtl v2.8h, v17.8b", - "uxtl v16.4s, v2.4h" + "mov v2.16b, v17.16b", + "uxtl v3.8h, v2.8b", + "uxtl v2.4s, v3.4h", + "mov v16.16b, v2.16b" ] }, "pmovzxbq xmm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x38 0x32" ], "ExpectedArm64ASM": [ - "uxtl v2.8h, v17.8b", - "uxtl v2.4s, v2.4h", - "uxtl v16.2d, v2.2s" + "mov v2.16b, v17.16b", + "uxtl v3.8h, v2.8b", + "uxtl v2.4s, v3.4h", + "uxtl v3.2d, v2.2s", + "mov v16.16b, v3.16b" ] }, "pmovzxwd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "0x66 0x0f 0x38 0x33" ], "ExpectedArm64ASM": [ - "uxtl v16.4s, v17.4h" + "mov v2.16b, v17.16b", + "uxtl v3.4s, v2.4h", + "mov v16.16b, v3.16b" ] }, "pmovzxwq xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "0x66 0x0f 0x38 0x34" ], "ExpectedArm64ASM": [ - "uxtl v2.4s, v17.4h", - "uxtl v16.2d, v2.2s" + "mov v2.16b, v17.16b", + "uxtl v3.4s, v2.4h", + "uxtl v2.2d, v3.2s", + "mov v16.16b, v2.16b" ] }, "pmovzxdq xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "0x66 0x0f 0x38 0x35" ], "ExpectedArm64ASM": [ - "uxtl v16.2d, v17.2s" + "mov v2.16b, v17.16b", + "uxtl v3.2d, v2.2s", + "mov v16.16b, v3.16b" ] }, "pcmpgtq xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "0x66 0x0f 0x38 0x37" ], "ExpectedArm64ASM": [ - "cmgt v16.2d, v16.2d, v17.2d" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "cmgt v4.2d, v3.2d, v2.2d", + "mov v16.16b, v4.16b" ] }, "pminsb xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "0x66 0x0f 0x38 0x38" ], "ExpectedArm64ASM": [ - "smin v16.16b, v16.16b, v17.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "smin v4.16b, v3.16b, v2.16b", + "mov v16.16b, v4.16b" ] }, "pminsd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "0x66 0x0f 0x38 0x39" ], "ExpectedArm64ASM": [ - "smin v16.4s, v16.4s, v17.4s" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "smin v4.4s, v3.4s, v2.4s", + "mov v16.16b, v4.16b" ] }, "pminuw xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "0x66 0x0f 0x38 0x3a" ], "ExpectedArm64ASM": [ - "umin v16.8h, v16.8h, v17.8h" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "umin v4.8h, v3.8h, v2.8h", + "mov v16.16b, v4.16b" ] }, "pminud xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "0x66 0x0f 0x38 0x3b" ], "ExpectedArm64ASM": [ - "umin v16.4s, v16.4s, v17.4s" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "umin v4.4s, v3.4s, v2.4s", + "mov v16.16b, v4.16b" ] }, "pmaxsb xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "0x66 0x0f 0x38 0x3c" ], "ExpectedArm64ASM": [ - "smax v16.16b, v16.16b, v17.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "smax v4.16b, v3.16b, v2.16b", + "mov v16.16b, v4.16b" ] }, "pmaxsd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "0x66 0x0f 0x38 0x3d" ], "ExpectedArm64ASM": [ - "smax v16.4s, v16.4s, v17.4s" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "smax v4.4s, v3.4s, v2.4s", + "mov v16.16b, v4.16b" ] }, "pmaxuw xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "0x66 0x0f 0x38 0x3e" ], "ExpectedArm64ASM": [ - "umax v16.8h, v16.8h, v17.8h" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "umax v4.8h, v3.8h, v2.8h", + "mov v16.16b, v4.16b" ] }, "pmaxud xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "0x66 0x0f 0x38 0x3f" ], "ExpectedArm64ASM": [ - "umax v16.4s, v16.4s, v17.4s" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "umax v4.4s, v3.4s, v2.4s", + "mov v16.16b, v4.16b" ] }, "pmulld xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "0x66 0x0f 0x38 0x40" ], "ExpectedArm64ASM": [ - "mul v16.4s, v16.4s, v17.4s" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "mul v4.4s, v3.4s, v2.4s", + "mov v16.16b, v4.16b" ] }, "phminposuw xmm0, xmm1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": [ "0x66 0x0f 0x38 0x41" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2048]", - "zip1 v3.8h, v2.8h, v17.8h", - "zip2 v2.8h, v2.8h, v17.8h", - "umin v2.4s, v3.4s, v2.4s", - "uminv s2, v2.4s", - "rev32 v16.8h, v2.8h" + "mov v2.16b, v17.16b", + "ldr q3, [x28, #2048]", + "zip1 v4.8h, v3.8h, v2.8h", + "zip2 v5.8h, v3.8h, v2.8h", + "umin v2.4s, v4.4s, v5.4s", + "uminv s3, v2.4s", + "rev32 v2.8h, v3.8h", + "mov v16.16b, v2.16b" ] }, "sha1nexte xmm0, xmm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": [ "0x66 0x0f 0x38 0xc8" ], "ExpectedArm64ASM": [ - "shl v2.4s, v16.4s, #30", - "usra v2.4s, v16.4s, #2", - "add v2.4s, v17.4s, v2.4s", - "mov v16.16b, v17.16b", - "mov v16.s[3], v2.s[3]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "shl v4.4s, v2.4s, #30", + "mov v5.16b, v4.16b", + "usra v5.4s, v2.4s, #2", + "add v2.4s, v3.4s, v5.4s", + "mov v4.16b, v3.16b", + "mov v4.s[3], v2.s[3]", + "mov v16.16b, v4.16b" ] }, "sha1msg1 xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x38 0xc9" ], "ExpectedArm64ASM": [ - "ext v2.16b, v17.16b, v16.16b, #8", - "eor v16.16b, v16.16b, v2.16b" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "ext v4.16b, v3.16b, v2.16b, #8", + "eor v3.16b, v2.16b, v4.16b", + "mov v16.16b, v3.16b" ] }, "sha1msg2 xmm0, xmm1": { - "ExpectedInstructionCount": 15, + "ExpectedInstructionCount": 16, "Comment": [ "0x66 0x0f 0x38 0xca" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "ext v2.16b, v2.16b, v17.16b, #12", - "eor v2.16b, v16.16b, v2.16b", - "shl v3.4s, v2.4s, #1", - "mov v0.16b, v3.16b", - "usra v0.4s, v2.4s, #31", - "mov v2.16b, v0.16b", - "dup v3.4s, v2.s[3]", - "eor v3.16b, v16.16b, v3.16b", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "movi v4.2d, #0x0", + "ext v5.16b, v4.16b, v3.16b, #12", + "eor v3.16b, v2.16b, v5.16b", "shl v4.4s, v3.4s, #1", - "mov v0.16b, v4.16b", - "usra v0.4s, v3.4s, #31", - "mov v3.16b, v0.16b", - "mov v16.16b, v2.16b", - "mov v16.s[0], v3.s[0]" + "mov v5.16b, v4.16b", + "usra v5.4s, v3.4s, #31", + "dup v3.4s, v5.s[3]", + "eor v4.16b, v2.16b, v3.16b", + "shl v2.4s, v4.4s, #1", + "mov v3.16b, v2.16b", + "usra v3.4s, v4.4s, #31", + "mov v2.16b, v5.16b", + "mov v2.s[0], v3.s[0]", + "mov v16.16b, v2.16b" ] }, "sha256rnds2 xmm0, xmm1": { - "ExpectedInstructionCount": 56, + "ExpectedInstructionCount": 61, "Comment": [ "0x66 0x0f 0x38 0xcb" ], "ExpectedArm64ASM": [ - "mov w20, v17.s[1]", - "mov w21, v17.s[0]", - "mov w22, v16.s[1]", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov w20, v3.s[1]", + "mov w21, v3.s[0]", + "mov w22, v2.s[1]", "and w23, w20, w21", - "bic w22, w22, w20", - "eor w22, w23, w22", + "bic w24, w22, w20", + "eor w22, w23, w24", "ror w23, w20, #6", - "eor w23, w23, w20, ror #11", - "eor w23, w23, w20, ror #25", - "add w22, w22, w23", - "mov w23, v16.s[0]", - "add w22, w22, w23", - "mov w23, v16.s[0]", - "add w22, w22, w23", - "mov w23, v17.s[3]", - "mov w24, v17.s[2]", - "mov w25, v16.s[3]", - "and w30, w24, w25", - "orr w25, w24, w25", - "and w25, w23, w25", - "orr w25, w25, w30", - "add w25, w22, w25", - "ror w30, w23, #2", - "eor w30, w30, w23, ror #13", - "eor w30, w30, w23, ror #22", - "add w25, w25, w30", - "mov w30, v16.s[2]", - "add w22, w22, w30", - "and w20, w22, w20", - "bic w21, w21, w22", - "eor w20, w20, w21", - "ror w21, w22, #6", - "eor w21, w21, w22, ror #11", - "eor w21, w21, w22, ror #25", - "add w20, w20, w21", - "mov w21, v16.s[1]", - "add w20, w20, w21", - "mov w21, v16.s[1]", - "add w20, w20, w21", - "and w21, w23, w24", - "orr w23, w23, w24", - "and w23, w25, w23", - "orr w21, w23, w21", - "add w21, w20, w21", - "ror w23, w25, #2", - "eor w23, w23, w25, ror #13", - "eor w23, w23, w25, ror #22", - "add w21, w21, w23", - "mov w23, v16.s[3]", - "add w20, w20, w23", - "mov v2.16b, v16.16b", - "mov v2.s[3], w21", - "mov v2.s[2], w25", - "mov v2.s[1], w20", - "mov v16.16b, v2.16b", - "mov v16.s[0], w22" + "eor w24, w23, w20, ror #11", + "eor w23, w24, w20, ror #25", + "add w24, w22, w23", + "mov w22, v2.s[0]", + "add w23, w24, w22", + "mov w22, v2.s[0]", + "add w24, w23, w22", + "mov w22, v3.s[3]", + "mov w23, v3.s[2]", + "mov w25, v2.s[3]", + "and w30, w23, w25", + "orr w18, w23, w25", + "and w25, w22, w18", + "orr w18, w25, w30", + "add w25, w24, w18", + "ror w30, w22, #2", + "eor w18, w30, w22, ror #13", + "eor w30, w18, w22, ror #22", + "add w18, w25, w30", + "mov w25, v2.s[2]", + "add w30, w24, w25", + "and w24, w30, w20", + "bic w20, w21, w30", + "eor w21, w24, w20", + "ror w20, w30, #6", + "eor w24, w20, w30, ror #11", + "eor w20, w24, w30, ror #25", + "add w24, w21, w20", + "mov w20, v2.s[1]", + "add w21, w24, w20", + "mov w20, v2.s[1]", + "add w24, w21, w20", + "and w20, w22, w23", + "orr w21, w22, w23", + "and w22, w18, w21", + "orr w21, w22, w20", + "add w20, w24, w21", + "ror w21, w18, #2", + "eor w22, w21, w18, ror #13", + "eor w21, w22, w18, ror #22", + "add w22, w20, w21", + "mov w20, v2.s[3]", + "add w21, w24, w20", + "mov v3.16b, v2.16b", + "mov v3.s[3], w22", + "mov v2.16b, v3.16b", + "mov v2.s[2], w18", + "mov v3.16b, v2.16b", + "mov v3.s[1], w21", + "mov v2.16b, v3.16b", + "mov v2.s[0], w30", + "mov v16.16b, v2.16b" ] }, "sha256msg1 xmm0, xmm1": { - "ExpectedInstructionCount": 35, + "ExpectedInstructionCount": 40, "Comment": [ "0x66 0x0f 0x38 0xcc" ], "ExpectedArm64ASM": [ - "mov w20, v17.s[0]", - "mov w21, v16.s[3]", - "mov w22, v16.s[2]", - "mov w23, v16.s[1]", - "mov w24, v16.s[0]", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov w20, v3.s[0]", + "mov w21, v2.s[3]", + "mov w22, v2.s[2]", + "mov w23, v2.s[1]", + "mov w24, v2.s[0]", "ror w25, w20, #7", "ror w30, w20, #18", - "eor w25, w25, w30", - "lsr w20, w20, #3", - "eor w20, w25, w20", - "add w20, w21, w20", - "ror w25, w21, #7", + "eor w18, w25, w30", + "lsr w25, w20, #3", + "eor w20, w18, w25", + "add w25, w21, w20", + "ror w20, w21, #7", "ror w30, w21, #18", - "eor w25, w25, w30", - "lsr w21, w21, #3", - "eor w21, w25, w21", - "add w21, w22, w21", - "ror w25, w22, #7", + "eor w18, w20, w30", + "lsr w20, w21, #3", + "eor w21, w18, w20", + "add w20, w22, w21", + "ror w21, w22, #7", "ror w30, w22, #18", - "eor w25, w25, w30", - "lsr w22, w22, #3", - "eor w22, w25, w22", - "add w22, w23, w22", - "ror w25, w23, #7", + "eor w18, w21, w30", + "lsr w21, w22, #3", + "eor w22, w18, w21", + "add w21, w23, w22", + "ror w22, w23, #7", "ror w30, w23, #18", - "eor w25, w25, w30", - "lsr w23, w23, #3", - "eor w23, w25, w23", - "add w23, w24, w23", - "mov v2.16b, v16.16b", - "mov v2.s[3], w20", - "mov v2.s[2], w21", - "mov v2.s[1], w22", - "mov v16.16b, v2.16b", - "mov v16.s[0], w23" + "eor w18, w22, w30", + "lsr w22, w23, #3", + "eor w23, w18, w22", + "add w22, w24, w23", + "mov v3.16b, v2.16b", + "mov v3.s[3], w25", + "mov v2.16b, v3.16b", + "mov v2.s[2], w20", + "mov v3.16b, v2.16b", + "mov v3.s[1], w21", + "mov v2.16b, v3.16b", + "mov v2.s[0], w22", + "mov v16.16b, v2.16b" ] }, "sha256msg2 xmm0, xmm1": { - "ExpectedInstructionCount": 36, + "ExpectedInstructionCount": 41, "Comment": [ "0x66 0x0f 0x38 0xcd" ], "ExpectedArm64ASM": [ - "mov w20, v17.s[2]", - "mov w21, v17.s[3]", - "mov w22, v16.s[0]", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov w20, v3.s[2]", + "mov w21, v3.s[3]", + "mov w22, v2.s[0]", "ror w23, w20, #17", "ror w24, w20, #19", - "eor w23, w23, w24", - "lsr w20, w20, #10", - "eor w20, w23, w20", - "add w20, w22, w20", - "mov w22, v16.s[1]", - "ror w23, w21, #17", + "eor w25, w23, w24", + "lsr w23, w20, #10", + "eor w20, w25, w23", + "add w23, w22, w20", + "mov w20, v2.s[1]", + "ror w22, w21, #17", "ror w24, w21, #19", - "eor w23, w23, w24", - "lsr w21, w21, #10", - "eor w21, w23, w21", - "add w21, w22, w21", - "mov w22, v16.s[2]", - "ror w23, w20, #17", - "ror w24, w20, #19", - "eor w23, w23, w24", - "lsr w24, w20, #10", - "eor w23, w23, w24", - "add w22, w22, w23", - "mov w23, v16.s[3]", - "ror w24, w21, #17", - "ror w25, w21, #19", - "eor w24, w24, w25", - "lsr w25, w21, #10", - "eor w24, w24, w25", - "add w23, w23, w24", - "mov v2.16b, v16.16b", - "mov v2.s[3], w23", - "mov v2.s[2], w22", - "mov v2.s[1], w21", - "mov v16.16b, v2.16b", - "mov v16.s[0], w20" + "eor w25, w22, w24", + "lsr w22, w21, #10", + "eor w21, w25, w22", + "add w22, w20, w21", + "mov w20, v2.s[2]", + "ror w21, w23, #17", + "ror w24, w23, #19", + "eor w25, w21, w24", + "lsr w21, w23, #10", + "eor w24, w25, w21", + "add w21, w20, w24", + "mov w20, v2.s[3]", + "ror w24, w22, #17", + "ror w25, w22, #19", + "eor w30, w24, w25", + "lsr w24, w22, #10", + "eor w25, w30, w24", + "add w24, w20, w25", + "mov v3.16b, v2.16b", + "mov v3.s[3], w24", + "mov v2.16b, v3.16b", + "mov v2.s[2], w21", + "mov v3.16b, v2.16b", + "mov v3.s[1], w22", + "mov v2.16b, v3.16b", + "mov v2.s[0], w23", + "mov v16.16b, v2.16b" ] }, "movbe ax, word [rbx]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0x66 0x0f 0x38 0xf0" ], "ExpectedArm64ASM": [ - "ldrh w20, [x7]", - "rev w20, w20", - "bfxil x4, x20, #16, #16" + "mov x20, x7", + "ldrh w21, [x20]", + "rev w20, w21", + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #16, #16", + "mov x4, x22" ] }, "movbe eax, dword [rbx]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "0x66 0x0f 0x38 0xf0" ], "ExpectedArm64ASM": [ - "ldr w20, [x7]", - "rev w4, w20" + "mov x20, x7", + "ldr w21, [x20]", + "rev w20, w21", + "mov x4, x20" ] }, "movbe rax, qword [rbx]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "REX.W 0x66 0x0f 0x38 0xf0" ], "ExpectedArm64ASM": [ - "ldr x20, [x7]", - "rev x4, x20" + "mov x20, x7", + "ldr x21, [x20]", + "rev x20, x21", + "mov x4, x20" ] }, "adcx eax, ebx": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 18, "Comment": [ "0x66 0x0f 0x38 0xf6" ], "ExpectedArm64ASM": [ "cset w20, hs", - "mov w21, w7", - "mov w22, w4", - "add w23, w21, w20", - "add w4, w22, w23", - "mrs x22, nzcv", - "cmp w4, w21", + "mov x21, x7", + "mov w22, w21", + "mov x21, x4", + "mov w23, w21", + "add w21, w22, w20", + "add w24, w23, w21", + "mov x4, x24", + "mrs x21, nzcv", + "cmp w24, w22", "cset x23, lo", - "cmp w4, w21", - "cset x21, ls", + "cmp w24, w22", + "cset x25, ls", "cmp x20, #0x1 (1)", - "csel x20, x21, x23, eq", - "mov w0, w22", - "bfi w0, w20, #29, #1", - "mov w20, w0", + "csel x22, x25, x23, eq", + "mov w20, w21", + "bfi w20, w22, #29, #1", "msr nzcv, x20" ] }, "adcx rax, rbx": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 16, "Comment": [ "0x66 REX.W 0x0f 0x38 0xf6" ], "ExpectedArm64ASM": [ "cset w20, hs", - "add x21, x7, x20", - "add x4, x4, x21", - "mrs x21, nzcv", - "cmp x4, x7", - "cset x22, lo", - "cmp x4, x7", - "cset x23, ls", + "mov x21, x7", + "mov x22, x4", + "add x23, x21, x20", + "add x24, x22, x23", + "mov x4, x24", + "mrs x22, nzcv", + "cmp x24, x21", + "cset x23, lo", + "cmp x24, x21", + "cset x25, ls", "cmp x20, #0x1 (1)", - "csel x20, x23, x22, eq", - "mov w0, w21", - "bfi w0, w20, #29, #1", - "mov w20, w0", + "csel x21, x25, x23, eq", + "mov w20, w22", + "bfi w20, w21, #29, #1", "msr nzcv, x20" ] }, "adox eax, ebx": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 18, "Comment": [ "0xf3 0x0f 0x38 0xf6" ], "ExpectedArm64ASM": [ "cset w20, vs", - "mov w21, w7", - "mov w22, w4", - "add w23, w21, w20", - "add w4, w22, w23", - "mrs x22, nzcv", - "cmp w4, w21", + "mov x21, x7", + "mov w22, w21", + "mov x21, x4", + "mov w23, w21", + "add w21, w22, w20", + "add w24, w23, w21", + "mov x4, x24", + "mrs x21, nzcv", + "cmp w24, w22", "cset x23, lo", - "cmp w4, w21", - "cset x21, ls", + "cmp w24, w22", + "cset x25, ls", "cmp x20, #0x1 (1)", - "csel x20, x21, x23, eq", - "mov w0, w22", - "bfi w0, w20, #28, #1", - "mov w20, w0", + "csel x22, x25, x23, eq", + "mov w20, w21", + "bfi w20, w22, #28, #1", "msr nzcv, x20" ] }, "adox rax, rbx": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 16, "Comment": [ "0xf3 REX.W 0x0f 0x38 0xf6" ], "ExpectedArm64ASM": [ "cset w20, vs", - "add x21, x7, x20", - "add x4, x4, x21", - "mrs x21, nzcv", - "cmp x4, x7", - "cset x22, lo", - "cmp x4, x7", - "cset x23, ls", + "mov x21, x7", + "mov x22, x4", + "add x23, x21, x20", + "add x24, x22, x23", + "mov x4, x24", + "mrs x22, nzcv", + "cmp x24, x21", + "cset x23, lo", + "cmp x24, x21", + "cset x25, ls", "cmp x20, #0x1 (1)", - "csel x20, x23, x22, eq", - "mov w0, w21", - "bfi w0, w20, #28, #1", - "mov w20, w0", + "csel x21, x25, x23, eq", + "mov w20, w22", + "bfi w20, w21, #28, #1", "msr nzcv, x20" ] } diff --git a/unittests/InstructionCountCI/H0F3A.json b/unittests/InstructionCountCI/H0F3A.json index 752bc3d179..82b7a8b193 100644 --- a/unittests/InstructionCountCI/H0F3A.json +++ b/unittests/InstructionCountCI/H0F3A.json @@ -32,8 +32,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "ext v2.8b, v2.8b, v3.8b, #1", - "str d2, [x28, #768]" + "ext v4.8b, v2.8b, v3.8b, #1", + "str d4, [x28, #768]" ] }, "palignr mm0, mm1, 255": { @@ -47,213 +47,273 @@ ] }, "roundps xmm0, xmm1, 00000000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Nearest rounding", "0x66 0x0f 0x3a 0x08" ], "ExpectedArm64ASM": [ - "frintn v16.4s, v17.4s" + "mov v2.16b, v17.16b", + "frintn v3.4s, v2.4s", + "mov v16.16b, v3.16b" ] }, "roundps xmm0, xmm1, 00000001b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "-inf rounding", "0x66 0x0f 0x3a 0x08" ], "ExpectedArm64ASM": [ - "frintm v16.4s, v17.4s" + "mov v2.16b, v17.16b", + "frintm v3.4s, v2.4s", + "mov v16.16b, v3.16b" ] }, "roundps xmm0, xmm1, 00000010b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "+inf rounding", "0x66 0x0f 0x3a 0x08" ], "ExpectedArm64ASM": [ - "frintp v16.4s, v17.4s" + "mov v2.16b, v17.16b", + "frintp v3.4s, v2.4s", + "mov v16.16b, v3.16b" ] }, "roundps xmm0, xmm1, 00000011b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "truncate rounding", "0x66 0x0f 0x3a 0x08" ], "ExpectedArm64ASM": [ - "frintz v16.4s, v17.4s" + "mov v2.16b, v17.16b", + "frintz v3.4s, v2.4s", + "mov v16.16b, v3.16b" ] }, "roundps xmm0, xmm1, 00000100b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "host rounding mode rounding", "0x66 0x0f 0x3a 0x08" ], "ExpectedArm64ASM": [ - "frinti v16.4s, v17.4s" + "mov v2.16b, v17.16b", + "frinti v3.4s, v2.4s", + "mov v16.16b, v3.16b" ] }, "roundpd xmm0, xmm1, 00000000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Nearest rounding", "0x66 0x0f 0x3a 0x09" ], "ExpectedArm64ASM": [ - "frintn v16.2d, v17.2d" + "mov v2.16b, v17.16b", + "frintn v3.2d, v2.2d", + "mov v16.16b, v3.16b" ] }, "roundpd xmm0, xmm1, 00000001b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "-inf rounding", "0x66 0x0f 0x3a 0x09" ], "ExpectedArm64ASM": [ - "frintm v16.2d, v17.2d" + "mov v2.16b, v17.16b", + "frintm v3.2d, v2.2d", + "mov v16.16b, v3.16b" ] }, "roundpd xmm0, xmm1, 00000010b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "+inf rounding", "0x66 0x0f 0x3a 0x09" ], "ExpectedArm64ASM": [ - "frintp v16.2d, v17.2d" + "mov v2.16b, v17.16b", + "frintp v3.2d, v2.2d", + "mov v16.16b, v3.16b" ] }, "roundpd xmm0, xmm1, 00000011b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "truncate rounding", "0x66 0x0f 0x3a 0x09" ], "ExpectedArm64ASM": [ - "frintz v16.2d, v17.2d" + "mov v2.16b, v17.16b", + "frintz v3.2d, v2.2d", + "mov v16.16b, v3.16b" ] }, "roundpd xmm0, xmm1, 00000100b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "host rounding mode rounding", "0x66 0x0f 0x3a 0x09" ], "ExpectedArm64ASM": [ - "frinti v16.2d, v17.2d" + "mov v2.16b, v17.16b", + "frinti v3.2d, v2.2d", + "mov v16.16b, v3.16b" ] }, "roundss xmm0, xmm1, 00000000b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "Nearest rounding", "0x66 0x0f 0x3a 0x0a" ], "ExpectedArm64ASM": [ - "frintn s0, s17", - "mov v16.s[0], v0.s[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "frintn s0, s3", + "mov v4.s[0], v0.s[0]", + "mov v16.16b, v4.16b" ] }, "roundss xmm0, xmm1, 00000001b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "-inf rounding", "0x66 0x0f 0x3a 0x0a" ], "ExpectedArm64ASM": [ - "frintm s0, s17", - "mov v16.s[0], v0.s[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "frintm s0, s3", + "mov v4.s[0], v0.s[0]", + "mov v16.16b, v4.16b" ] }, "roundss xmm0, xmm1, 00000010b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "+inf rounding", "0x66 0x0f 0x3a 0x0a" ], "ExpectedArm64ASM": [ - "frintp s0, s17", - "mov v16.s[0], v0.s[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "frintp s0, s3", + "mov v4.s[0], v0.s[0]", + "mov v16.16b, v4.16b" ] }, "roundss xmm0, xmm1, 00000011b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "truncate rounding", "0x66 0x0f 0x3a 0x0a" ], "ExpectedArm64ASM": [ - "frintz s0, s17", - "mov v16.s[0], v0.s[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "frintz s0, s3", + "mov v4.s[0], v0.s[0]", + "mov v16.16b, v4.16b" ] }, "roundss xmm0, xmm1, 00000100b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "host rounding mode rounding", "0x66 0x0f 0x3a 0x0a" ], "ExpectedArm64ASM": [ - "frinti s0, s17", - "mov v16.s[0], v0.s[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "frinti s0, s3", + "mov v4.s[0], v0.s[0]", + "mov v16.16b, v4.16b" ] }, "roundsd xmm0, xmm1, 00000000b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "Nearest rounding", "0x66 0x0f 0x3a 0x0b" ], "ExpectedArm64ASM": [ - "frintn d0, d17", - "mov v16.d[0], v0.d[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "frintn d0, d3", + "mov v4.d[0], v0.d[0]", + "mov v16.16b, v4.16b" ] }, "roundsd xmm0, xmm1, 00000001b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "-inf rounding", "0x66 0x0f 0x3a 0x0b" ], "ExpectedArm64ASM": [ - "frintm d0, d17", - "mov v16.d[0], v0.d[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "frintm d0, d3", + "mov v4.d[0], v0.d[0]", + "mov v16.16b, v4.16b" ] }, "roundsd xmm0, xmm1, 00000010b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "+inf rounding", "0x66 0x0f 0x3a 0x0b" ], "ExpectedArm64ASM": [ - "frintp d0, d17", - "mov v16.d[0], v0.d[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "frintp d0, d3", + "mov v4.d[0], v0.d[0]", + "mov v16.16b, v4.16b" ] }, "roundsd xmm0, xmm1, 00000011b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "truncate rounding", "0x66 0x0f 0x3a 0x0b" ], "ExpectedArm64ASM": [ - "frintz d0, d17", - "mov v16.d[0], v0.d[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "frintz d0, d3", + "mov v4.d[0], v0.d[0]", + "mov v16.16b, v4.16b" ] }, "roundsd xmm0, xmm1, 00000100b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "host rounding mode rounding", "0x66 0x0f 0x3a 0x0b" ], "ExpectedArm64ASM": [ - "frinti d0, d17", - "mov v16.d[0], v0.d[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "frinti d0, d3", + "mov v4.d[0], v0.d[0]", + "mov v16.16b, v4.16b" ] }, "blendps xmm0, xmm1, 0000b": { @@ -264,146 +324,207 @@ "ExpectedArm64ASM": [] }, "blendps xmm0, xmm1, 0001b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x3a 0x0c" ], "ExpectedArm64ASM": [ - "mov v16.s[0], v17.s[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "mov v4.s[0], v3.s[0]", + "mov v16.16b, v4.16b" ] }, "blendps xmm0, xmm1, 0010b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x3a 0x0c" ], "ExpectedArm64ASM": [ - "mov v16.s[1], v17.s[1]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "mov v4.s[1], v3.s[1]", + "mov v16.16b, v4.16b" ] }, "blendps xmm0, xmm1, 0011b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x3a 0x0c" ], "ExpectedArm64ASM": [ - "mov v16.d[0], v17.d[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "mov v4.d[0], v3.d[0]", + "mov v16.16b, v4.16b" ] }, "blendps xmm0, xmm1, 0100b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x3a 0x0c" ], "ExpectedArm64ASM": [ - "mov v16.s[2], v17.s[2]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "mov v4.s[2], v3.s[2]", + "mov v16.16b, v4.16b" ] }, "blendps xmm0, xmm1, 0101b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x3a 0x0c" ], "ExpectedArm64ASM": [ - "rev64 v2.4s, v17.4s", - "trn2 v16.4s, v2.4s, v16.4s" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "rev64 v4.4s, v3.4s", + "trn2 v3.4s, v4.4s, v2.4s", + "mov v16.16b, v3.16b" ] }, "blendps xmm0, xmm1, 0110b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 7, "Comment": [ "0x66 0x0f 0x3a 0x0c" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2176]", - "tbx v16.16b, {v17.16b}, v2.16b" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "ldr q4, [x28, #2176]", + "mov v0.16b, v2.16b", + "tbx v0.16b, {v3.16b}, v4.16b", + "mov v5.16b, v0.16b", + "mov v16.16b, v5.16b" ] }, "blendps xmm0, xmm1, 0111b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 7, "Comment": [ "0x66 0x0f 0x3a 0x0c" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2192]", - "tbx v16.16b, {v17.16b}, v2.16b" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "ldr q4, [x28, #2192]", + "mov v0.16b, v2.16b", + "tbx v0.16b, {v3.16b}, v4.16b", + "mov v5.16b, v0.16b", + "mov v16.16b, v5.16b" ] }, "blendps xmm0, xmm1, 1000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x3a 0x0c" ], "ExpectedArm64ASM": [ - "mov v16.s[3], v17.s[3]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "mov v4.s[3], v3.s[3]", + "mov v16.16b, v4.16b" ] }, "blendps xmm0, xmm1, 1001b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 7, "Comment": [ "0x66 0x0f 0x3a 0x0c" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2208]", - "tbx v16.16b, {v17.16b}, v2.16b" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "ldr q4, [x28, #2208]", + "mov v0.16b, v2.16b", + "tbx v0.16b, {v3.16b}, v4.16b", + "mov v5.16b, v0.16b", + "mov v16.16b, v5.16b" ] }, "blendps xmm0, xmm1, 1010b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x3a 0x0c" ], "ExpectedArm64ASM": [ - "rev64 v2.4s, v16.4s", - "trn2 v16.4s, v2.4s, v17.4s" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "rev64 v4.4s, v2.4s", + "trn2 v2.4s, v4.4s, v3.4s", + "mov v16.16b, v2.16b" ] }, "blendps xmm0, xmm1, 1011b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 7, "Comment": [ "0x66 0x0f 0x3a 0x0c" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2224]", - "tbx v16.16b, {v17.16b}, v2.16b" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "ldr q4, [x28, #2224]", + "mov v0.16b, v2.16b", + "tbx v0.16b, {v3.16b}, v4.16b", + "mov v5.16b, v0.16b", + "mov v16.16b, v5.16b" ] }, "blendps xmm0, xmm1, 1100b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x3a 0x0c" ], "ExpectedArm64ASM": [ - "mov v16.d[1], v17.d[1]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "mov v4.d[1], v3.d[1]", + "mov v16.16b, v4.16b" ] }, "blendps xmm0, xmm1, 1101b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 7, "Comment": [ "0x66 0x0f 0x3a 0x0c" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2240]", - "tbx v16.16b, {v17.16b}, v2.16b" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "ldr q4, [x28, #2240]", + "mov v0.16b, v2.16b", + "tbx v0.16b, {v3.16b}, v4.16b", + "mov v5.16b, v0.16b", + "mov v16.16b, v5.16b" ] }, "blendps xmm0, xmm1, 1110b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 7, "Comment": [ "0x66 0x0f 0x3a 0x0c" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2256]", - "tbx v16.16b, {v17.16b}, v2.16b" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "ldr q4, [x28, #2256]", + "mov v0.16b, v2.16b", + "tbx v0.16b, {v3.16b}, v4.16b", + "mov v5.16b, v0.16b", + "mov v16.16b, v5.16b" ] }, "blendps xmm0, xmm1, 1111b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "0x66 0x0f 0x3a 0x0c" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b" + "mov v2.16b, v17.16b", + "mov v16.16b, v2.16b" ] }, "blendpd xmm0, xmm1, 00b": { @@ -414,30 +535,39 @@ "ExpectedArm64ASM": [] }, "blendpd xmm0, xmm1, 01b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x3a 0x0d" ], "ExpectedArm64ASM": [ - "mov v16.d[0], v17.d[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "mov v4.d[0], v3.d[0]", + "mov v16.16b, v4.16b" ] }, "blendpd xmm0, xmm1, 10b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x3a 0x0d" ], "ExpectedArm64ASM": [ - "mov v16.d[1], v17.d[1]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "mov v4.d[1], v3.d[1]", + "mov v16.16b, v4.16b" ] }, "blendpd xmm0, xmm1, 11b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "0x66 0x0f 0x3a 0x0d" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b" + "mov v2.16b, v17.16b", + "mov v16.16b, v2.16b" ] }, "pblendw xmm0, xmm1, 00000000b": { @@ -448,1149 +578,1405 @@ "ExpectedArm64ASM": [] }, "pblendw xmm0, xmm1, 00000001b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x3a 0x0e" ], "ExpectedArm64ASM": [ - "mov v16.h[0], v17.h[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "mov v4.h[0], v3.h[0]", + "mov v16.16b, v4.16b" ] }, "pblendw xmm0, xmm1, 11010111b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 8, "Comment": [ "0x66 0x0f 0x3a 0x0e" ], "ExpectedArm64ASM": [ + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", "ldr x0, [x28, #1792]", - "ldr q2, [x0, #3440]", - "tbx v16.16b, {v17.16b}, v2.16b" + "ldr q4, [x0, #3440]", + "mov v0.16b, v2.16b", + "tbx v0.16b, {v3.16b}, v4.16b", + "mov v5.16b, v0.16b", + "mov v16.16b, v5.16b" ] }, "pblendw xmm0, xmm1, 00000011b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x3a 0x0e" ], "ExpectedArm64ASM": [ - "mov v16.s[0], v17.s[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "mov v4.s[0], v3.s[0]", + "mov v16.16b, v4.16b" ] }, "pblendw xmm0, xmm1, 00001100b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x3a 0x0e" ], "ExpectedArm64ASM": [ - "mov v16.s[1], v17.s[1]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "mov v4.s[1], v3.s[1]", + "mov v16.16b, v4.16b" ] }, "pblendw xmm0, xmm1, 00110000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x3a 0x0e" ], "ExpectedArm64ASM": [ - "mov v16.s[2], v17.s[2]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "mov v4.s[2], v3.s[2]", + "mov v16.16b, v4.16b" ] }, "pblendw xmm0, xmm1, 11000000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x3a 0x0e" ], "ExpectedArm64ASM": [ - "mov v16.s[3], v17.s[3]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "mov v4.s[3], v3.s[3]", + "mov v16.16b, v4.16b" ] }, "pblendw xmm0, xmm1, 00001111b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x3a 0x0e" ], "ExpectedArm64ASM": [ - "mov v16.d[0], v17.d[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "mov v4.d[0], v3.d[0]", + "mov v16.16b, v4.16b" ] }, "pblendw xmm0, xmm1, 11110000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x3a 0x0e" ], "ExpectedArm64ASM": [ - "mov v16.d[1], v17.d[1]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "mov v4.d[1], v3.d[1]", + "mov v16.16b, v4.16b" ] }, "pblendw xmm0, xmm1, 11111111b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "0x66 0x0f 0x3a 0x0e" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b" + "mov v2.16b, v17.16b", + "mov v16.16b, v2.16b" ] }, "palignr xmm0, xmm1, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "0x66 0x0f 0x3a 0x0f" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b" + "mov v2.16b, v17.16b", + "mov v16.16b, v2.16b" ] }, "palignr xmm0, xmm1, 1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "0x66 0x0f 0x3a 0x0f" ], "ExpectedArm64ASM": [ - "ext v16.16b, v17.16b, v16.16b, #1" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "ext v4.16b, v2.16b, v3.16b, #1", + "mov v16.16b, v4.16b" ] }, "palignr xmm0, xmm1, 255": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "0x66 0x0f 0x3a 0x0f" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "movi v2.2d, #0x0", + "mov v16.16b, v2.16b" ] }, "pextrb eax, xmm0, 0000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "0x66 0x0f 0x3a 0x14" ], "ExpectedArm64ASM": [ - "umov w4, v16.b[0]" + "mov v2.16b, v16.16b", + "umov w20, v2.b[0]", + "mov x4, x20" ] }, "pextrb eax, xmm0, 1111b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "0x66 0x0f 0x3a 0x14" ], "ExpectedArm64ASM": [ - "umov w4, v16.b[15]" + "mov v2.16b, v16.16b", + "umov w20, v2.b[15]", + "mov x4, x20" ] }, "pextrw eax, xmm0, 000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "0x66 0x0f 0x3a 0x15" ], "ExpectedArm64ASM": [ - "umov w4, v16.h[0]" + "mov v2.16b, v16.16b", + "umov w20, v2.h[0]", + "mov x4, x20" ] }, "pextrw eax, xmm0, 111b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "0x66 0x0f 0x3a 0x15" ], "ExpectedArm64ASM": [ - "umov w4, v16.h[7]" + "mov v2.16b, v16.16b", + "umov w20, v2.h[7]", + "mov x4, x20" ] }, "pextrd eax, xmm0, 00b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "0x66 0x0f 0x3a 0x16" ], "ExpectedArm64ASM": [ - "mov w4, v16.s[0]" + "mov v2.16b, v16.16b", + "mov w20, v2.s[0]", + "mov x4, x20" ] }, "pextrd eax, xmm0, 11b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "0x66 0x0f 0x3a 0x16" ], "ExpectedArm64ASM": [ - "mov w4, v16.s[3]" + "mov v2.16b, v16.16b", + "mov w20, v2.s[3]", + "mov x4, x20" ] }, "pextrq rax, xmm0, 0b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "0x66 REX.W 0x0f 0x3a 0x16" ], "ExpectedArm64ASM": [ - "mov x4, v16.d[0]" + "mov v2.16b, v16.16b", + "mov x20, v2.d[0]", + "mov x4, x20" ] }, "pextrq rax, xmm0, 1b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "0x66 REX.W 0x0f 0x3a 0x16" ], "ExpectedArm64ASM": [ - "mov x4, v16.d[1]" + "mov v2.16b, v16.16b", + "mov x20, v2.d[1]", + "mov x4, x20" ] }, "pextrb [rax], xmm0, 0000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "0x66 0x0f 0x3a 0x14" ], "ExpectedArm64ASM": [ - "st1 {v16.b}[0], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "st1 {v2.b}[0], [x20]" ] }, "pextrb [rax], xmm0, 1111b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "0x66 0x0f 0x3a 0x14" ], "ExpectedArm64ASM": [ - "st1 {v16.b}[15], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "st1 {v2.b}[15], [x20]" ] }, "pextrw [rax], xmm0, 000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "0x66 0x0f 0x3a 0x15" ], "ExpectedArm64ASM": [ - "st1 {v16.h}[0], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "st1 {v2.h}[0], [x20]" ] }, "pextrw [rax], xmm0, 111b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "0x66 0x0f 0x3a 0x15" ], "ExpectedArm64ASM": [ - "st1 {v16.h}[7], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "st1 {v2.h}[7], [x20]" ] }, "pextrd [rax], xmm0, 00b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "0x66 0x0f 0x3a 0x16" ], "ExpectedArm64ASM": [ - "st1 {v16.s}[0], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "st1 {v2.s}[0], [x20]" ] }, "pextrd [rax], xmm0, 11b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "0x66 0x0f 0x3a 0x16" ], "ExpectedArm64ASM": [ - "st1 {v16.s}[3], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "st1 {v2.s}[3], [x20]" ] }, "pextrq [rax], xmm0, 0b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "0x66 REX.W 0x0f 0x3a 0x16" ], "ExpectedArm64ASM": [ - "st1 {v16.d}[0], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "st1 {v2.d}[0], [x20]" ] }, "pextrq [rax], xmm0, 1b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "0x66 REX.W 0x0f 0x3a 0x16" ], "ExpectedArm64ASM": [ - "st1 {v16.d}[1], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "st1 {v2.d}[1], [x20]" ] }, "extractps eax, xmm0, 00b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "0x66 0x0f 0x3a 0x17" ], "ExpectedArm64ASM": [ - "mov w4, v16.s[0]" + "mov v2.16b, v16.16b", + "mov w20, v2.s[0]", + "mov x4, x20" ] }, "extractps eax, xmm0, 11b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "0x66 0x0f 0x3a 0x17" ], "ExpectedArm64ASM": [ - "mov w4, v16.s[3]" + "mov v2.16b, v16.16b", + "mov w20, v2.s[3]", + "mov x4, x20" ] }, "pinsrb xmm0, eax, 0000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x3a 0x20" ], "ExpectedArm64ASM": [ - "mov v16.b[0], w4" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.b[0], w20", + "mov v16.16b, v3.16b" ] }, "pinsrb xmm0, eax, 0001b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x3a 0x20" ], "ExpectedArm64ASM": [ - "mov v16.b[1], w4" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.b[1], w20", + "mov v16.16b, v3.16b" ] }, "pinsrb xmm0, eax, 1111b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x3a 0x20" ], "ExpectedArm64ASM": [ - "mov v16.b[15], w4" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.b[15], w20", + "mov v16.16b, v3.16b" ] }, "pinsrb xmm0, [rax], 0000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x3a 0x20" ], "ExpectedArm64ASM": [ - "ld1 {v16.b}[0], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "ld1 {v3.b}[0], [x20]", + "mov v16.16b, v3.16b" ] }, "pinsrb xmm0, [rax], 0001b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x3a 0x20" ], "ExpectedArm64ASM": [ - "ld1 {v16.b}[1], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "ld1 {v3.b}[1], [x20]", + "mov v16.16b, v3.16b" ] }, "pinsrb xmm0, [rax], 1111b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x3a 0x20" ], "ExpectedArm64ASM": [ - "ld1 {v16.b}[15], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "ld1 {v3.b}[15], [x20]", + "mov v16.16b, v3.16b" ] }, "insertps xmm0, xmm1, 00000000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x3a 0x21" ], "ExpectedArm64ASM": [ - "mov v16.s[0], v17.s[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "mov v4.s[0], v3.s[0]", + "mov v16.16b, v4.16b" ] }, "insertps xmm0, xmm1, 00001111b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "0x66 0x0f 0x3a 0x21" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "movi v2.2d, #0x0", + "mov v16.16b, v2.16b" ] }, "insertps xmm0, xmm1, 00010000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x3a 0x21" ], "ExpectedArm64ASM": [ - "mov v16.s[1], v17.s[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "mov v4.s[1], v3.s[0]", + "mov v16.16b, v4.16b" ] }, "pinsrd xmm0, eax, 00b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x3a 0x22" ], "ExpectedArm64ASM": [ - "mov v16.s[0], w4" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.s[0], w20", + "mov v16.16b, v3.16b" ] }, "pinsrd xmm0, eax, 01b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x3a 0x22" ], "ExpectedArm64ASM": [ - "mov v16.s[1], w4" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.s[1], w20", + "mov v16.16b, v3.16b" ] }, "pinsrd xmm0, eax, 11b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x3a 0x22" ], "ExpectedArm64ASM": [ - "mov v16.s[3], w4" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.s[3], w20", + "mov v16.16b, v3.16b" ] }, "pinsrq xmm0, rax, 0b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 REX.W 0x0f 0x3a 0x22" ], "ExpectedArm64ASM": [ - "mov v16.d[0], x4" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.d[0], x20", + "mov v16.16b, v3.16b" ] }, "pinsrq xmm0, rax, 1b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 REX.W 0x0f 0x3a 0x22" ], "ExpectedArm64ASM": [ - "mov v16.d[1], x4" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.d[1], x20", + "mov v16.16b, v3.16b" ] }, "pinsrd xmm0, [rax], 00b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x3a 0x22" ], "ExpectedArm64ASM": [ - "ld1 {v16.s}[0], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "ld1 {v3.s}[0], [x20]", + "mov v16.16b, v3.16b" ] }, "pinsrd xmm0, [rax], 01b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x3a 0x22" ], "ExpectedArm64ASM": [ - "ld1 {v16.s}[1], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "ld1 {v3.s}[1], [x20]", + "mov v16.16b, v3.16b" ] }, "pinsrd xmm0, [rax], 11b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 0x0f 0x3a 0x22" ], "ExpectedArm64ASM": [ - "ld1 {v16.s}[3], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "ld1 {v3.s}[3], [x20]", + "mov v16.16b, v3.16b" ] }, "pinsrq xmm0, [rax], 0b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 REX.W 0x0f 0x3a 0x22" ], "ExpectedArm64ASM": [ - "ld1 {v16.d}[0], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "ld1 {v3.d}[0], [x20]", + "mov v16.16b, v3.16b" ] }, "pinsrq xmm0, [rax], 1b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0x66 REX.W 0x0f 0x3a 0x22" ], "ExpectedArm64ASM": [ - "ld1 {v16.d}[1], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "ld1 {v3.d}[1], [x20]", + "mov v16.16b, v3.16b" ] }, "dpps xmm0, xmm1, 00000000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "movi v2.2d, #0x0", + "mov v16.16b, v2.16b" ] }, "dpps xmm0, xmm1, 00001111b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "movi v2.2d, #0x0", + "mov v16.16b, v2.16b" ] }, "dpps xmm0, xmm1, 11110000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "movi v2.2d, #0x0", + "mov v16.16b, v2.16b" ] }, "dpps xmm0, xmm1, 11110001b": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ "movi v2.2d, #0x0", - "fmul v3.4s, v16.4s, v17.4s", - "faddp v3.4s, v3.4s, v3.4s", + "mov v3.16b, v16.16b", + "mov v4.16b, v17.16b", + "fmul v5.4s, v3.4s, v4.4s", + "faddp v3.4s, v5.4s, v5.4s", "faddp s3, v3.2s", - "zip1 v16.4s, v3.4s, v2.4s" + "zip1 v4.4s, v3.4s, v2.4s", + "mov v16.16b, v4.16b" ] }, "dpps xmm0, xmm1, 11110010b": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ "movi v2.2d, #0x0", - "fmul v3.4s, v16.4s, v17.4s", - "faddp v3.4s, v3.4s, v3.4s", + "mov v3.16b, v16.16b", + "mov v4.16b, v17.16b", + "fmul v5.4s, v3.4s, v4.4s", + "faddp v3.4s, v5.4s, v5.4s", "faddp s3, v3.2s", - "zip1 v16.2s, v2.2s, v3.2s" + "zip1 v4.2s, v2.2s, v3.2s", + "mov v16.16b, v4.16b" ] }, "dpps xmm0, xmm1, 11110011b": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ - "fmul v2.4s, v16.4s, v17.4s", - "faddp v2.4s, v2.4s, v2.4s", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "fmul v4.4s, v2.4s, v3.4s", + "faddp v2.4s, v4.4s, v4.4s", "faddp s2, v2.2s", - "dup v16.2s, v2.s[0]" + "dup v3.2s, v2.s[0]", + "mov v16.16b, v3.16b" ] }, "dpps xmm0, xmm1, 11110100b": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ "movi v2.2d, #0x0", - "fmul v3.4s, v16.4s, v17.4s", - "faddp v3.4s, v3.4s, v3.4s", + "mov v3.16b, v16.16b", + "mov v4.16b, v17.16b", + "fmul v5.4s, v3.4s, v4.4s", + "faddp v3.4s, v5.4s, v5.4s", "faddp s3, v3.2s", - "zip1 v16.2d, v2.2d, v3.2d" + "zip1 v4.2d, v2.2d, v3.2d", + "mov v16.16b, v4.16b" ] }, "dpps xmm0, xmm1, 11110101b": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ - "fmul v2.4s, v16.4s, v17.4s", - "faddp v2.4s, v2.4s, v2.4s", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "fmul v4.4s, v2.4s, v3.4s", + "faddp v2.4s, v4.4s, v4.4s", "faddp s2, v2.2s", - "zip1 v16.2d, v2.2d, v2.2d" + "zip1 v3.2d, v2.2d, v2.2d", + "mov v16.16b, v3.16b" ] }, "dpps xmm0, xmm1, 11110110b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ "movi v2.2d, #0x0", - "fmul v3.4s, v16.4s, v17.4s", - "faddp v3.4s, v3.4s, v3.4s", + "mov v3.16b, v16.16b", + "mov v4.16b, v17.16b", + "fmul v5.4s, v3.4s, v4.4s", + "faddp v3.4s, v5.4s, v5.4s", "faddp s3, v3.2s", - "mov v2.s[1], v3.s[0]", - "mov v16.16b, v2.16b", - "mov v16.s[2], v3.s[0]" + "mov v4.16b, v2.16b", + "mov v4.s[1], v3.s[0]", + "mov v2.16b, v4.16b", + "mov v2.s[2], v3.s[0]", + "mov v16.16b, v2.16b" ] }, "dpps xmm0, xmm1, 11110111b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ "movi v2.2d, #0x0", - "fmul v3.4s, v16.4s, v17.4s", - "faddp v3.4s, v3.4s, v3.4s", + "mov v3.16b, v16.16b", + "mov v4.16b, v17.16b", + "fmul v5.4s, v3.4s, v4.4s", + "faddp v3.4s, v5.4s, v5.4s", "faddp s3, v3.2s", - "dup v3.4s, v3.s[0]", - "mov v16.16b, v3.16b", - "mov v16.s[3], v2.s[0]" + "dup v4.4s, v3.s[0]", + "mov v3.16b, v4.16b", + "mov v3.s[3], v2.s[0]", + "mov v16.16b, v3.16b" ] }, "dpps xmm0, xmm1, 11111000b": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ "movi v2.2d, #0x0", - "fmul v3.4s, v16.4s, v17.4s", - "faddp v3.4s, v3.4s, v3.4s", + "mov v3.16b, v16.16b", + "mov v4.16b, v17.16b", + "fmul v5.4s, v3.4s, v4.4s", + "faddp v3.4s, v5.4s, v5.4s", "faddp s3, v3.2s", - "ext v16.16b, v2.16b, v3.16b, #4" + "ext v4.16b, v2.16b, v3.16b, #4", + "mov v16.16b, v4.16b" ] }, "dpps xmm0, xmm1, 11111001b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ "movi v2.2d, #0x0", - "fmul v3.4s, v16.4s, v17.4s", - "faddp v3.4s, v3.4s, v3.4s", + "mov v3.16b, v16.16b", + "mov v4.16b, v17.16b", + "fmul v5.4s, v3.4s, v4.4s", + "faddp v3.4s, v5.4s, v5.4s", "faddp s3, v3.2s", - "mov v2.s[0], v3.s[0]", - "mov v16.16b, v2.16b", - "mov v16.s[3], v3.s[0]" + "mov v4.16b, v2.16b", + "mov v4.s[0], v3.s[0]", + "mov v2.16b, v4.16b", + "mov v2.s[3], v3.s[0]", + "mov v16.16b, v2.16b" ] }, "dpps xmm0, xmm1, 11111010b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ "movi v2.2d, #0x0", - "fmul v3.4s, v16.4s, v17.4s", - "faddp v3.4s, v3.4s, v3.4s", + "mov v3.16b, v16.16b", + "mov v4.16b, v17.16b", + "fmul v5.4s, v3.4s, v4.4s", + "faddp v3.4s, v5.4s, v5.4s", "faddp s3, v3.2s", - "dup v3.4s, v3.s[0]", - "zip1 v16.4s, v2.4s, v3.4s" + "dup v4.4s, v3.s[0]", + "zip1 v3.4s, v2.4s, v4.4s", + "mov v16.16b, v3.16b" ] }, "dpps xmm0, xmm1, 11111011b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ "movi v2.2d, #0x0", - "fmul v3.4s, v16.4s, v17.4s", - "faddp v3.4s, v3.4s, v3.4s", + "mov v3.16b, v16.16b", + "mov v4.16b, v17.16b", + "fmul v5.4s, v3.4s, v4.4s", + "faddp v3.4s, v5.4s, v5.4s", "faddp s3, v3.2s", - "dup v3.4s, v3.s[0]", - "mov v16.16b, v3.16b", - "mov v16.s[2], v2.s[0]" + "dup v4.4s, v3.s[0]", + "mov v3.16b, v4.16b", + "mov v3.s[2], v2.s[0]", + "mov v16.16b, v3.16b" ] }, "dpps xmm0, xmm1, 11111100b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ "movi v2.2d, #0x0", - "fmul v3.4s, v16.4s, v17.4s", - "faddp v3.4s, v3.4s, v3.4s", + "mov v3.16b, v16.16b", + "mov v4.16b, v17.16b", + "fmul v5.4s, v3.4s, v4.4s", + "faddp v3.4s, v5.4s, v5.4s", "faddp s3, v3.2s", - "dup v3.4s, v3.s[0]", - "zip1 v16.2d, v2.2d, v3.2d" + "dup v4.4s, v3.s[0]", + "zip1 v3.2d, v2.2d, v4.2d", + "mov v16.16b, v3.16b" ] }, "dpps xmm0, xmm1, 11111101b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ "movi v2.2d, #0x0", - "fmul v3.4s, v16.4s, v17.4s", - "faddp v3.4s, v3.4s, v3.4s", + "mov v3.16b, v16.16b", + "mov v4.16b, v17.16b", + "fmul v5.4s, v3.4s, v4.4s", + "faddp v3.4s, v5.4s, v5.4s", "faddp s3, v3.2s", - "dup v3.4s, v3.s[0]", - "mov v16.16b, v3.16b", - "mov v16.s[1], v2.s[0]" + "dup v4.4s, v3.s[0]", + "mov v3.16b, v4.16b", + "mov v3.s[1], v2.s[0]", + "mov v16.16b, v3.16b" ] }, "dpps xmm0, xmm1, 11111110b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ "movi v2.2d, #0x0", - "fmul v3.4s, v16.4s, v17.4s", - "faddp v3.4s, v3.4s, v3.4s", + "mov v3.16b, v16.16b", + "mov v4.16b, v17.16b", + "fmul v5.4s, v3.4s, v4.4s", + "faddp v3.4s, v5.4s, v5.4s", "faddp s3, v3.2s", - "dup v3.4s, v3.s[0]", - "mov v16.16b, v3.16b", - "mov v16.s[0], v2.s[0]" + "dup v4.4s, v3.s[0]", + "mov v3.16b, v4.16b", + "mov v3.s[0], v2.s[0]", + "mov v16.16b, v3.16b" ] }, "dpps xmm0, xmm1, 11111111b": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ - "fmul v2.4s, v16.4s, v17.4s", - "faddp v2.4s, v2.4s, v2.4s", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "fmul v4.4s, v2.4s, v3.4s", + "faddp v2.4s, v4.4s, v4.4s", "faddp s2, v2.2s", - "dup v16.4s, v2.s[0]" + "dup v3.4s, v2.s[0]", + "mov v16.16b, v3.16b" ] }, "dppd xmm0, xmm1, 00000000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "0x66 0x0f 0x3a 0x41" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "movi v2.2d, #0x0", + "mov v16.16b, v2.16b" ] }, "dppd xmm0, xmm1, 00001111b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "0x66 0x0f 0x3a 0x41" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "movi v2.2d, #0x0", + "mov v16.16b, v2.16b" ] }, "dppd xmm0, xmm1, 11110000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "0x66 0x0f 0x3a 0x41" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "movi v2.2d, #0x0", + "mov v16.16b, v2.16b" ] }, "dppd xmm0, xmm1, 11111111b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "0x66 0x0f 0x3a 0x41" ], "ExpectedArm64ASM": [ - "fmul v2.2d, v16.2d, v17.2d", - "faddp d2, v2.2d", - "dup v16.2d, v2.d[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "fmul v4.2d, v2.2d, v3.2d", + "faddp d2, v4.2d", + "dup v3.2d, v2.d[0]", + "mov v16.16b, v3.16b" ] }, "mpsadbw xmm0, xmm1, 000b": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 17, "Comment": [ "0x66 0x0f 0x3a 0x42" ], "ExpectedArm64ASM": [ - "dup v2.4s, v17.s[0]", - "ext v3.16b, v16.16b, v16.16b, #0", - "ext v4.16b, v16.16b, v16.16b, #1", - "ext v5.16b, v16.16b, v16.16b, #2", - "ext v6.16b, v16.16b, v16.16b, #3", - "uabdl v3.8h, v3.8b, v2.8b", - "uabdl v4.8h, v4.8b, v2.8b", - "uabdl v5.8h, v5.8b, v2.8b", - "uabdl v2.8h, v6.8b, v2.8b", - "addp v3.8h, v3.8h, v5.8h", - "addp v2.8h, v4.8h, v2.8h", - "trn1 v4.4s, v3.4s, v2.4s", - "trn2 v2.4s, v3.4s, v2.4s", - "addp v16.8h, v4.8h, v2.8h" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v3.s[0]", + "ext v3.16b, v2.16b, v2.16b, #0", + "ext v5.16b, v2.16b, v2.16b, #1", + "ext v6.16b, v2.16b, v2.16b, #2", + "ext v7.16b, v2.16b, v2.16b, #3", + "uabdl v2.8h, v3.8b, v4.8b", + "uabdl v3.8h, v5.8b, v4.8b", + "uabdl v5.8h, v6.8b, v4.8b", + "uabdl v6.8h, v7.8b, v4.8b", + "addp v4.8h, v2.8h, v5.8h", + "addp v2.8h, v3.8h, v6.8h", + "trn1 v3.4s, v4.4s, v2.4s", + "trn2 v5.4s, v4.4s, v2.4s", + "addp v2.8h, v3.8h, v5.8h", + "mov v16.16b, v2.16b" ] }, "mpsadbw xmm0, xmm1, 001b": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 17, "Comment": [ "0x66 0x0f 0x3a 0x42" ], "ExpectedArm64ASM": [ - "dup v2.4s, v17.s[1]", - "ext v3.16b, v16.16b, v16.16b, #0", - "ext v4.16b, v16.16b, v16.16b, #1", - "ext v5.16b, v16.16b, v16.16b, #2", - "ext v6.16b, v16.16b, v16.16b, #3", - "uabdl v3.8h, v3.8b, v2.8b", - "uabdl v4.8h, v4.8b, v2.8b", - "uabdl v5.8h, v5.8b, v2.8b", - "uabdl v2.8h, v6.8b, v2.8b", - "addp v3.8h, v3.8h, v5.8h", - "addp v2.8h, v4.8h, v2.8h", - "trn1 v4.4s, v3.4s, v2.4s", - "trn2 v2.4s, v3.4s, v2.4s", - "addp v16.8h, v4.8h, v2.8h" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v3.s[1]", + "ext v3.16b, v2.16b, v2.16b, #0", + "ext v5.16b, v2.16b, v2.16b, #1", + "ext v6.16b, v2.16b, v2.16b, #2", + "ext v7.16b, v2.16b, v2.16b, #3", + "uabdl v2.8h, v3.8b, v4.8b", + "uabdl v3.8h, v5.8b, v4.8b", + "uabdl v5.8h, v6.8b, v4.8b", + "uabdl v6.8h, v7.8b, v4.8b", + "addp v4.8h, v2.8h, v5.8h", + "addp v2.8h, v3.8h, v6.8h", + "trn1 v3.4s, v4.4s, v2.4s", + "trn2 v5.4s, v4.4s, v2.4s", + "addp v2.8h, v3.8h, v5.8h", + "mov v16.16b, v2.16b" ] }, "mpsadbw xmm0, xmm1, 010b": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 17, "Comment": [ "0x66 0x0f 0x3a 0x42" ], "ExpectedArm64ASM": [ - "dup v2.4s, v17.s[2]", - "ext v3.16b, v16.16b, v16.16b, #0", - "ext v4.16b, v16.16b, v16.16b, #1", - "ext v5.16b, v16.16b, v16.16b, #2", - "ext v6.16b, v16.16b, v16.16b, #3", - "uabdl v3.8h, v3.8b, v2.8b", - "uabdl v4.8h, v4.8b, v2.8b", - "uabdl v5.8h, v5.8b, v2.8b", - "uabdl v2.8h, v6.8b, v2.8b", - "addp v3.8h, v3.8h, v5.8h", - "addp v2.8h, v4.8h, v2.8h", - "trn1 v4.4s, v3.4s, v2.4s", - "trn2 v2.4s, v3.4s, v2.4s", - "addp v16.8h, v4.8h, v2.8h" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v3.s[2]", + "ext v3.16b, v2.16b, v2.16b, #0", + "ext v5.16b, v2.16b, v2.16b, #1", + "ext v6.16b, v2.16b, v2.16b, #2", + "ext v7.16b, v2.16b, v2.16b, #3", + "uabdl v2.8h, v3.8b, v4.8b", + "uabdl v3.8h, v5.8b, v4.8b", + "uabdl v5.8h, v6.8b, v4.8b", + "uabdl v6.8h, v7.8b, v4.8b", + "addp v4.8h, v2.8h, v5.8h", + "addp v2.8h, v3.8h, v6.8h", + "trn1 v3.4s, v4.4s, v2.4s", + "trn2 v5.4s, v4.4s, v2.4s", + "addp v2.8h, v3.8h, v5.8h", + "mov v16.16b, v2.16b" ] }, "mpsadbw xmm0, xmm1, 011b": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 17, "Comment": [ "0x66 0x0f 0x3a 0x42" ], "ExpectedArm64ASM": [ - "dup v2.4s, v17.s[3]", - "ext v3.16b, v16.16b, v16.16b, #0", - "ext v4.16b, v16.16b, v16.16b, #1", - "ext v5.16b, v16.16b, v16.16b, #2", - "ext v6.16b, v16.16b, v16.16b, #3", - "uabdl v3.8h, v3.8b, v2.8b", - "uabdl v4.8h, v4.8b, v2.8b", - "uabdl v5.8h, v5.8b, v2.8b", - "uabdl v2.8h, v6.8b, v2.8b", - "addp v3.8h, v3.8h, v5.8h", - "addp v2.8h, v4.8h, v2.8h", - "trn1 v4.4s, v3.4s, v2.4s", - "trn2 v2.4s, v3.4s, v2.4s", - "addp v16.8h, v4.8h, v2.8h" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v3.s[3]", + "ext v3.16b, v2.16b, v2.16b, #0", + "ext v5.16b, v2.16b, v2.16b, #1", + "ext v6.16b, v2.16b, v2.16b, #2", + "ext v7.16b, v2.16b, v2.16b, #3", + "uabdl v2.8h, v3.8b, v4.8b", + "uabdl v3.8h, v5.8b, v4.8b", + "uabdl v5.8h, v6.8b, v4.8b", + "uabdl v6.8h, v7.8b, v4.8b", + "addp v4.8h, v2.8h, v5.8h", + "addp v2.8h, v3.8h, v6.8h", + "trn1 v3.4s, v4.4s, v2.4s", + "trn2 v5.4s, v4.4s, v2.4s", + "addp v2.8h, v3.8h, v5.8h", + "mov v16.16b, v2.16b" ] }, "mpsadbw xmm0, xmm1, 100b": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 17, "Comment": [ "0x66 0x0f 0x3a 0x42" ], "ExpectedArm64ASM": [ - "dup v2.4s, v17.s[0]", - "ext v3.16b, v16.16b, v16.16b, #4", - "ext v4.16b, v16.16b, v16.16b, #5", - "ext v5.16b, v16.16b, v16.16b, #6", - "ext v6.16b, v16.16b, v16.16b, #7", - "uabdl v3.8h, v3.8b, v2.8b", - "uabdl v4.8h, v4.8b, v2.8b", - "uabdl v5.8h, v5.8b, v2.8b", - "uabdl v2.8h, v6.8b, v2.8b", - "addp v3.8h, v3.8h, v5.8h", - "addp v2.8h, v4.8h, v2.8h", - "trn1 v4.4s, v3.4s, v2.4s", - "trn2 v2.4s, v3.4s, v2.4s", - "addp v16.8h, v4.8h, v2.8h" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v3.s[0]", + "ext v3.16b, v2.16b, v2.16b, #4", + "ext v5.16b, v2.16b, v2.16b, #5", + "ext v6.16b, v2.16b, v2.16b, #6", + "ext v7.16b, v2.16b, v2.16b, #7", + "uabdl v2.8h, v3.8b, v4.8b", + "uabdl v3.8h, v5.8b, v4.8b", + "uabdl v5.8h, v6.8b, v4.8b", + "uabdl v6.8h, v7.8b, v4.8b", + "addp v4.8h, v2.8h, v5.8h", + "addp v2.8h, v3.8h, v6.8h", + "trn1 v3.4s, v4.4s, v2.4s", + "trn2 v5.4s, v4.4s, v2.4s", + "addp v2.8h, v3.8h, v5.8h", + "mov v16.16b, v2.16b" ] }, "mpsadbw xmm0, xmm1, 101b": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 17, "Comment": [ "0x66 0x0f 0x3a 0x42" ], "ExpectedArm64ASM": [ - "dup v2.4s, v17.s[1]", - "ext v3.16b, v16.16b, v16.16b, #4", - "ext v4.16b, v16.16b, v16.16b, #5", - "ext v5.16b, v16.16b, v16.16b, #6", - "ext v6.16b, v16.16b, v16.16b, #7", - "uabdl v3.8h, v3.8b, v2.8b", - "uabdl v4.8h, v4.8b, v2.8b", - "uabdl v5.8h, v5.8b, v2.8b", - "uabdl v2.8h, v6.8b, v2.8b", - "addp v3.8h, v3.8h, v5.8h", - "addp v2.8h, v4.8h, v2.8h", - "trn1 v4.4s, v3.4s, v2.4s", - "trn2 v2.4s, v3.4s, v2.4s", - "addp v16.8h, v4.8h, v2.8h" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v3.s[1]", + "ext v3.16b, v2.16b, v2.16b, #4", + "ext v5.16b, v2.16b, v2.16b, #5", + "ext v6.16b, v2.16b, v2.16b, #6", + "ext v7.16b, v2.16b, v2.16b, #7", + "uabdl v2.8h, v3.8b, v4.8b", + "uabdl v3.8h, v5.8b, v4.8b", + "uabdl v5.8h, v6.8b, v4.8b", + "uabdl v6.8h, v7.8b, v4.8b", + "addp v4.8h, v2.8h, v5.8h", + "addp v2.8h, v3.8h, v6.8h", + "trn1 v3.4s, v4.4s, v2.4s", + "trn2 v5.4s, v4.4s, v2.4s", + "addp v2.8h, v3.8h, v5.8h", + "mov v16.16b, v2.16b" ] }, "mpsadbw xmm0, xmm1, 110b": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 17, "Comment": [ "0x66 0x0f 0x3a 0x42" ], "ExpectedArm64ASM": [ - "dup v2.4s, v17.s[2]", - "ext v3.16b, v16.16b, v16.16b, #4", - "ext v4.16b, v16.16b, v16.16b, #5", - "ext v5.16b, v16.16b, v16.16b, #6", - "ext v6.16b, v16.16b, v16.16b, #7", - "uabdl v3.8h, v3.8b, v2.8b", - "uabdl v4.8h, v4.8b, v2.8b", - "uabdl v5.8h, v5.8b, v2.8b", - "uabdl v2.8h, v6.8b, v2.8b", - "addp v3.8h, v3.8h, v5.8h", - "addp v2.8h, v4.8h, v2.8h", - "trn1 v4.4s, v3.4s, v2.4s", - "trn2 v2.4s, v3.4s, v2.4s", - "addp v16.8h, v4.8h, v2.8h" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v3.s[2]", + "ext v3.16b, v2.16b, v2.16b, #4", + "ext v5.16b, v2.16b, v2.16b, #5", + "ext v6.16b, v2.16b, v2.16b, #6", + "ext v7.16b, v2.16b, v2.16b, #7", + "uabdl v2.8h, v3.8b, v4.8b", + "uabdl v3.8h, v5.8b, v4.8b", + "uabdl v5.8h, v6.8b, v4.8b", + "uabdl v6.8h, v7.8b, v4.8b", + "addp v4.8h, v2.8h, v5.8h", + "addp v2.8h, v3.8h, v6.8h", + "trn1 v3.4s, v4.4s, v2.4s", + "trn2 v5.4s, v4.4s, v2.4s", + "addp v2.8h, v3.8h, v5.8h", + "mov v16.16b, v2.16b" ] }, "mpsadbw xmm0, xmm1, 111b": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 17, "Comment": [ "0x66 0x0f 0x3a 0x42" ], "ExpectedArm64ASM": [ - "dup v2.4s, v17.s[3]", - "ext v3.16b, v16.16b, v16.16b, #4", - "ext v4.16b, v16.16b, v16.16b, #5", - "ext v5.16b, v16.16b, v16.16b, #6", - "ext v6.16b, v16.16b, v16.16b, #7", - "uabdl v3.8h, v3.8b, v2.8b", - "uabdl v4.8h, v4.8b, v2.8b", - "uabdl v5.8h, v5.8b, v2.8b", - "uabdl v2.8h, v6.8b, v2.8b", - "addp v3.8h, v3.8h, v5.8h", - "addp v2.8h, v4.8h, v2.8h", - "trn1 v4.4s, v3.4s, v2.4s", - "trn2 v2.4s, v3.4s, v2.4s", - "addp v16.8h, v4.8h, v2.8h" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v3.s[3]", + "ext v3.16b, v2.16b, v2.16b, #4", + "ext v5.16b, v2.16b, v2.16b, #5", + "ext v6.16b, v2.16b, v2.16b, #6", + "ext v7.16b, v2.16b, v2.16b, #7", + "uabdl v2.8h, v3.8b, v4.8b", + "uabdl v3.8h, v5.8b, v4.8b", + "uabdl v5.8h, v6.8b, v4.8b", + "uabdl v6.8h, v7.8b, v4.8b", + "addp v4.8h, v2.8h, v5.8h", + "addp v2.8h, v3.8h, v6.8h", + "trn1 v3.4s, v4.4s, v2.4s", + "trn2 v5.4s, v4.4s, v2.4s", + "addp v2.8h, v3.8h, v5.8h", + "mov v16.16b, v2.16b" ] }, "sha1rnds4 xmm0, xmm1, 00b": { - "ExpectedInstructionCount": 57, + "ExpectedInstructionCount": 64, "Comment": [ "0x66 0x0f 0x3a 0xcc" ], "ExpectedArm64ASM": [ + "sub sp, sp, #0x60 (96)", "mov w20, #0x7999", "movk w20, #0x5a82, lsl #16", - "mov w20, v17.s[3]", - "mov w21, v16.s[3]", - "mov w22, v16.s[2]", - "mov w23, v16.s[1]", - "mov w24, v16.s[0]", - "and w25, w22, w23", - "bic w30, w24, w22", - "eor w25, w25, w30", - "ror w30, w21, #27", - "add w25, w25, w30", - "add w20, w25, w20", - "mov w25, #0x7999", - "movk w25, #0x5a82, lsl #16", - "add w20, w20, w25", - "ror w22, w22, #2", - "mov w25, v17.s[2]", - "add w24, w25, w24", - "and w25, w21, w22", - "bic w30, w23, w21", - "eor w25, w25, w30", - "ror w30, w20, #27", - "add w25, w25, w30", - "add w24, w25, w24", - "mov w25, #0x7999", - "movk w25, #0x5a82, lsl #16", - "add w24, w24, w25", - "ror w21, w21, #2", - "mov w25, v17.s[1]", - "add w23, w25, w23", - "and w25, w20, w21", - "bic w30, w22, w20", - "eor w25, w25, w30", - "ror w30, w24, #27", - "add w25, w25, w30", - "add w23, w25, w23", - "mov w25, #0x7999", - "movk w25, #0x5a82, lsl #16", - "add w23, w23, w25", - "ror w20, w20, #2", - "mov w30, v17.s[0]", - "add w22, w30, w22", - "and w30, w24, w20", - "bic w21, w21, w24", - "eor w21, w30, w21", - "ror w30, w23, #27", - "add w21, w21, w30", - "add w21, w21, w22", - "add w21, w21, w25", - "ror w22, w24, #2", - "mov v2.16b, v16.16b", - "mov v2.s[3], w21", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov w21, v3.s[3]", + "mov w22, v2.s[3]", + "mov w23, v2.s[2]", + "mov w24, v2.s[1]", + "mov w25, v2.s[0]", + "and w30, w23, w24", + "bic w18, w25, w23", + "str w24, [sp]", + "eor w24, w30, w18", + "ror w30, w22, #27", + "add w18, w24, w30", + "add w24, w18, w21", + "add w21, w24, w20", + "ror w24, w23, #2", + "mov w23, v3.s[2]", + "add w30, w23, w25", + "and w23, w22, w24", + "ldr w25, [sp]", + "bic w18, w25, w22", + "str w24, [sp, #32]", + "eor w24, w23, w18", + "ror w23, w21, #27", + "add w18, w24, w23", + "add w23, w18, w30", + "add w24, w23, w20", + "ror w23, w22, #2", + "mov w22, v3.s[1]", + "add w30, w22, w25", + "and w22, w21, w23", + "ldr w25, [sp, #32]", + "bic w18, w25, w21", + "str w23, [sp, #64]", + "eor w23, w22, w18", + "ror w22, w24, #27", + "add w18, w23, w22", + "add w22, w18, w30", + "add w23, w22, w20", + "ror w22, w21, #2", + "mov w21, v3.s[0]", + "add w30, w21, w25", + "and w21, w24, w22", + "ldr w25, [sp, #64]", + "bic w18, w25, w24", + "eor w25, w21, w18", + "ror w21, w23, #27", + "add w18, w25, w21", + "add w21, w18, w30", + "add w25, w21, w20", + "ror w20, w24, #2", + "mov v3.16b, v2.16b", + "mov v3.s[3], w25", + "mov v2.16b, v3.16b", "mov v2.s[2], w23", - "mov v2.s[1], w22", + "mov v3.16b, v2.16b", + "mov v3.s[1], w20", + "mov v2.16b, v3.16b", + "mov v2.s[0], w22", "mov v16.16b, v2.16b", - "mov v16.s[0], w20" + "add sp, sp, #0x60 (96)" ] }, "sha1rnds4 xmm0, xmm1, 01b": { - "ExpectedInstructionCount": 53, + "ExpectedInstructionCount": 60, "Comment": [ "0x66 0x0f 0x3a 0xcc" ], "ExpectedArm64ASM": [ + "sub sp, sp, #0x60 (96)", "mov w20, #0xeba1", "movk w20, #0x6ed9, lsl #16", - "mov w20, v17.s[3]", - "mov w21, v16.s[3]", - "mov w22, v16.s[2]", - "mov w23, v16.s[1]", - "mov w24, v16.s[0]", - "eor w25, w22, w23", - "eor w25, w25, w24", - "ror w30, w21, #27", - "add w25, w25, w30", - "add w20, w25, w20", - "mov w25, #0xeba1", - "movk w25, #0x6ed9, lsl #16", - "add w20, w20, w25", - "ror w22, w22, #2", - "mov w25, v17.s[2]", - "add w24, w25, w24", - "eor w25, w21, w22", - "eor w25, w25, w23", - "ror w30, w20, #27", - "add w25, w25, w30", - "add w24, w25, w24", - "mov w25, #0xeba1", - "movk w25, #0x6ed9, lsl #16", - "add w24, w24, w25", - "ror w21, w21, #2", - "mov w25, v17.s[1]", - "add w23, w25, w23", - "eor w25, w20, w21", - "eor w25, w25, w22", - "ror w30, w24, #27", - "add w25, w25, w30", - "add w23, w25, w23", - "mov w25, #0xeba1", - "movk w25, #0x6ed9, lsl #16", - "add w23, w23, w25", - "ror w20, w20, #2", - "mov w30, v17.s[0]", - "add w22, w30, w22", - "eor w30, w24, w20", - "eor w21, w30, w21", - "ror w30, w23, #27", - "add w21, w21, w30", - "add w21, w21, w22", - "add w21, w21, w25", - "ror w22, w24, #2", - "mov v2.16b, v16.16b", - "mov v2.s[3], w21", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov w21, v3.s[3]", + "mov w22, v2.s[3]", + "mov w23, v2.s[2]", + "mov w24, v2.s[1]", + "mov w25, v2.s[0]", + "eor w30, w23, w24", + "eor w18, w30, w25", + "ror w30, w22, #27", + "str w24, [sp]", + "add w24, w18, w30", + "add w30, w24, w21", + "add w21, w30, w20", + "ror w24, w23, #2", + "mov w23, v3.s[2]", + "add w30, w23, w25", + "eor w23, w22, w24", + "ldr w25, [sp]", + "eor w18, w23, w25", + "ror w23, w21, #27", + "str w24, [sp, #32]", + "add w24, w18, w23", + "add w23, w24, w30", + "add w24, w23, w20", + "ror w23, w22, #2", + "mov w22, v3.s[1]", + "add w30, w22, w25", + "eor w22, w21, w23", + "ldr w25, [sp, #32]", + "eor w18, w22, w25", + "ror w22, w24, #27", + "str w23, [sp, #64]", + "add w23, w18, w22", + "add w22, w23, w30", + "add w23, w22, w20", + "ror w22, w21, #2", + "mov w21, v3.s[0]", + "add w30, w21, w25", + "eor w21, w24, w22", + "ldr w25, [sp, #64]", + "eor w18, w21, w25", + "ror w21, w23, #27", + "add w25, w18, w21", + "add w21, w25, w30", + "add w25, w21, w20", + "ror w20, w24, #2", + "mov v3.16b, v2.16b", + "mov v3.s[3], w25", + "mov v2.16b, v3.16b", "mov v2.s[2], w23", - "mov v2.s[1], w22", + "mov v3.16b, v2.16b", + "mov v3.s[1], w20", + "mov v2.16b, v3.16b", + "mov v2.s[0], w22", "mov v16.16b, v2.16b", - "mov v16.s[0], w20" + "add sp, sp, #0x60 (96)" ] }, "sha1rnds4 xmm0, xmm1, 10b": { - "ExpectedInstructionCount": 61, + "ExpectedInstructionCount": 68, "Comment": [ "0x66 0x0f 0x3a 0xcc" ], "ExpectedArm64ASM": [ + "sub sp, sp, #0x60 (96)", "mov w20, #0xbcdc", "movk w20, #0x8f1b, lsl #16", - "mov w20, v17.s[3]", - "mov w21, v16.s[3]", - "mov w22, v16.s[2]", - "mov w23, v16.s[1]", - "mov w24, v16.s[0]", - "and w25, w23, w24", - "orr w30, w23, w24", - "and w30, w22, w30", - "orr w25, w30, w25", - "ror w30, w21, #27", - "add w25, w25, w30", - "add w20, w25, w20", - "mov w25, #0xbcdc", - "movk w25, #0x8f1b, lsl #16", - "add w20, w20, w25", - "ror w22, w22, #2", - "mov w25, v17.s[2]", - "add w24, w25, w24", - "and w25, w22, w23", - "orr w30, w22, w23", - "and w30, w21, w30", - "orr w25, w30, w25", - "ror w30, w20, #27", - "add w25, w25, w30", - "add w24, w25, w24", - "mov w25, #0xbcdc", - "movk w25, #0x8f1b, lsl #16", - "add w24, w24, w25", - "ror w21, w21, #2", - "mov w25, v17.s[1]", - "add w23, w25, w23", - "and w25, w21, w22", - "orr w30, w21, w22", - "and w30, w20, w30", - "orr w25, w30, w25", - "ror w30, w24, #27", - "add w25, w25, w30", - "add w23, w25, w23", - "mov w25, #0xbcdc", - "movk w25, #0x8f1b, lsl #16", - "add w23, w23, w25", - "ror w20, w20, #2", - "mov w30, v17.s[0]", - "add w22, w30, w22", - "and w30, w20, w21", - "orr w21, w20, w21", - "and w21, w24, w21", - "orr w21, w21, w30", - "ror w30, w23, #27", - "add w21, w21, w30", - "add w21, w21, w22", - "add w21, w21, w25", - "ror w22, w24, #2", - "mov v2.16b, v16.16b", - "mov v2.s[3], w21", - "mov v2.s[2], w23", - "mov v2.s[1], w22", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov w21, v3.s[3]", + "mov w22, v2.s[3]", + "mov w23, v2.s[2]", + "mov w24, v2.s[1]", + "mov w25, v2.s[0]", + "and w30, w24, w25", + "orr w18, w24, w25", + "str w24, [sp]", + "and w24, w23, w18", + "orr w18, w24, w30", + "ror w24, w22, #27", + "add w30, w18, w24", + "add w24, w30, w21", + "add w21, w24, w20", + "ror w24, w23, #2", + "mov w23, v3.s[2]", + "add w30, w23, w25", + "ldr w23, [sp]", + "and w25, w24, w23", + "orr w18, w24, w23", + "str w24, [sp, #32]", + "and w24, w22, w18", + "orr w18, w24, w25", + "ror w24, w21, #27", + "add w25, w18, w24", + "add w24, w25, w30", + "add w25, w24, w20", + "ror w24, w22, #2", + "mov w22, v3.s[1]", + "add w30, w22, w23", + "ldr w22, [sp, #32]", + "and w23, w24, w22", + "orr w18, w24, w22", + "str w24, [sp, #64]", + "and w24, w21, w18", + "orr w18, w24, w23", + "ror w23, w25, #27", + "add w24, w18, w23", + "add w23, w24, w30", + "add w24, w23, w20", + "ror w23, w21, #2", + "mov w21, v3.s[0]", + "add w30, w21, w22", + "ldr w21, [sp, #64]", + "and w22, w23, w21", + "orr w18, w23, w21", + "and w21, w25, w18", + "orr w18, w21, w22", + "ror w21, w24, #27", + "add w22, w18, w21", + "add w21, w22, w30", + "add w22, w21, w20", + "ror w20, w25, #2", + "mov v3.16b, v2.16b", + "mov v3.s[3], w22", + "mov v2.16b, v3.16b", + "mov v2.s[2], w24", + "mov v3.16b, v2.16b", + "mov v3.s[1], w20", + "mov v2.16b, v3.16b", + "mov v2.s[0], w23", "mov v16.16b, v2.16b", - "mov v16.s[0], w20" + "add sp, sp, #0x60 (96)" ] }, "sha1rnds4 xmm0, xmm1, 11b": { - "ExpectedInstructionCount": 53, + "ExpectedInstructionCount": 60, "Comment": [ "0x66 0x0f 0x3a 0xcc" ], "ExpectedArm64ASM": [ + "sub sp, sp, #0x60 (96)", "mov w20, #0xc1d6", "movk w20, #0xca62, lsl #16", - "mov w20, v17.s[3]", - "mov w21, v16.s[3]", - "mov w22, v16.s[2]", - "mov w23, v16.s[1]", - "mov w24, v16.s[0]", - "eor w25, w22, w23", - "eor w25, w25, w24", - "ror w30, w21, #27", - "add w25, w25, w30", - "add w20, w25, w20", - "mov w25, #0xc1d6", - "movk w25, #0xca62, lsl #16", - "add w20, w20, w25", - "ror w22, w22, #2", - "mov w25, v17.s[2]", - "add w24, w25, w24", - "eor w25, w21, w22", - "eor w25, w25, w23", - "ror w30, w20, #27", - "add w25, w25, w30", - "add w24, w25, w24", - "mov w25, #0xc1d6", - "movk w25, #0xca62, lsl #16", - "add w24, w24, w25", - "ror w21, w21, #2", - "mov w25, v17.s[1]", - "add w23, w25, w23", - "eor w25, w20, w21", - "eor w25, w25, w22", - "ror w30, w24, #27", - "add w25, w25, w30", - "add w23, w25, w23", - "mov w25, #0xc1d6", - "movk w25, #0xca62, lsl #16", - "add w23, w23, w25", - "ror w20, w20, #2", - "mov w30, v17.s[0]", - "add w22, w30, w22", - "eor w30, w24, w20", - "eor w21, w30, w21", - "ror w30, w23, #27", - "add w21, w21, w30", - "add w21, w21, w22", - "add w21, w21, w25", - "ror w22, w24, #2", - "mov v2.16b, v16.16b", - "mov v2.s[3], w21", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov w21, v3.s[3]", + "mov w22, v2.s[3]", + "mov w23, v2.s[2]", + "mov w24, v2.s[1]", + "mov w25, v2.s[0]", + "eor w30, w23, w24", + "eor w18, w30, w25", + "ror w30, w22, #27", + "str w24, [sp]", + "add w24, w18, w30", + "add w30, w24, w21", + "add w21, w30, w20", + "ror w24, w23, #2", + "mov w23, v3.s[2]", + "add w30, w23, w25", + "eor w23, w22, w24", + "ldr w25, [sp]", + "eor w18, w23, w25", + "ror w23, w21, #27", + "str w24, [sp, #32]", + "add w24, w18, w23", + "add w23, w24, w30", + "add w24, w23, w20", + "ror w23, w22, #2", + "mov w22, v3.s[1]", + "add w30, w22, w25", + "eor w22, w21, w23", + "ldr w25, [sp, #32]", + "eor w18, w22, w25", + "ror w22, w24, #27", + "str w23, [sp, #64]", + "add w23, w18, w22", + "add w22, w23, w30", + "add w23, w22, w20", + "ror w22, w21, #2", + "mov w21, v3.s[0]", + "add w30, w21, w25", + "eor w21, w24, w22", + "ldr w25, [sp, #64]", + "eor w18, w21, w25", + "ror w21, w23, #27", + "add w25, w18, w21", + "add w21, w25, w30", + "add w25, w21, w20", + "ror w20, w24, #2", + "mov v3.16b, v2.16b", + "mov v3.s[3], w25", + "mov v2.16b, v3.16b", "mov v2.s[2], w23", - "mov v2.s[1], w22", + "mov v3.16b, v2.16b", + "mov v3.s[1], w20", + "mov v2.16b, v3.16b", + "mov v2.s[0], w22", "mov v16.16b, v2.16b", - "mov v16.s[0], w20" + "add sp, sp, #0x60 (96)" ] } } diff --git a/unittests/InstructionCountCI/H0F3A_SVE128.json b/unittests/InstructionCountCI/H0F3A_SVE128.json index 18d1ca3396..c58985af9f 100644 --- a/unittests/InstructionCountCI/H0F3A_SVE128.json +++ b/unittests/InstructionCountCI/H0F3A_SVE128.json @@ -11,259 +11,315 @@ }, "Instructions": { "dpps xmm0, xmm1, 00000000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "movi v2.2d, #0x0", + "mov v16.16b, v2.16b" ] }, "dpps xmm0, xmm1, 00001111b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "movi v2.2d, #0x0", + "mov v16.16b, v2.16b" ] }, "dpps xmm0, xmm1, 11110000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "movi v2.2d, #0x0", + "mov v16.16b, v2.16b" ] }, "dpps xmm0, xmm1, 11110001b": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ "movi v2.2d, #0x0", - "fmul v3.4s, v16.4s, v17.4s", - "faddv s3, p6, z3.s", - "zip1 v16.4s, v3.4s, v2.4s" + "mov v3.16b, v16.16b", + "mov v4.16b, v17.16b", + "fmul v5.4s, v3.4s, v4.4s", + "faddv s3, p6, z5.s", + "zip1 v4.4s, v3.4s, v2.4s", + "mov v16.16b, v4.16b" ] }, "dpps xmm0, xmm1, 11110010b": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ "movi v2.2d, #0x0", - "fmul v3.4s, v16.4s, v17.4s", - "faddv s3, p6, z3.s", - "zip1 v16.2s, v2.2s, v3.2s" + "mov v3.16b, v16.16b", + "mov v4.16b, v17.16b", + "fmul v5.4s, v3.4s, v4.4s", + "faddv s3, p6, z5.s", + "zip1 v4.2s, v2.2s, v3.2s", + "mov v16.16b, v4.16b" ] }, "dpps xmm0, xmm1, 11110011b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ - "fmul v2.4s, v16.4s, v17.4s", - "faddv s2, p6, z2.s", - "dup v16.2s, v2.s[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "fmul v4.4s, v2.4s, v3.4s", + "faddv s2, p6, z4.s", + "dup v3.2s, v2.s[0]", + "mov v16.16b, v3.16b" ] }, "dpps xmm0, xmm1, 11110100b": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ "movi v2.2d, #0x0", - "fmul v3.4s, v16.4s, v17.4s", - "faddv s3, p6, z3.s", - "zip1 v16.2d, v2.2d, v3.2d" + "mov v3.16b, v16.16b", + "mov v4.16b, v17.16b", + "fmul v5.4s, v3.4s, v4.4s", + "faddv s3, p6, z5.s", + "zip1 v4.2d, v2.2d, v3.2d", + "mov v16.16b, v4.16b" ] }, "dpps xmm0, xmm1, 11110101b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ - "fmul v2.4s, v16.4s, v17.4s", - "faddv s2, p6, z2.s", - "zip1 v16.2d, v2.2d, v2.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "fmul v4.4s, v2.4s, v3.4s", + "faddv s2, p6, z4.s", + "zip1 v3.2d, v2.2d, v2.2d", + "mov v16.16b, v3.16b" ] }, "dpps xmm0, xmm1, 11110110b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ "movi v2.2d, #0x0", - "fmul v3.4s, v16.4s, v17.4s", - "faddv s3, p6, z3.s", - "mov v2.s[1], v3.s[0]", - "mov v16.16b, v2.16b", - "mov v16.s[2], v3.s[0]" + "mov v3.16b, v16.16b", + "mov v4.16b, v17.16b", + "fmul v5.4s, v3.4s, v4.4s", + "faddv s3, p6, z5.s", + "mov v4.16b, v2.16b", + "mov v4.s[1], v3.s[0]", + "mov v2.16b, v4.16b", + "mov v2.s[2], v3.s[0]", + "mov v16.16b, v2.16b" ] }, "dpps xmm0, xmm1, 11110111b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ "movi v2.2d, #0x0", - "fmul v3.4s, v16.4s, v17.4s", - "faddv s3, p6, z3.s", - "dup v3.4s, v3.s[0]", - "mov v16.16b, v3.16b", - "mov v16.s[3], v2.s[0]" + "mov v3.16b, v16.16b", + "mov v4.16b, v17.16b", + "fmul v5.4s, v3.4s, v4.4s", + "faddv s3, p6, z5.s", + "dup v4.4s, v3.s[0]", + "mov v3.16b, v4.16b", + "mov v3.s[3], v2.s[0]", + "mov v16.16b, v3.16b" ] }, "dpps xmm0, xmm1, 11111000b": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ "movi v2.2d, #0x0", - "fmul v3.4s, v16.4s, v17.4s", - "faddv s3, p6, z3.s", - "ext v16.16b, v2.16b, v3.16b, #4" + "mov v3.16b, v16.16b", + "mov v4.16b, v17.16b", + "fmul v5.4s, v3.4s, v4.4s", + "faddv s3, p6, z5.s", + "ext v4.16b, v2.16b, v3.16b, #4", + "mov v16.16b, v4.16b" ] }, "dpps xmm0, xmm1, 11111001b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ "movi v2.2d, #0x0", - "fmul v3.4s, v16.4s, v17.4s", - "faddv s3, p6, z3.s", - "mov v2.s[0], v3.s[0]", - "mov v16.16b, v2.16b", - "mov v16.s[3], v3.s[0]" + "mov v3.16b, v16.16b", + "mov v4.16b, v17.16b", + "fmul v5.4s, v3.4s, v4.4s", + "faddv s3, p6, z5.s", + "mov v4.16b, v2.16b", + "mov v4.s[0], v3.s[0]", + "mov v2.16b, v4.16b", + "mov v2.s[3], v3.s[0]", + "mov v16.16b, v2.16b" ] }, "dpps xmm0, xmm1, 11111010b": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ "movi v2.2d, #0x0", - "fmul v3.4s, v16.4s, v17.4s", - "faddv s3, p6, z3.s", - "dup v3.4s, v3.s[0]", - "zip1 v16.4s, v2.4s, v3.4s" + "mov v3.16b, v16.16b", + "mov v4.16b, v17.16b", + "fmul v5.4s, v3.4s, v4.4s", + "faddv s3, p6, z5.s", + "dup v4.4s, v3.s[0]", + "zip1 v3.4s, v2.4s, v4.4s", + "mov v16.16b, v3.16b" ] }, "dpps xmm0, xmm1, 11111011b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ "movi v2.2d, #0x0", - "fmul v3.4s, v16.4s, v17.4s", - "faddv s3, p6, z3.s", - "dup v3.4s, v3.s[0]", - "mov v16.16b, v3.16b", - "mov v16.s[2], v2.s[0]" + "mov v3.16b, v16.16b", + "mov v4.16b, v17.16b", + "fmul v5.4s, v3.4s, v4.4s", + "faddv s3, p6, z5.s", + "dup v4.4s, v3.s[0]", + "mov v3.16b, v4.16b", + "mov v3.s[2], v2.s[0]", + "mov v16.16b, v3.16b" ] }, "dpps xmm0, xmm1, 11111100b": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ "movi v2.2d, #0x0", - "fmul v3.4s, v16.4s, v17.4s", - "faddv s3, p6, z3.s", - "dup v3.4s, v3.s[0]", - "zip1 v16.2d, v2.2d, v3.2d" + "mov v3.16b, v16.16b", + "mov v4.16b, v17.16b", + "fmul v5.4s, v3.4s, v4.4s", + "faddv s3, p6, z5.s", + "dup v4.4s, v3.s[0]", + "zip1 v3.2d, v2.2d, v4.2d", + "mov v16.16b, v3.16b" ] }, "dpps xmm0, xmm1, 11111101b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ "movi v2.2d, #0x0", - "fmul v3.4s, v16.4s, v17.4s", - "faddv s3, p6, z3.s", - "dup v3.4s, v3.s[0]", - "mov v16.16b, v3.16b", - "mov v16.s[1], v2.s[0]" + "mov v3.16b, v16.16b", + "mov v4.16b, v17.16b", + "fmul v5.4s, v3.4s, v4.4s", + "faddv s3, p6, z5.s", + "dup v4.4s, v3.s[0]", + "mov v3.16b, v4.16b", + "mov v3.s[1], v2.s[0]", + "mov v16.16b, v3.16b" ] }, "dpps xmm0, xmm1, 11111110b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ "movi v2.2d, #0x0", - "fmul v3.4s, v16.4s, v17.4s", - "faddv s3, p6, z3.s", - "dup v3.4s, v3.s[0]", - "mov v16.16b, v3.16b", - "mov v16.s[0], v2.s[0]" + "mov v3.16b, v16.16b", + "mov v4.16b, v17.16b", + "fmul v5.4s, v3.4s, v4.4s", + "faddv s3, p6, z5.s", + "dup v4.4s, v3.s[0]", + "mov v3.16b, v4.16b", + "mov v3.s[0], v2.s[0]", + "mov v16.16b, v3.16b" ] }, "dpps xmm0, xmm1, 11111111b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "0x66 0x0f 0x3a 0x40" ], "ExpectedArm64ASM": [ - "fmul v2.4s, v16.4s, v17.4s", - "faddv s2, p6, z2.s", - "dup v16.4s, v2.s[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "fmul v4.4s, v2.4s, v3.4s", + "faddv s2, p6, z4.s", + "dup v3.4s, v2.s[0]", + "mov v16.16b, v3.16b" ] }, "dppd xmm0, xmm1, 00000000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "0x66 0x0f 0x3a 0x41" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "movi v2.2d, #0x0", + "mov v16.16b, v2.16b" ] }, "dppd xmm0, xmm1, 00001111b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "0x66 0x0f 0x3a 0x41" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "movi v2.2d, #0x0", + "mov v16.16b, v2.16b" ] }, "dppd xmm0, xmm1, 11110000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "0x66 0x0f 0x3a 0x41" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "movi v2.2d, #0x0", + "mov v16.16b, v2.16b" ] }, "dppd xmm0, xmm1, 11111111b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "0x66 0x0f 0x3a 0x41" ], "ExpectedArm64ASM": [ - "fmul v2.2d, v16.2d, v17.2d", - "faddv d2, p6, z2.d", - "dup v16.2d, v2.d[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "fmul v4.2d, v2.2d, v3.2d", + "faddv d2, p6, z4.d", + "dup v3.2d, v2.d[0]", + "mov v16.16b, v3.16b" ] } } diff --git a/unittests/InstructionCountCI/Primary.json b/unittests/InstructionCountCI/Primary.json index 5fcaf32e0c..7c7a317908 100644 --- a/unittests/InstructionCountCI/Primary.json +++ b/unittests/InstructionCountCI/Primary.json @@ -11,2410 +11,3129 @@ }, "Instructions": { "add bl, cl": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 11, "Comment": "0x00", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "lsl w0, w7, #24", - "cmn w0, w5, lsl #24", - "add w26, w7, w5", - "bfxil x7, x26, #0, #8" + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "lsl w0, w21, #24", + "cmn w0, w20, lsl #24", + "add w22, w21, w20", + "mov x26, x22", + "mov x20, x21", + "bfxil x20, x22, #0, #8", + "mov x7, x20" ] }, "add bx, cx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 11, "Comment": "0x01", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "lsl w0, w7, #16", - "cmn w0, w5, lsl #16", - "add w26, w7, w5", - "bfxil x7, x26, #0, #16" + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "lsl w0, w21, #16", + "cmn w0, w20, lsl #16", + "add w22, w21, w20", + "mov x26, x22", + "mov x20, x21", + "bfxil x20, x22, #0, #16", + "mov x7, x20" ] }, "add ebx, ecx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x01", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "adds w26, w7, w5", - "mov x7, x26" + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "adds w22, w21, w20", + "mov x26, x22", + "mov x7, x22" ] }, "add rbx, rcx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x01", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "adds x26, x7, x5", - "mov x7, x26" + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "adds x22, x21, x20", + "mov x26, x22", + "mov x7, x22" ] }, "db 0x02, 0xcb": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 11, "Comment": [ "0x02", "add bl, cl but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "lsl w0, w5, #24", - "cmn w0, w7, lsl #24", - "add w26, w5, w7", - "bfxil x5, x26, #0, #8" + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "lsl w0, w21, #24", + "cmn w0, w20, lsl #24", + "add w22, w21, w20", + "mov x26, x22", + "mov x20, x21", + "bfxil x20, x22, #0, #8", + "mov x5, x20" ] }, "db 0x66, 0x03, 0xcb": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 11, "Comment": [ "0x03", "add bx, cx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "lsl w0, w5, #16", - "cmn w0, w7, lsl #16", - "add w26, w5, w7", - "bfxil x5, x26, #0, #16" + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "lsl w0, w21, #16", + "cmn w0, w20, lsl #16", + "add w22, w21, w20", + "mov x26, x22", + "mov x20, x21", + "bfxil x20, x22, #0, #16", + "mov x5, x20" ] }, "db 0x03, 0xcb": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0x03", "add ebx, ecx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "adds w26, w5, w7", - "mov x5, x26" + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "adds w22, w21, w20", + "mov x26, x22", + "mov x5, x22" ] }, "db 0x48, 0x03, 0xcb": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0x03", "add rbx, rcx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "adds x26, x5, x7", - "mov x5, x26" + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "adds x22, x21, x20", + "mov x26, x22", + "mov x5, x22" ] }, "add al, 1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "0x04", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "lsl w0, w27, #24", + "mov x21, x4", + "mov x27, x21", + "lsl w0, w21, #24", "cmn w0, w20, lsl #24", - "add w26, w27, #0x1 (1)", - "mov x4, x27", - "bfxil x4, x26, #0, #8" + "add w20, w21, #0x1 (1)", + "mov x26, x20", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "add ax, 1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "0x05", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "lsl w0, w27, #16", + "mov x21, x4", + "mov x27, x21", + "lsl w0, w21, #16", "cmn w0, w20, lsl #16", - "add w26, w27, #0x1 (1)", - "mov x4, x27", - "bfxil x4, x26, #0, #16" + "add w20, w21, #0x1 (1)", + "mov x26, x20", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x4, x22" ] }, "add eax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x05", "ExpectedArm64ASM": [ - "mov x27, x4", - "adds w26, w27, #0x1 (1)", - "mov x4, x26" + "mov x20, x4", + "mov x27, x20", + "adds w21, w20, #0x1 (1)", + "mov x26, x21", + "mov x4, x21" ] }, "add rax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x05", "ExpectedArm64ASM": [ - "mov x27, x4", - "adds x26, x27, #0x1 (1)", - "mov x4, x26" + "mov x20, x4", + "mov x27, x20", + "adds x21, x20, #0x1 (1)", + "mov x26, x21", + "mov x4, x21" ] }, "add al, -1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 11, "Comment": "0x04", "ExpectedArm64ASM": [ "mov w20, #0xff", - "mvn w27, w4", - "lsl w0, w4, #24", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "lsl w0, w21, #24", "cmn w0, w20, lsl #24", - "add w26, w4, #0xff (255)", - "bfxil x4, x26, #0, #8" + "add w20, w21, #0xff (255)", + "mov x26, x20", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "add ax, -1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 11, "Comment": "0x05", "ExpectedArm64ASM": [ "mov w20, #0xffff", - "mvn w27, w4", - "lsl w0, w4, #16", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "lsl w0, w21, #16", "cmn w0, w20, lsl #16", - "add w26, w4, w20", - "bfxil x4, x26, #0, #16" + "add w22, w21, w20", + "mov x26, x22", + "mov x20, x21", + "bfxil x20, x22, #0, #16", + "mov x4, x20" ] }, "add eax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "0x05", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "mvn w27, w4", - "adds w26, w4, w20", - "mov x4, x26" + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "adds w22, w21, w20", + "mov x26, x22", + "mov x4, x22" ] }, "add rax, -1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x05", "ExpectedArm64ASM": [ - "mvn w27, w4", - "subs x26, x4, #0x1 (1)", - "mov x4, x26" + "mov x20, x4", + "mvn w21, w20", + "mov x27, x21", + "subs x21, x20, #0x1 (1)", + "mov x26, x21", + "mov x4, x21" ] }, "or bl, bh": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": "", "ExpectedArm64ASM": [ - "lsr w20, w7, #8", - "orr w26, w7, w20", - "bfxil x7, x26, #0, #8", - "cmn wzr, w26, lsl #24" + "mov x20, x7", + "lsr w21, w20, #8", + "orr w22, w20, w21", + "mov x21, x20", + "bfxil x21, x22, #0, #8", + "mov x7, x21", + "mov x26, x22", + "cmn wzr, w22, lsl #24" ] }, "or bl, cl": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 8, "Comment": "0x08", "ExpectedArm64ASM": [ - "orr w26, w7, w5", - "bfxil x7, x26, #0, #8", - "cmn wzr, w26, lsl #24" + "mov x20, x5", + "mov x21, x7", + "orr w22, w21, w20", + "mov x20, x21", + "bfxil x20, x22, #0, #8", + "mov x7, x20", + "mov x26, x22", + "cmn wzr, w22, lsl #24" ] }, "or bx, cx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 8, "Comment": "0x09", "ExpectedArm64ASM": [ - "orr w26, w7, w5", - "bfxil x7, x26, #0, #16", - "cmn wzr, w26, lsl #16" + "mov x20, x5", + "mov x21, x7", + "orr w22, w21, w20", + "mov x20, x21", + "bfxil x20, x22, #0, #16", + "mov x7, x20", + "mov x26, x22", + "cmn wzr, w22, lsl #16" ] }, "or ebx, ecx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x09", "ExpectedArm64ASM": [ - "orr w7, w7, w5", - "mov x26, x7", - "tst w7, w7" + "mov x20, x5", + "mov x21, x7", + "orr w22, w21, w20", + "mov x7, x22", + "mov x26, x22", + "tst w22, w22" ] }, "or rbx, rcx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x09", "ExpectedArm64ASM": [ - "orr x7, x7, x5", - "mov x26, x7", - "tst x7, x7" + "mov x20, x5", + "mov x21, x7", + "orr x22, x21, x20", + "mov x7, x22", + "mov x26, x22", + "tst x22, x22" ] }, "db 0x0A, 0xcb": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 8, "Comment": [ "0x0A", "or bl, cl but modrm.rm as source" ], "ExpectedArm64ASM": [ - "orr w26, w5, w7", - "bfxil x5, x26, #0, #8", - "cmn wzr, w26, lsl #24" + "mov x20, x7", + "mov x21, x5", + "orr w22, w21, w20", + "mov x20, x21", + "bfxil x20, x22, #0, #8", + "mov x5, x20", + "mov x26, x22", + "cmn wzr, w22, lsl #24" ] }, "db 0x66, 0x0B, 0xcb": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 8, "Comment": [ "0x0B", "or bx, cx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "orr w26, w5, w7", - "bfxil x5, x26, #0, #16", - "cmn wzr, w26, lsl #16" + "mov x20, x7", + "mov x21, x5", + "orr w22, w21, w20", + "mov x20, x21", + "bfxil x20, x22, #0, #16", + "mov x5, x20", + "mov x26, x22", + "cmn wzr, w22, lsl #16" ] }, "db 0x0B, 0xcb": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "0x0B", "or ebx, ecx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "orr w5, w5, w7", - "mov x26, x5", - "tst w5, w5" + "mov x20, x7", + "mov x21, x5", + "orr w22, w21, w20", + "mov x5, x22", + "mov x26, x22", + "tst w22, w22" ] }, "db 0x48, 0x0B, 0xcb": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "0x0B", "or rbx, rcx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "orr x5, x5, x7", - "mov x26, x5", - "tst x5, x5" + "mov x20, x7", + "mov x21, x5", + "orr x22, x21, x20", + "mov x5, x22", + "mov x26, x22", + "tst x22, x22" ] }, "or al, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x0C", "ExpectedArm64ASM": [ - "orr w26, w4, #0x1", - "bfxil x4, x26, #0, #8", - "cmn wzr, w26, lsl #24" + "mov x20, x4", + "orr w21, w20, #0x1", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22", + "mov x26, x21", + "cmn wzr, w21, lsl #24" ] }, "or ax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x0D", "ExpectedArm64ASM": [ - "orr w26, w4, #0x1", - "bfxil x4, x26, #0, #16", - "cmn wzr, w26, lsl #16" + "mov x20, x4", + "orr w21, w20, #0x1", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22", + "mov x26, x21", + "cmn wzr, w21, lsl #16" ] }, "or eax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0D", "ExpectedArm64ASM": [ - "orr w4, w4, #0x1", - "mov x26, x4", - "tst w4, w4" + "mov x20, x4", + "orr w21, w20, #0x1", + "mov x4, x21", + "mov x26, x21", + "tst w21, w21" ] }, "or rax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0D", "ExpectedArm64ASM": [ - "orr x4, x4, #0x1", - "mov x26, x4", - "tst x4, x4" + "mov x20, x4", + "orr x21, x20, #0x1", + "mov x4, x21", + "mov x26, x21", + "tst x21, x21" ] }, "or al, -1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x0C", "ExpectedArm64ASM": [ - "orr w26, w4, #0xff", - "bfxil x4, x26, #0, #8", - "cmn wzr, w26, lsl #24" + "mov x20, x4", + "orr w21, w20, #0xff", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22", + "mov x26, x21", + "cmn wzr, w21, lsl #24" ] }, "or ax, -1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x0D", "ExpectedArm64ASM": [ - "orr w26, w4, #0xffff", - "bfxil x4, x26, #0, #16", - "cmn wzr, w26, lsl #16" + "mov x20, x4", + "orr w21, w20, #0xffff", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22", + "mov x26, x21", + "cmn wzr, w21, lsl #16" ] }, "or eax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0D", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "orr w4, w4, w20", - "mov x26, x4", - "tst w4, w4" + "mov x21, x4", + "orr w22, w21, w20", + "mov x4, x22", + "mov x26, x22", + "tst w22, w22" ] }, "or rax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0D", "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", - "orr x4, x4, x20", - "mov x26, x4", - "tst x4, x4" + "mov x21, x4", + "orr x22, x21, x20", + "mov x4, x22", + "mov x26, x22", + "tst x22, x22" ] }, "adc bl, cl": { - "ExpectedInstructionCount": 20, + "ExpectedInstructionCount": 26, "Comment": "0x10", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "cset w20, hs", - "adc w21, w7, w5", - "uxtb w26, w21", - "cmp x26, x5", - "cset x21, lo", - "cmp x26, x5", - "cset x22, ls", - "cmp x20, #0x1 (1)", - "csel x20, x22, x21, eq", - "cmn wzr, w26, lsl #24", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "eor w21, w7, w5", - "eor w22, w26, w7", - "bic w21, w22, w21", - "ubfx x21, x21, #7, #1", - "orr w20, w20, w21, lsl #28", - "bfxil x7, x26, #0, #8", - "msr nzcv, x20" + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "cset w22, hs", + "adc w23, w21, w20", + "uxtb w24, w23", + "cmp x24, x20", + "cset x23, lo", + "cmp x24, x20", + "cset x25, ls", + "cmp x22, #0x1 (1)", + "csel x30, x25, x23, eq", + "cmn wzr, w24, lsl #24", + "mrs x22, nzcv", + "orr w23, w22, w30, lsl #29", + "eor w22, w21, w20", + "eor w20, w24, w21", + "bic w25, w20, w22", + "ubfx x20, x25, #7, #1", + "orr w22, w23, w20, lsl #28", + "mov x26, x24", + "mov x20, x21", + "bfxil x20, x24, #0, #8", + "mov x7, x20", + "msr nzcv, x22" ] }, "adc bx, cx": { - "ExpectedInstructionCount": 20, + "ExpectedInstructionCount": 26, "Comment": "0x11", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "cset w20, hs", - "adc w21, w7, w5", - "uxth w26, w21", - "cmp x26, x5", - "cset x21, lo", - "cmp x26, x5", - "cset x22, ls", - "cmp x20, #0x1 (1)", - "csel x20, x22, x21, eq", - "cmn wzr, w26, lsl #16", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "eor w21, w7, w5", - "eor w22, w26, w7", - "bic w21, w22, w21", - "ubfx x21, x21, #15, #1", - "orr w20, w20, w21, lsl #28", - "bfxil x7, x26, #0, #16", - "msr nzcv, x20" + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "cset w22, hs", + "adc w23, w21, w20", + "uxth w24, w23", + "cmp x24, x20", + "cset x23, lo", + "cmp x24, x20", + "cset x25, ls", + "cmp x22, #0x1 (1)", + "csel x30, x25, x23, eq", + "cmn wzr, w24, lsl #16", + "mrs x22, nzcv", + "orr w23, w22, w30, lsl #29", + "eor w22, w21, w20", + "eor w20, w24, w21", + "bic w25, w20, w22", + "ubfx x20, x25, #15, #1", + "orr w22, w23, w20, lsl #28", + "mov x26, x24", + "mov x20, x21", + "bfxil x20, x24, #0, #16", + "mov x7, x20", + "msr nzcv, x22" ] }, "adc ebx, ecx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x11", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "adcs w26, w7, w5", - "mov x7, x26" + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "adcs w22, w21, w20", + "mov x26, x22", + "mov x7, x22" ] }, "adc rbx, rcx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x11", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "adcs x26, x7, x5", - "mov x7, x26" + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "adcs x22, x21, x20", + "mov x26, x22", + "mov x7, x22" ] }, "db 0x12, 0xcb": { - "ExpectedInstructionCount": 20, + "ExpectedInstructionCount": 26, "Comment": [ "0x12", "adc bl, cl but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "cset w20, hs", - "adc w21, w5, w7", - "uxtb w26, w21", - "cmp x26, x7", - "cset x21, lo", - "cmp x26, x7", - "cset x22, ls", - "cmp x20, #0x1 (1)", - "csel x20, x22, x21, eq", - "cmn wzr, w26, lsl #24", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "eor w21, w5, w7", - "eor w22, w26, w5", - "bic w21, w22, w21", - "ubfx x21, x21, #7, #1", - "orr w20, w20, w21, lsl #28", - "bfxil x5, x26, #0, #8", - "msr nzcv, x20" + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "cset w22, hs", + "adc w23, w21, w20", + "uxtb w24, w23", + "cmp x24, x20", + "cset x23, lo", + "cmp x24, x20", + "cset x25, ls", + "cmp x22, #0x1 (1)", + "csel x30, x25, x23, eq", + "cmn wzr, w24, lsl #24", + "mrs x22, nzcv", + "orr w23, w22, w30, lsl #29", + "eor w22, w21, w20", + "eor w20, w24, w21", + "bic w25, w20, w22", + "ubfx x20, x25, #7, #1", + "orr w22, w23, w20, lsl #28", + "mov x26, x24", + "mov x20, x21", + "bfxil x20, x24, #0, #8", + "mov x5, x20", + "msr nzcv, x22" ] }, "db 0x66, 0x13, 0xcb": { - "ExpectedInstructionCount": 20, + "ExpectedInstructionCount": 26, "Comment": [ "0x13", "adc bx, cx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "cset w20, hs", - "adc w21, w5, w7", - "uxth w26, w21", - "cmp x26, x7", - "cset x21, lo", - "cmp x26, x7", - "cset x22, ls", - "cmp x20, #0x1 (1)", - "csel x20, x22, x21, eq", - "cmn wzr, w26, lsl #16", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "eor w21, w5, w7", - "eor w22, w26, w5", - "bic w21, w22, w21", - "ubfx x21, x21, #15, #1", - "orr w20, w20, w21, lsl #28", - "bfxil x5, x26, #0, #16", - "msr nzcv, x20" + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "cset w22, hs", + "adc w23, w21, w20", + "uxth w24, w23", + "cmp x24, x20", + "cset x23, lo", + "cmp x24, x20", + "cset x25, ls", + "cmp x22, #0x1 (1)", + "csel x30, x25, x23, eq", + "cmn wzr, w24, lsl #16", + "mrs x22, nzcv", + "orr w23, w22, w30, lsl #29", + "eor w22, w21, w20", + "eor w20, w24, w21", + "bic w25, w20, w22", + "ubfx x20, x25, #15, #1", + "orr w22, w23, w20, lsl #28", + "mov x26, x24", + "mov x20, x21", + "bfxil x20, x24, #0, #16", + "mov x5, x20", + "msr nzcv, x22" ] }, "db 0x13, 0xcb": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0x13", "adc ebx, ecx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "adcs w26, w5, w7", - "mov x5, x26" + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "adcs w22, w21, w20", + "mov x26, x22", + "mov x5, x22" ] }, "db 0x48, 0x13, 0xcb": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0x13", "adc rbx, rcx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "adcs x26, x5, x7", - "mov x5, x26" + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "adcs x22, x21, x20", + "mov x26, x22", + "mov x5, x22" ] }, "adc al, 1": { - "ExpectedInstructionCount": 20, + "ExpectedInstructionCount": 23, "Comment": "0x14", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "cset w21, hs", - "adc w20, w27, w20", - "uxtb w26, w20", - "cmp w26, #0x1 (1)", - "cset x20, lo", - "cmp w26, #0x1 (1)", - "cset x22, ls", - "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #24", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "bic w21, w26, w27", - "ubfx x21, x21, #7, #1", - "orr w20, w20, w21, lsl #28", - "mov x4, x27", - "bfxil x4, x26, #0, #8", - "msr nzcv, x20" + "mov x21, x4", + "mov x27, x21", + "cset w22, hs", + "adc w23, w21, w20", + "uxtb w20, w23", + "cmp w20, #0x1 (1)", + "cset x23, lo", + "cmp w20, #0x1 (1)", + "cset x24, ls", + "cmp x22, #0x1 (1)", + "csel x25, x24, x23, eq", + "cmn wzr, w20, lsl #24", + "mrs x22, nzcv", + "orr w23, w22, w25, lsl #29", + "bic w22, w20, w21", + "ubfx x24, x22, #7, #1", + "orr w22, w23, w24, lsl #28", + "mov x26, x20", + "mov x23, x21", + "bfxil x23, x20, #0, #8", + "mov x4, x23", + "msr nzcv, x22" ] }, "adc ax, 1": { - "ExpectedInstructionCount": 20, + "ExpectedInstructionCount": 23, "Comment": "0x15", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "cset w21, hs", - "adc w20, w27, w20", - "uxth w26, w20", - "cmp w26, #0x1 (1)", - "cset x20, lo", - "cmp w26, #0x1 (1)", - "cset x22, ls", - "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #16", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "bic w21, w26, w27", - "ubfx x21, x21, #15, #1", - "orr w20, w20, w21, lsl #28", - "mov x4, x27", - "bfxil x4, x26, #0, #16", - "msr nzcv, x20" + "mov x21, x4", + "mov x27, x21", + "cset w22, hs", + "adc w23, w21, w20", + "uxth w20, w23", + "cmp w20, #0x1 (1)", + "cset x23, lo", + "cmp w20, #0x1 (1)", + "cset x24, ls", + "cmp x22, #0x1 (1)", + "csel x25, x24, x23, eq", + "cmn wzr, w20, lsl #16", + "mrs x22, nzcv", + "orr w23, w22, w25, lsl #29", + "bic w22, w20, w21", + "ubfx x24, x22, #15, #1", + "orr w22, w23, w24, lsl #28", + "mov x26, x20", + "mov x23, x21", + "bfxil x23, x20, #0, #16", + "mov x4, x23", + "msr nzcv, x22" ] }, "adc eax, 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x15", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "adcs w26, w27, w20", - "mov x4, x26" + "mov x21, x4", + "mov x27, x21", + "adcs w22, w21, w20", + "mov x26, x22", + "mov x4, x22" ] }, "adc rax, 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x15", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "adcs x26, x27, x20", - "mov x4, x26" + "mov x21, x4", + "mov x27, x21", + "adcs x22, x21, x20", + "mov x26, x22", + "mov x4, x22" ] }, "adc al, -1": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 24, "Comment": "0x14", "ExpectedArm64ASM": [ "mov w20, #0xff", - "mvn w27, w4", - "cset w21, hs", - "adc w20, w4, w20", - "uxtb w26, w20", - "cmp w26, #0xff (255)", - "cset x20, lo", - "cmp w26, #0xff (255)", - "cset x22, ls", - "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #24", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "bic w21, w4, w26", - "ubfx x21, x21, #7, #1", - "orr w20, w20, w21, lsl #28", - "bfxil x4, x26, #0, #8", - "msr nzcv, x20" + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "cset w22, hs", + "adc w23, w21, w20", + "uxtb w20, w23", + "cmp w20, #0xff (255)", + "cset x23, lo", + "cmp w20, #0xff (255)", + "cset x24, ls", + "cmp x22, #0x1 (1)", + "csel x25, x24, x23, eq", + "cmn wzr, w20, lsl #24", + "mrs x22, nzcv", + "orr w23, w22, w25, lsl #29", + "bic w22, w21, w20", + "ubfx x24, x22, #7, #1", + "orr w22, w23, w24, lsl #28", + "mov x26, x20", + "mov x23, x21", + "bfxil x23, x20, #0, #8", + "mov x4, x23", + "msr nzcv, x22" ] }, "adc ax, -1": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 24, "Comment": "0x15", "ExpectedArm64ASM": [ "mov w20, #0xffff", - "mvn w27, w4", - "cset w21, hs", - "adc w22, w4, w20", - "uxth w26, w22", - "cmp w26, w20", - "cset x22, lo", - "cmp w26, w20", - "cset x20, ls", - "cmp x21, #0x1 (1)", - "csel x20, x20, x22, eq", - "cmn wzr, w26, lsl #16", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "bic w21, w4, w26", - "ubfx x21, x21, #15, #1", - "orr w20, w20, w21, lsl #28", - "bfxil x4, x26, #0, #16", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "cset w22, hs", + "adc w23, w21, w20", + "uxth w24, w23", + "cmp w24, w20", + "cset x23, lo", + "cmp w24, w20", + "cset x25, ls", + "cmp x22, #0x1 (1)", + "csel x20, x25, x23, eq", + "cmn wzr, w24, lsl #16", + "mrs x22, nzcv", + "orr w23, w22, w20, lsl #29", + "bic w20, w21, w24", + "ubfx x22, x20, #15, #1", + "orr w20, w23, w22, lsl #28", + "mov x26, x24", + "mov x22, x21", + "bfxil x22, x24, #0, #16", + "mov x4, x22", "msr nzcv, x20" ] }, "adc eax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "0x15", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "mvn w27, w4", - "adcs w26, w4, w20", - "mov x4, x26" + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "adcs w22, w21, w20", + "mov x26, x22", + "mov x4, x22" ] }, "adc rax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "0x15", "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", - "mvn w27, w4", - "adcs x26, x4, x20", - "mov x4, x26" + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "adcs x22, x21, x20", + "mov x26, x22", + "mov x4, x22" ] }, "sbb bl, cl": { - "ExpectedInstructionCount": 21, + "ExpectedInstructionCount": 27, "Comment": "0x18", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "cset w20, hs", - "add w21, w5, w20", - "sub w21, w7, w21", - "uxtb w26, w21", - "cmp x26, x7", - "cset x21, hi", - "cmp x26, x7", - "cset x22, hs", - "cmp x20, #0x1 (1)", - "csel x20, x22, x21, eq", - "cmn wzr, w26, lsl #24", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "eor w21, w7, w5", - "eor w22, w26, w7", - "and w21, w22, w21", - "ubfx x21, x21, #7, #1", - "orr w20, w20, w21, lsl #28", - "bfxil x7, x26, #0, #8", - "msr nzcv, x20" + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "cset w22, hs", + "add w23, w20, w22", + "sub w24, w21, w23", + "uxtb w23, w24", + "cmp x23, x21", + "cset x24, hi", + "cmp x23, x21", + "cset x25, hs", + "cmp x22, #0x1 (1)", + "csel x30, x25, x24, eq", + "cmn wzr, w23, lsl #24", + "mrs x22, nzcv", + "orr w24, w22, w30, lsl #29", + "eor w22, w21, w20", + "eor w20, w23, w21", + "and w25, w20, w22", + "ubfx x20, x25, #7, #1", + "orr w22, w24, w20, lsl #28", + "mov x26, x23", + "mov x20, x21", + "bfxil x20, x23, #0, #8", + "mov x7, x20", + "msr nzcv, x22" ] }, "sbb bx, cx": { - "ExpectedInstructionCount": 21, + "ExpectedInstructionCount": 27, "Comment": "0x19", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "cset w20, hs", - "add w21, w5, w20", - "sub w21, w7, w21", - "uxth w26, w21", - "cmp x26, x7", - "cset x21, hi", - "cmp x26, x7", - "cset x22, hs", - "cmp x20, #0x1 (1)", - "csel x20, x22, x21, eq", - "cmn wzr, w26, lsl #16", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "eor w21, w7, w5", - "eor w22, w26, w7", - "and w21, w22, w21", - "ubfx x21, x21, #15, #1", - "orr w20, w20, w21, lsl #28", - "bfxil x7, x26, #0, #16", - "msr nzcv, x20" + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "cset w22, hs", + "add w23, w20, w22", + "sub w24, w21, w23", + "uxth w23, w24", + "cmp x23, x21", + "cset x24, hi", + "cmp x23, x21", + "cset x25, hs", + "cmp x22, #0x1 (1)", + "csel x30, x25, x24, eq", + "cmn wzr, w23, lsl #16", + "mrs x22, nzcv", + "orr w24, w22, w30, lsl #29", + "eor w22, w21, w20", + "eor w20, w23, w21", + "and w25, w20, w22", + "ubfx x20, x25, #15, #1", + "orr w22, w24, w20, lsl #28", + "mov x26, x23", + "mov x20, x21", + "bfxil x20, x23, #0, #16", + "mov x7, x20", + "msr nzcv, x22" ] }, "sbb ebx, ecx": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 13, "Comment": "0x19", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20", - "sbcs w26, w7, w5", + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "mrs x22, nzcv", + "eor w23, w22, #0x20000000", + "msr nzcv, x23", + "sbcs w22, w21, w20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x7, x26", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "mov x26, x22", + "mov x7, x22", + "msr nzcv, x21" ] }, "sbb rbx, rcx": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 13, "Comment": "0x19", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20", - "sbcs x26, x7, x5", + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "mrs x22, nzcv", + "eor w23, w22, #0x20000000", + "msr nzcv, x23", + "sbcs x22, x21, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x7, x26", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "mov x26, x22", + "mov x7, x22", + "msr nzcv, x21" ] }, "db 0x1A, 0xcb": { - "ExpectedInstructionCount": 21, + "ExpectedInstructionCount": 27, "Comment": [ "0x1A", "sbb bl, cl but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "cset w20, hs", - "add w21, w7, w20", - "sub w21, w5, w21", - "uxtb w26, w21", - "cmp x26, x5", - "cset x21, hi", - "cmp x26, x5", - "cset x22, hs", - "cmp x20, #0x1 (1)", - "csel x20, x22, x21, eq", - "cmn wzr, w26, lsl #24", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "eor w21, w5, w7", - "eor w22, w26, w5", - "and w21, w22, w21", - "ubfx x21, x21, #7, #1", - "orr w20, w20, w21, lsl #28", - "bfxil x5, x26, #0, #8", - "msr nzcv, x20" + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "cset w22, hs", + "add w23, w20, w22", + "sub w24, w21, w23", + "uxtb w23, w24", + "cmp x23, x21", + "cset x24, hi", + "cmp x23, x21", + "cset x25, hs", + "cmp x22, #0x1 (1)", + "csel x30, x25, x24, eq", + "cmn wzr, w23, lsl #24", + "mrs x22, nzcv", + "orr w24, w22, w30, lsl #29", + "eor w22, w21, w20", + "eor w20, w23, w21", + "and w25, w20, w22", + "ubfx x20, x25, #7, #1", + "orr w22, w24, w20, lsl #28", + "mov x26, x23", + "mov x20, x21", + "bfxil x20, x23, #0, #8", + "mov x5, x20", + "msr nzcv, x22" ] }, "db 0x66, 0x1B, 0xcb": { - "ExpectedInstructionCount": 21, + "ExpectedInstructionCount": 27, "Comment": [ "0x1B", "sbb bx, cx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "cset w20, hs", - "add w21, w7, w20", - "sub w21, w5, w21", - "uxth w26, w21", - "cmp x26, x5", - "cset x21, hi", - "cmp x26, x5", - "cset x22, hs", - "cmp x20, #0x1 (1)", - "csel x20, x22, x21, eq", - "cmn wzr, w26, lsl #16", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "eor w21, w5, w7", - "eor w22, w26, w5", - "and w21, w22, w21", - "ubfx x21, x21, #15, #1", - "orr w20, w20, w21, lsl #28", - "bfxil x5, x26, #0, #16", - "msr nzcv, x20" + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "cset w22, hs", + "add w23, w20, w22", + "sub w24, w21, w23", + "uxth w23, w24", + "cmp x23, x21", + "cset x24, hi", + "cmp x23, x21", + "cset x25, hs", + "cmp x22, #0x1 (1)", + "csel x30, x25, x24, eq", + "cmn wzr, w23, lsl #16", + "mrs x22, nzcv", + "orr w24, w22, w30, lsl #29", + "eor w22, w21, w20", + "eor w20, w23, w21", + "and w25, w20, w22", + "ubfx x20, x25, #15, #1", + "orr w22, w24, w20, lsl #28", + "mov x26, x23", + "mov x20, x21", + "bfxil x20, x23, #0, #16", + "mov x5, x20", + "msr nzcv, x22" ] }, "db 0x1B, 0xcb": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 13, "Comment": [ "0x1B", "sbb ebx, ecx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20", - "sbcs w26, w5, w7", + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "mrs x22, nzcv", + "eor w23, w22, #0x20000000", + "msr nzcv, x23", + "sbcs w22, w21, w20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x5, x26", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "mov x26, x22", + "mov x5, x22", + "msr nzcv, x21" ] }, "db 0x48, 0x1B, 0xcb": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 13, "Comment": [ "0x1B", "sbb rbx, rcx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20", - "sbcs x26, x5, x7", + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "mrs x22, nzcv", + "eor w23, w22, #0x20000000", + "msr nzcv, x23", + "sbcs x22, x21, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x5, x26", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "mov x26, x22", + "mov x5, x22", + "msr nzcv, x21" ] }, "sbb al, 1": { - "ExpectedInstructionCount": 21, + "ExpectedInstructionCount": 24, "Comment": "0x1C", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "cset w21, hs", - "add w20, w20, w21", - "sub w20, w27, w20", - "uxtb w26, w20", - "cmp x26, x27", + "mov x21, x4", + "mov x27, x21", + "cset w22, hs", + "add w23, w20, w22", + "sub w20, w21, w23", + "uxtb w23, w20", + "cmp x23, x21", "cset x20, hi", - "cmp x26, x27", - "cset x22, hs", - "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #24", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "bic w21, w27, w26", - "ubfx x21, x21, #7, #1", - "orr w20, w20, w21, lsl #28", - "mov x4, x27", - "bfxil x4, x26, #0, #8", + "cmp x23, x21", + "cset x24, hs", + "cmp x22, #0x1 (1)", + "csel x25, x24, x20, eq", + "cmn wzr, w23, lsl #24", + "mrs x20, nzcv", + "orr w22, w20, w25, lsl #29", + "bic w20, w21, w23", + "ubfx x24, x20, #7, #1", + "orr w20, w22, w24, lsl #28", + "mov x26, x23", + "mov x22, x21", + "bfxil x22, x23, #0, #8", + "mov x4, x22", "msr nzcv, x20" ] }, "sbb ax, 1": { - "ExpectedInstructionCount": 21, + "ExpectedInstructionCount": 24, "Comment": "0x1D", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "cset w21, hs", - "add w20, w20, w21", - "sub w20, w27, w20", - "uxth w26, w20", - "cmp x26, x27", + "mov x21, x4", + "mov x27, x21", + "cset w22, hs", + "add w23, w20, w22", + "sub w20, w21, w23", + "uxth w23, w20", + "cmp x23, x21", "cset x20, hi", - "cmp x26, x27", - "cset x22, hs", - "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #16", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "bic w21, w27, w26", - "ubfx x21, x21, #15, #1", - "orr w20, w20, w21, lsl #28", - "mov x4, x27", - "bfxil x4, x26, #0, #16", + "cmp x23, x21", + "cset x24, hs", + "cmp x22, #0x1 (1)", + "csel x25, x24, x20, eq", + "cmn wzr, w23, lsl #16", + "mrs x20, nzcv", + "orr w22, w20, w25, lsl #29", + "bic w20, w21, w23", + "ubfx x24, x20, #15, #1", + "orr w20, w22, w24, lsl #28", + "mov x26, x23", + "mov x22, x21", + "bfxil x22, x23, #0, #16", + "mov x4, x22", "msr nzcv, x20" ] }, "sbb eax, 1": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 12, "Comment": "0x1D", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "mrs x21, nzcv", - "eor w21, w21, #0x20000000", - "msr nzcv, x21", - "sbcs w26, w27, w20", + "mov x21, x4", + "mov x27, x21", + "mrs x22, nzcv", + "eor w23, w22, #0x20000000", + "msr nzcv, x23", + "sbcs w22, w21, w20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x4, x26", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "mov x26, x22", + "mov x4, x22", + "msr nzcv, x21" ] }, "sbb rax, 1": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 12, "Comment": "0x1D", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "mrs x21, nzcv", - "eor w21, w21, #0x20000000", - "msr nzcv, x21", - "sbcs x26, x27, x20", + "mov x21, x4", + "mov x27, x21", + "mrs x22, nzcv", + "eor w23, w22, #0x20000000", + "msr nzcv, x23", + "sbcs x22, x21, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x4, x26", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "mov x26, x22", + "mov x4, x22", + "msr nzcv, x21" ] }, "sbb al, -1": { - "ExpectedInstructionCount": 20, + "ExpectedInstructionCount": 25, "Comment": "0x1C", "ExpectedArm64ASM": [ "mov w20, #0xff", - "mvn w27, w4", - "cset w21, hs", - "add w20, w20, w21", - "sub w20, w4, w20", - "uxtb w26, w20", - "cmp x26, x4", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "cset w22, hs", + "add w23, w20, w22", + "sub w20, w21, w23", + "uxtb w23, w20", + "cmp x23, x21", "cset x20, hi", - "cmp x26, x4", - "cset x22, hs", - "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #24", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "bic w21, w26, w4", - "ubfx x21, x21, #7, #1", - "orr w20, w20, w21, lsl #28", - "bfxil x4, x26, #0, #8", + "cmp x23, x21", + "cset x24, hs", + "cmp x22, #0x1 (1)", + "csel x25, x24, x20, eq", + "cmn wzr, w23, lsl #24", + "mrs x20, nzcv", + "orr w22, w20, w25, lsl #29", + "bic w20, w23, w21", + "ubfx x24, x20, #7, #1", + "orr w20, w22, w24, lsl #28", + "mov x26, x23", + "mov x22, x21", + "bfxil x22, x23, #0, #8", + "mov x4, x22", "msr nzcv, x20" ] }, "sbb ax, -1": { - "ExpectedInstructionCount": 20, + "ExpectedInstructionCount": 25, "Comment": "0x1D", "ExpectedArm64ASM": [ "mov w20, #0xffff", - "mvn w27, w4", - "cset w21, hs", - "add w20, w20, w21", - "sub w20, w4, w20", - "uxth w26, w20", - "cmp x26, x4", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "cset w22, hs", + "add w23, w20, w22", + "sub w20, w21, w23", + "uxth w23, w20", + "cmp x23, x21", "cset x20, hi", - "cmp x26, x4", - "cset x22, hs", - "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #16", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "bic w21, w26, w4", - "ubfx x21, x21, #15, #1", - "orr w20, w20, w21, lsl #28", - "bfxil x4, x26, #0, #16", + "cmp x23, x21", + "cset x24, hs", + "cmp x22, #0x1 (1)", + "csel x25, x24, x20, eq", + "cmn wzr, w23, lsl #16", + "mrs x20, nzcv", + "orr w22, w20, w25, lsl #29", + "bic w20, w23, w21", + "ubfx x24, x20, #15, #1", + "orr w20, w22, w24, lsl #28", + "mov x26, x23", + "mov x22, x21", + "bfxil x22, x23, #0, #16", + "mov x4, x22", "msr nzcv, x20" ] }, "sbb eax, -1": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 13, "Comment": "0x1D", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "mvn w27, w4", - "mrs x21, nzcv", - "eor w21, w21, #0x20000000", - "msr nzcv, x21", - "sbcs w26, w4, w20", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "mrs x22, nzcv", + "eor w23, w22, #0x20000000", + "msr nzcv, x23", + "sbcs w22, w21, w20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x4, x26", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "mov x26, x22", + "mov x4, x22", + "msr nzcv, x21" ] }, "sbb rax, -1": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 13, "Comment": "0x1D", "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", - "mvn w27, w4", - "mrs x21, nzcv", - "eor w21, w21, #0x20000000", - "msr nzcv, x21", - "sbcs x26, x4, x20", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "mrs x22, nzcv", + "eor w23, w22, #0x20000000", + "msr nzcv, x23", + "sbcs x22, x21, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x4, x26", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "mov x26, x22", + "mov x4, x22", + "msr nzcv, x21" ] }, "and bl, cl": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 8, "Comment": "0x20", "ExpectedArm64ASM": [ - "and w26, w7, w5", - "cmn wzr, w26, lsl #24", - "bfxil x7, x26, #0, #8" + "mov x20, x5", + "mov x21, x7", + "and w22, w21, w20", + "cmn wzr, w22, lsl #24", + "mov x26, x22", + "mov x20, x21", + "bfxil x20, x22, #0, #8", + "mov x7, x20" ] }, "and bx, cx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 8, "Comment": "0x21", "ExpectedArm64ASM": [ - "and w26, w7, w5", - "cmn wzr, w26, lsl #16", - "bfxil x7, x26, #0, #16" + "mov x20, x5", + "mov x21, x7", + "and w22, w21, w20", + "cmn wzr, w22, lsl #16", + "mov x26, x22", + "mov x20, x21", + "bfxil x20, x22, #0, #16", + "mov x7, x20" ] }, "and ebx, ecx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x21", "ExpectedArm64ASM": [ - "ands w26, w7, w5", - "mov x7, x26" + "mov x20, x5", + "mov x21, x7", + "ands w22, w21, w20", + "mov x26, x22", + "mov x7, x22" ] }, "and rbx, rcx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x21", "ExpectedArm64ASM": [ - "ands x26, x7, x5", - "mov x7, x26" + "mov x20, x5", + "mov x21, x7", + "ands x22, x21, x20", + "mov x26, x22", + "mov x7, x22" ] }, "db 0x22, 0xcb": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 8, "Comment": [ "0x22", "and bl, cl but modrm.rm as source" ], "ExpectedArm64ASM": [ - "and w26, w5, w7", - "cmn wzr, w26, lsl #24", - "bfxil x5, x26, #0, #8" - ] + "mov x20, x7", + "mov x21, x5", + "and w22, w21, w20", + "cmn wzr, w22, lsl #24", + "mov x26, x22", + "mov x20, x21", + "bfxil x20, x22, #0, #8", + "mov x5, x20" + ] }, "db 0x66, 0x23, 0xcb": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 8, "Comment": [ "0x23", "and bx, cx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "and w26, w5, w7", - "cmn wzr, w26, lsl #16", - "bfxil x5, x26, #0, #16" + "mov x20, x7", + "mov x21, x5", + "and w22, w21, w20", + "cmn wzr, w22, lsl #16", + "mov x26, x22", + "mov x20, x21", + "bfxil x20, x22, #0, #16", + "mov x5, x20" ] }, "db 0x23, 0xcb": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "0x23", "and ebx, ecx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "ands w26, w5, w7", - "mov x5, x26" + "mov x20, x7", + "mov x21, x5", + "ands w22, w21, w20", + "mov x26, x22", + "mov x5, x22" ] }, "db 0x48, 0x23, 0xcb": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "0x23", "and rbx, rcx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "ands x26, x5, x7", - "mov x5, x26" + "mov x20, x7", + "mov x21, x5", + "ands x22, x21, x20", + "mov x26, x22", + "mov x5, x22" ] }, "and al, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x24", "ExpectedArm64ASM": [ - "and w26, w4, #0x1", - "cmn wzr, w26, lsl #24", - "bfxil x4, x26, #0, #8" + "mov x20, x4", + "and w21, w20, #0x1", + "cmn wzr, w21, lsl #24", + "mov x26, x21", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22" ] }, "and ax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x25", "ExpectedArm64ASM": [ - "and w26, w4, #0x1", - "cmn wzr, w26, lsl #16", - "bfxil x4, x26, #0, #16" + "mov x20, x4", + "and w21, w20, #0x1", + "cmn wzr, w21, lsl #16", + "mov x26, x21", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22" ] }, "and eax, 1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0x25", "ExpectedArm64ASM": [ - "ands w26, w4, #0x1", - "mov x4, x26" + "mov x20, x4", + "ands w21, w20, #0x1", + "mov x26, x21", + "mov x4, x21" ] }, "and rax, 1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0x25", "ExpectedArm64ASM": [ - "ands x26, x4, #0x1", - "mov x4, x26" + "mov x20, x4", + "ands x21, x20, #0x1", + "mov x26, x21", + "mov x4, x21" ] }, "and al, -1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x24", "ExpectedArm64ASM": [ - "and w26, w4, #0xff", - "cmn wzr, w26, lsl #24", - "bfxil x4, x26, #0, #8" + "mov x20, x4", + "and w21, w20, #0xff", + "cmn wzr, w21, lsl #24", + "mov x26, x21", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22" ] }, "and ax, -1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x25", "ExpectedArm64ASM": [ - "and w26, w4, #0xffff", - "cmn wzr, w26, lsl #16", - "bfxil x4, x26, #0, #16" + "mov x20, x4", + "and w21, w20, #0xffff", + "cmn wzr, w21, lsl #16", + "mov x26, x21", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22" ] }, "and eax, -1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x25", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "ands w26, w4, w20", - "mov x4, x26" + "mov x21, x4", + "ands w22, w21, w20", + "mov x26, x22", + "mov x4, x22" ] }, "and rax, -1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x25", "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", - "ands x26, x4, x20", - "mov x4, x26" + "mov x21, x4", + "ands x22, x21, x20", + "mov x26, x22", + "mov x4, x22" ] }, "sub bl, cl": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 14, "Comment": "0x28", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "lsl w0, w7, #24", - "cmp w0, w5, lsl #24", - "sub w26, w7, w5", + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "lsl w0, w21, #24", + "cmp w0, w20, lsl #24", + "sub w22, w21, w20", + "mov x26, x22", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "bfxil x7, x26, #0, #8", - "msr nzcv, x20" + "eor w23, w20, #0x20000000", + "mov x20, x21", + "bfxil x20, x22, #0, #8", + "mov x7, x20", + "msr nzcv, x23" ] }, "sub bx, cx": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 14, "Comment": "0x29", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "lsl w0, w7, #16", - "cmp w0, w5, lsl #16", - "sub w26, w7, w5", + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "lsl w0, w21, #16", + "cmp w0, w20, lsl #16", + "sub w22, w21, w20", + "mov x26, x22", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "bfxil x7, x26, #0, #16", - "msr nzcv, x20" + "eor w23, w20, #0x20000000", + "mov x20, x21", + "bfxil x20, x22, #0, #16", + "mov x7, x20", + "msr nzcv, x23" ] }, "sub ebx, ecx": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": "0x29", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "subs w26, w7, w5", + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "subs w22, w21, w20", + "mov x26, x22", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x7, x26", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "mov x7, x22", + "msr nzcv, x21" ] }, "sub rbx, rcx": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": "0x29", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "subs x26, x7, x5", + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "subs x22, x21, x20", + "mov x26, x22", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x7, x26", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "mov x7, x22", + "msr nzcv, x21" ] }, "db 0x2A, 0xcb": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 14, "Comment": [ "0x2A", "sub bl, cl but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "lsl w0, w5, #24", - "cmp w0, w7, lsl #24", - "sub w26, w5, w7", + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "lsl w0, w21, #24", + "cmp w0, w20, lsl #24", + "sub w22, w21, w20", + "mov x26, x22", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "bfxil x5, x26, #0, #8", - "msr nzcv, x20" + "eor w23, w20, #0x20000000", + "mov x20, x21", + "bfxil x20, x22, #0, #8", + "mov x5, x20", + "msr nzcv, x23" ] }, "db 0x66, 0x2B, 0xcb": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 14, "Comment": [ "0x2B", "sub bx, cx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "lsl w0, w5, #16", - "cmp w0, w7, lsl #16", - "sub w26, w5, w7", + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "lsl w0, w21, #16", + "cmp w0, w20, lsl #16", + "sub w22, w21, w20", + "mov x26, x22", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "bfxil x5, x26, #0, #16", - "msr nzcv, x20" + "eor w23, w20, #0x20000000", + "mov x20, x21", + "bfxil x20, x22, #0, #16", + "mov x5, x20", + "msr nzcv, x23" ] }, "db 0x2B, 0xcb": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": [ "0x2B", "sub ebx, ecx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "subs w26, w5, w7", + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "subs w22, w21, w20", + "mov x26, x22", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x5, x26", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "mov x5, x22", + "msr nzcv, x21" ] }, "db 0x48, 0x2B, 0xcb": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": [ "0x2B", "sub rbx, rcx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "subs x26, x5, x7", + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "subs x22, x21, x20", + "mov x26, x22", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x5, x26", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "mov x5, x22", + "msr nzcv, x21" ] }, "sub al, 1": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 13, "Comment": "0x2C", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "lsl w0, w27, #24", + "mov x21, x4", + "mov x27, x21", + "lsl w0, w21, #24", "cmp w0, w20, lsl #24", - "sub w26, w27, #0x1 (1)", - "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x4, x27", - "bfxil x4, x26, #0, #8", - "msr nzcv, x20" + "sub w20, w21, #0x1 (1)", + "mov x26, x20", + "mrs x22, nzcv", + "eor w23, w22, #0x20000000", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22", + "msr nzcv, x23" ] }, "sub ax, 1": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 13, "Comment": "0x2D", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "lsl w0, w27, #16", + "mov x21, x4", + "mov x27, x21", + "lsl w0, w21, #16", "cmp w0, w20, lsl #16", - "sub w26, w27, #0x1 (1)", - "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x4, x27", - "bfxil x4, x26, #0, #16", - "msr nzcv, x20" + "sub w20, w21, #0x1 (1)", + "mov x26, x20", + "mrs x22, nzcv", + "eor w23, w22, #0x20000000", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x4, x22", + "msr nzcv, x23" ] }, "sub eax, 1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "0x2D", "ExpectedArm64ASM": [ - "mov x27, x4", - "subs w26, w27, #0x1 (1)", + "mov x20, x4", + "mov x27, x20", + "subs w21, w20, #0x1 (1)", + "mov x26, x21", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x4, x26", - "msr nzcv, x20" + "eor w22, w20, #0x20000000", + "mov x4, x21", + "msr nzcv, x22" ] }, "sub rax, 1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "0x2D", "ExpectedArm64ASM": [ - "mov x27, x4", - "subs x26, x27, #0x1 (1)", + "mov x20, x4", + "mov x27, x20", + "subs x21, x20, #0x1 (1)", + "mov x26, x21", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x4, x26", - "msr nzcv, x20" + "eor w22, w20, #0x20000000", + "mov x4, x21", + "msr nzcv, x22" ] }, "sub al, -1": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 14, "Comment": "0x2C", "ExpectedArm64ASM": [ "mov w20, #0xff", - "mvn w27, w4", - "lsl w0, w4, #24", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "lsl w0, w21, #24", "cmp w0, w20, lsl #24", - "sub w26, w4, #0xff (255)", - "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "bfxil x4, x26, #0, #8", - "msr nzcv, x20" + "sub w20, w21, #0xff (255)", + "mov x26, x20", + "mrs x22, nzcv", + "eor w23, w22, #0x20000000", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22", + "msr nzcv, x23" ] }, "sub ax, -1": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 14, "Comment": "0x2D", "ExpectedArm64ASM": [ "mov w20, #0xffff", - "mvn w27, w4", - "lsl w0, w4, #16", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "lsl w0, w21, #16", "cmp w0, w20, lsl #16", - "sub w26, w4, w20", + "sub w22, w21, w20", + "mov x26, x22", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "bfxil x4, x26, #0, #16", - "msr nzcv, x20" + "eor w23, w20, #0x20000000", + "mov x20, x21", + "bfxil x20, x22, #0, #16", + "mov x4, x20", + "msr nzcv, x23" ] }, "sub eax, -1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "0x2D", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "mvn w27, w4", - "subs w26, w4, w20", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "subs w22, w21, w20", + "mov x26, x22", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x4, x26", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "mov x4, x22", + "msr nzcv, x21" ] }, "sub rax, -1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": "0x2D", "ExpectedArm64ASM": [ - "mvn w27, w4", - "adds x26, x4, #0x1 (1)", + "mov x20, x4", + "mvn w21, w20", + "mov x27, x21", + "adds x21, x20, #0x1 (1)", + "mov x26, x21", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x4, x26", - "msr nzcv, x20" + "eor w22, w20, #0x20000000", + "mov x4, x21", + "msr nzcv, x22" ] }, "xor bl, cl": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 8, "Comment": "0x30", "ExpectedArm64ASM": [ - "eor w26, w7, w5", - "bfxil x7, x26, #0, #8", - "cmn wzr, w26, lsl #24" + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x20, x21", + "bfxil x20, x22, #0, #8", + "mov x7, x20", + "mov x26, x22", + "cmn wzr, w22, lsl #24" ] }, "xor bx, cx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 8, "Comment": "0x31", "ExpectedArm64ASM": [ - "eor w26, w7, w5", - "bfxil x7, x26, #0, #16", - "cmn wzr, w26, lsl #16" + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x20, x21", + "bfxil x20, x22, #0, #16", + "mov x7, x20", + "mov x26, x22", + "cmn wzr, w22, lsl #16" ] }, "xor ebx, ecx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x31", "ExpectedArm64ASM": [ - "eor w7, w7, w5", - "mov x26, x7", - "tst w7, w7" + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x7, x22", + "mov x26, x22", + "tst w22, w22" ] }, "xor rbx, rcx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x31", "ExpectedArm64ASM": [ - "eor x7, x7, x5", - "mov x26, x7", - "tst x7, x7" + "mov x20, x5", + "mov x21, x7", + "eor x22, x21, x20", + "mov x7, x22", + "mov x26, x22", + "tst x22, x22" ] }, "db 0x32, 0xcb": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 8, "Comment": [ "0x32", "xor bl, cl but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w26, w5, w7", - "bfxil x5, x26, #0, #8", - "cmn wzr, w26, lsl #24" + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x20, x21", + "bfxil x20, x22, #0, #8", + "mov x5, x20", + "mov x26, x22", + "cmn wzr, w22, lsl #24" ] }, "db 0x66, 0x33, 0xcb": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 8, "Comment": [ "0x33", "xor bx, cx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w26, w5, w7", - "bfxil x5, x26, #0, #16", - "cmn wzr, w26, lsl #16" + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x20, x21", + "bfxil x20, x22, #0, #16", + "mov x5, x20", + "mov x26, x22", + "cmn wzr, w22, lsl #16" ] }, "db 0x33, 0xcb": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "0x33", "xor ebx, ecx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w5, w5, w7", - "mov x26, x5", - "tst w5, w5" + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x5, x22", + "mov x26, x22", + "tst w22, w22" ] }, "db 0x48, 0x33, 0xcb": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "0x33", "xor rbx, rcx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor x5, x5, x7", - "mov x26, x5", - "tst x5, x5" + "mov x20, x7", + "mov x21, x5", + "eor x22, x21, x20", + "mov x5, x22", + "mov x26, x22", + "tst x22, x22" ] }, "xor al, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x34", "ExpectedArm64ASM": [ - "eor w26, w4, #0x1", - "bfxil x4, x26, #0, #8", - "cmn wzr, w26, lsl #24" + "mov x20, x4", + "eor w21, w20, #0x1", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22", + "mov x26, x21", + "cmn wzr, w21, lsl #24" ] }, "xor ax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x35", "ExpectedArm64ASM": [ - "eor w26, w4, #0x1", - "bfxil x4, x26, #0, #16", - "cmn wzr, w26, lsl #16" + "mov x20, x4", + "eor w21, w20, #0x1", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22", + "mov x26, x21", + "cmn wzr, w21, lsl #16" ] }, "xor eax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x35", "ExpectedArm64ASM": [ - "eor w4, w4, #0x1", - "mov x26, x4", - "tst w4, w4" + "mov x20, x4", + "eor w21, w20, #0x1", + "mov x4, x21", + "mov x26, x21", + "tst w21, w21" ] }, "xor rax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x35", "ExpectedArm64ASM": [ - "eor x4, x4, #0x1", - "mov x26, x4", - "tst x4, x4" + "mov x20, x4", + "eor x21, x20, #0x1", + "mov x4, x21", + "mov x26, x21", + "tst x21, x21" ] }, "cmp bl, cl": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "0x38", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "lsl w0, w7, #24", - "cmp w0, w5, lsl #24", - "sub w26, w7, w5", + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "lsl w0, w21, #24", + "cmp w0, w20, lsl #24", + "sub w22, w21, w20", + "mov x26, x22", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "xor al, -1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x34", "ExpectedArm64ASM": [ - "eor w26, w4, #0xff", - "bfxil x4, x26, #0, #8", - "cmn wzr, w26, lsl #24" + "mov x20, x4", + "eor w21, w20, #0xff", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22", + "mov x26, x21", + "cmn wzr, w21, lsl #24" ] }, "xor ax, -1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x35", "ExpectedArm64ASM": [ - "eor w26, w4, #0xffff", - "bfxil x4, x26, #0, #16", - "cmn wzr, w26, lsl #16" + "mov x20, x4", + "eor w21, w20, #0xffff", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22", + "mov x26, x21", + "cmn wzr, w21, lsl #16" ] }, "xor eax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x35", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "eor w4, w4, w20", - "mov x26, x4", - "tst w4, w4" + "mov x21, x4", + "eor w22, w21, w20", + "mov x4, x22", + "mov x26, x22", + "tst w22, w22" ] }, "xor rax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x35", "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", - "eor x4, x4, x20", - "mov x26, x4", - "tst x4, x4" + "mov x21, x4", + "eor x22, x21, x20", + "mov x4, x22", + "mov x26, x22", + "tst x22, x22" ] }, "cmp bx, cx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "0x39", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "lsl w0, w7, #16", - "cmp w0, w5, lsl #16", - "sub w26, w7, w5", + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "lsl w0, w21, #16", + "cmp w0, w20, lsl #16", + "sub w22, w21, w20", + "mov x26, x22", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "cmp ebx, ecx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": "0x39", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "subs w26, w7, w5", + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "subs w22, w21, w20", + "mov x26, x22", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "cmp rbx, rcx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": "0x39", "ExpectedArm64ASM": [ - "eor w27, w7, w5", - "subs x26, x7, x5", + "mov x20, x5", + "mov x21, x7", + "eor w22, w21, w20", + "mov x27, x22", + "subs x22, x21, x20", + "mov x26, x22", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "db 0x3A, 0xcb": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": [ "0x3A", "cmp bl, cl but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "lsl w0, w5, #24", - "cmp w0, w7, lsl #24", - "sub w26, w5, w7", + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "lsl w0, w21, #24", + "cmp w0, w20, lsl #24", + "sub w22, w21, w20", + "mov x26, x22", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "db 0x66, 0x3B, 0xcb": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": [ "0x3B", "cmp bx, cx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "lsl w0, w5, #16", - "cmp w0, w7, lsl #16", - "sub w26, w5, w7", + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "lsl w0, w21, #16", + "cmp w0, w20, lsl #16", + "sub w22, w21, w20", + "mov x26, x22", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "db 0x3B, 0xcb": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": [ "0x3B", "cmp ebx, ecx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "subs w26, w5, w7", + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "subs w22, w21, w20", + "mov x26, x22", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "db 0x48, 0x3B, 0xcb": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": [ "0x3B", "cmp rbx, rcx but modrm.rm as source" ], "ExpectedArm64ASM": [ - "eor w27, w5, w7", - "subs x26, x5, x7", + "mov x20, x7", + "mov x21, x5", + "eor w22, w21, w20", + "mov x27, x22", + "subs x22, x21, x20", + "mov x26, x22", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "cmp al, 1": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 10, "Comment": "0x3C", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "lsl w0, w27, #24", + "mov x21, x4", + "mov x27, x21", + "lsl w0, w21, #24", "cmp w0, w20, lsl #24", - "sub w26, w27, #0x1 (1)", + "sub w20, w21, #0x1 (1)", + "mov x26, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "cmp ax, 1": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 10, "Comment": "0x3D", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "lsl w0, w27, #16", + "mov x21, x4", + "mov x27, x21", + "lsl w0, w21, #16", "cmp w0, w20, lsl #16", - "sub w26, w27, #0x1 (1)", + "sub w20, w21, #0x1 (1)", + "mov x26, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "cmp eax, 1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "0x3D", "ExpectedArm64ASM": [ - "mov x27, x4", - "subs w26, w27, #0x1 (1)", + "mov x20, x4", + "mov x27, x20", + "subs w21, w20, #0x1 (1)", + "mov x26, x21", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "cmp rax, 1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "0x3D", "ExpectedArm64ASM": [ - "mov x27, x4", - "subs x26, x27, #0x1 (1)", + "mov x20, x4", + "mov x27, x20", + "subs x21, x20, #0x1 (1)", + "mov x26, x21", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "cmp al, -1": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 11, "Comment": "0x3C", "ExpectedArm64ASM": [ "mov w20, #0xff", - "mvn w27, w4", - "lsl w0, w4, #24", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "lsl w0, w21, #24", "cmp w0, w20, lsl #24", - "sub w26, w4, #0xff (255)", + "sub w20, w21, #0xff (255)", + "mov x26, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "cmp ax, -1": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 11, "Comment": "0x3D", "ExpectedArm64ASM": [ "mov w20, #0xffff", - "mvn w27, w4", - "lsl w0, w4, #16", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "lsl w0, w21, #16", "cmp w0, w20, lsl #16", - "sub w26, w4, w20", + "sub w22, w21, w20", + "mov x26, x22", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "cmp eax, -1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": "0x3D", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "mvn w27, w4", - "subs w26, w4, w20", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "subs w22, w21, w20", + "mov x26, x22", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "cmp rax, -1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "0x3D", "ExpectedArm64ASM": [ - "mvn w27, w4", - "adds x26, x4, #0x1 (1)", + "mov x20, x4", + "mvn w21, w20", + "mov x27, x21", + "adds x21, x20, #0x1 (1)", + "mov x26, x21", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "push ax": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0x50", "ExpectedArm64ASM": [ - "strh w4, [x8, #-2]!" + "mov x20, x4", + "mov x21, x8", + "mov x22, x21", + "strh w20, [x22, #-2]!", + "mov x8, x22" ] }, "push rax": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0x50", "ExpectedArm64ASM": [ - "str x4, [x8, #-8]!" + "mov x20, x4", + "mov x21, x8", + "mov x22, x21", + "str x20, [x22, #-8]!", + "mov x8, x22" ] }, "pop ax": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 8, "Comment": "0x8f", "ExpectedArm64ASM": [ - "ldrh w20, [x8]", - "add x8, x8, #0x2 (2)", - "bfxil x4, x20, #0, #16" + "mov x20, x8", + "ldrh w21, [x20]", + "add x22, x20, #0x2 (2)", + "mov x8, x22", + "mov x20, x4", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22" ] }, "pop rax": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x8f", "ExpectedArm64ASM": [ - "ldr x4, [x8]", - "add x8, x8, #0x8 (8)" + "mov x20, x8", + "ldr x21, [x20]", + "add x22, x20, #0x8 (8)", + "mov x8, x22", + "mov x4, x21" ] }, "movsxd rax, ebx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0x63", "ExpectedArm64ASM": [ - "mov w20, w7", - "sxtw x4, w20" + "mov x20, x7", + "mov w21, w20", + "sxtw x20, w21", + "mov x4, x20" ] }, "push word 1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x68", "ExpectedArm64ASM": [ "mov w20, #0x1", - "strh w20, [x8, #-2]!" + "mov x21, x8", + "mov x22, x21", + "strh w20, [x22, #-2]!", + "mov x8, x22" ] }, "push qword 1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x68", "ExpectedArm64ASM": [ "mov w20, #0x1", - "str x20, [x8, #-8]!" + "mov x21, x8", + "mov x22, x21", + "str x20, [x22, #-8]!", + "mov x8, x22" ] }, "imul ax, bx, 257": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 12, "Comment": "0x69", "ExpectedArm64ASM": [ - "sxth x20, w7", - "mov w21, #0x101", - "mul x20, x20, x21", - "sbfx x21, x20, #16, #16", - "bfxil x4, x20, #0, #16", - "sbfx x20, x20, #15, #1", - "cmp x21, x20", + "mov x20, x7", + "sxth x21, w20", + "mov w20, #0x101", + "mul x22, x21, x20", + "sbfx x20, x22, #16, #16", + "mov x21, x4", + "mov x23, x21", + "bfxil x23, x22, #0, #16", + "mov x4, x23", + "sbfx x21, x22, #15, #1", + "cmp x20, x21", "ccmn xzr, #0, #nzCV, eq" ] }, "imul eax, ebx, 257": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "0x69", "ExpectedArm64ASM": [ - "mov w20, #0x101", - "smull x21, w7, w20", - "asr x21, x21, #32", - "mul w4, w7, w20", - "sbfx x20, x4, #31, #1", - "cmp x21, x20", + "mov x20, x7", + "mov w21, #0x101", + "smull x22, w20, w21", + "asr x23, x22, #32", + "mul w22, w20, w21", + "mov x4, x22", + "sbfx x20, x22, #31, #1", + "cmp x23, x20", "ccmn xzr, #0, #nzCV, eq" ] }, "imul rax, rbx, 257": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "0x69", "ExpectedArm64ASM": [ - "mov w20, #0x101", - "smulh x21, x7, x20", - "mul x4, x7, x20", - "asr x20, x4, #63", - "cmp x21, x20", + "mov x20, x7", + "mov w21, #0x101", + "smulh x22, x20, x21", + "mul x23, x20, x21", + "mov x4, x23", + "asr x20, x23, #63", + "cmp x22, x20", "ccmn xzr, #0, #nzCV, eq" ] }, "push word -1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x6a", "ExpectedArm64ASM": [ "mov w20, #0xffff", - "strh w20, [x8, #-2]!" + "mov x21, x8", + "mov x22, x21", + "strh w20, [x22, #-2]!", + "mov x8, x22" ] }, "push dword -1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x6a", "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", - "str x20, [x8, #-8]!" + "mov x21, x8", + "mov x22, x21", + "str x20, [x22, #-8]!", + "mov x8, x22" ] }, "push qword -1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x6a", "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", - "str x20, [x8, #-8]!" + "mov x21, x8", + "mov x22, x21", + "str x20, [x22, #-8]!", + "mov x8, x22" ] }, "imul ax, bx, 3": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 12, "Comment": "0x6b", "ExpectedArm64ASM": [ - "sxth x20, w7", - "mov w21, #0x3", - "mul x20, x20, x21", - "sbfx x21, x20, #16, #16", - "bfxil x4, x20, #0, #16", - "sbfx x20, x20, #15, #1", - "cmp x21, x20", + "mov x20, x7", + "sxth x21, w20", + "mov w20, #0x3", + "mul x22, x21, x20", + "sbfx x20, x22, #16, #16", + "mov x21, x4", + "mov x23, x21", + "bfxil x23, x22, #0, #16", + "mov x4, x23", + "sbfx x21, x22, #15, #1", + "cmp x20, x21", "ccmn xzr, #0, #nzCV, eq" ] }, "imul eax, ebx, 3": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "0x6b", "ExpectedArm64ASM": [ - "mov w20, #0x3", - "smull x21, w7, w20", - "asr x21, x21, #32", - "mul w4, w7, w20", - "sbfx x20, x4, #31, #1", - "cmp x21, x20", + "mov x20, x7", + "mov w21, #0x3", + "smull x22, w20, w21", + "asr x23, x22, #32", + "mul w22, w20, w21", + "mov x4, x22", + "sbfx x20, x22, #31, #1", + "cmp x23, x20", "ccmn xzr, #0, #nzCV, eq" ] }, "imul rax, rbx, 3": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "0x6b", "ExpectedArm64ASM": [ - "mov w20, #0x3", - "smulh x21, x7, x20", - "mul x4, x7, x20", - "asr x20, x4, #63", - "cmp x21, x20", + "mov x20, x7", + "mov w21, #0x3", + "smulh x22, x20, x21", + "mul x23, x20, x21", + "mov x4, x23", + "asr x20, x23, #63", + "cmp x22, x20", "ccmn xzr, #0, #nzCV, eq" ] }, "test al, bl": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x84", "ExpectedArm64ASM": [ - "and w26, w4, w7", - "cmn wzr, w26, lsl #24" + "mov x20, x7", + "mov x21, x4", + "and w22, w21, w20", + "cmn wzr, w22, lsl #24", + "mov x26, x22" ] }, "test ax, bx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x84", "ExpectedArm64ASM": [ - "and w26, w4, w7", - "cmn wzr, w26, lsl #16" + "mov x20, x7", + "mov x21, x4", + "and w22, w21, w20", + "cmn wzr, w22, lsl #16", + "mov x26, x22" ] }, "test eax, ebx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x84", "ExpectedArm64ASM": [ - "ands w26, w4, w7" + "mov x20, x7", + "mov x21, x4", + "ands w22, w21, w20", + "mov x26, x22" ] }, "test rax, rbx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x84", "ExpectedArm64ASM": [ - "ands x26, x4, x7" + "mov x20, x7", + "mov x21, x4", + "ands x22, x21, x20", + "mov x26, x22" ] }, "test al, al": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0x84", "ExpectedArm64ASM": [ - "mov x26, x4", - "cmn wzr, w26, lsl #24" + "mov x20, x4", + "mov x21, x20", + "cmn wzr, w21, lsl #24", + "mov x26, x21" ] }, "test ax, ax": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0x84", "ExpectedArm64ASM": [ - "mov x26, x4", - "cmn wzr, w26, lsl #16" + "mov x20, x4", + "mov x21, x20", + "cmn wzr, w21, lsl #16", + "mov x26, x21" ] }, "test eax, eax": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x84", "ExpectedArm64ASM": [ - "ands w26, w4, w4" + "mov x20, x4", + "ands w21, w20, w20", + "mov x26, x21" ] }, "test rax, rax": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x84", "ExpectedArm64ASM": [ - "ands x26, x4, x4" + "mov x20, x4", + "ands x21, x20, x20", + "mov x26, x21" ] }, "xchg bl, cl": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": "0x86", "ExpectedArm64ASM": [ - "mov x20, x5", - "mov x5, x20", - "bfxil x5, x7, #0, #8", - "bfxil x7, x20, #0, #8" + "mov x20, x7", + "mov x21, x5", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x5, x22", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x7, x22" ] }, "xchg [rax], cl": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x86", "ExpectedArm64ASM": [ - "swpalb w5, w20, [x4]", - "bfxil x5, x20, #0, #8" + "mov x20, x5", + "mov x21, x4", + "swpalb w20, w22, [x21]", + "mov x21, x20", + "bfxil x21, x22, #0, #8", + "mov x5, x21" ] }, "xchg bx, cx": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": "0x87", "ExpectedArm64ASM": [ - "mov x20, x5", - "mov x5, x20", - "bfxil x5, x7, #0, #16", - "bfxil x7, x20, #0, #16" + "mov x20, x7", + "mov x21, x5", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x5, x22", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x7, x22" ] }, "xchg [rax], cx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x87", "ExpectedArm64ASM": [ - "swpalh w5, w20, [x4]", - "bfxil x5, x20, #0, #16" + "mov x20, x5", + "mov x21, x4", + "swpalh w20, w22, [x21]", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x5, x21" ] }, "xchg ebx, ecx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x87", "ExpectedArm64ASM": [ - "mov x20, x5", - "mov w5, w7", - "mov w7, w20" + "mov x20, x7", + "mov x21, x5", + "mov w22, w20", + "mov x5, x22", + "mov w20, w21", + "mov x7, x20" ] }, "xchg [rax], ecx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x87", "ExpectedArm64ASM": [ - "swpal w5, w5, [x4]" + "mov x20, x5", + "mov x21, x4", + "swpal w20, w22, [x21]", + "mov x5, x22" ] }, "xchg rbx, rcx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 4, "Comment": "0x87", "ExpectedArm64ASM": [ "mov x20, x7", - "mov x7, x5", - "mov x5, x20" + "mov x21, x5", + "mov x5, x20", + "mov x7, x21" ] }, "xchg [rax], rcx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x87", "ExpectedArm64ASM": [ - "swpal x5, x5, [x4]" + "mov x20, x5", + "mov x21, x4", + "swpal x20, x22, [x21]", + "mov x5, x22" ] }, "mov [rax], bl": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0x88", "ExpectedArm64ASM": [ - "uxtb w20, w7", - "strb w20, [x4]" + "mov x20, x7", + "uxtb w21, w20", + "mov x20, x4", + "strb w21, [x20]" ] }, "mov [rax], bx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0x89", "ExpectedArm64ASM": [ - "uxth w20, w7", - "strh w20, [x4]" + "mov x20, x7", + "uxth w21, w20", + "mov x20, x4", + "strh w21, [x20]" ] }, "mov [rax], ebx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0x89", "ExpectedArm64ASM": [ - "mov w20, w7", - "str w20, [x4]" + "mov x20, x7", + "mov w21, w20", + "mov x20, x4", + "str w21, [x20]" ] }, "mov [rax], rbx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x89", "ExpectedArm64ASM": [ - "str x7, [x4]" + "mov x20, x7", + "mov x21, x4", + "str x20, [x21]" ] }, "mov bl, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x8a", "ExpectedArm64ASM": [ - "ldrb w20, [x4]", - "bfxil x7, x20, #0, #8" + "mov x20, x4", + "ldrb w21, [x20]", + "mov x20, x7", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x7, x22" ] }, "mov bx, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x8b", "ExpectedArm64ASM": [ - "ldrh w20, [x4]", - "bfxil x7, x20, #0, #16" + "mov x20, x4", + "ldrh w21, [x20]", + "mov x20, x7", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x7, x22" ] }, "mov ebx, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x8b", "ExpectedArm64ASM": [ - "ldr w7, [x4]" + "mov x20, x4", + "ldr w21, [x20]", + "mov x7, x21" ] }, "mov rbx, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x8b", "ExpectedArm64ASM": [ - "ldr x7, [x4]" + "mov x20, x4", + "ldr x21, [x20]", + "mov x7, x21" ] }, "mov ax, cs": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x8c", "ExpectedArm64ASM": [ "ldrh w20, [x28, #138]", - "bfxil x4, x20, #0, #16" + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x4, x22" ] }, "mov eax, cs": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "0x8c", "ExpectedArm64ASM": [ - "ldrh w4, [x28, #138]" + "ldrh w20, [x28, #138]", + "mov x4, x20" ] }, "mov rax, cs": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "0x8c", "ExpectedArm64ASM": [ - "ldrh w4, [x28, #138]" + "ldrh w20, [x28, #138]", + "mov x4, x20" ] }, "mov ax, es": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x8c", "ExpectedArm64ASM": [ "ldrh w20, [x28, #136]", - "bfxil x4, x20, #0, #16" + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x4, x22" ] }, "mov eax, es": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "0x8c", "ExpectedArm64ASM": [ - "ldrh w4, [x28, #136]" + "ldrh w20, [x28, #136]", + "mov x4, x20" ] }, "mov rax, es": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "0x8c", "ExpectedArm64ASM": [ - "ldrh w4, [x28, #136]" + "ldrh w20, [x28, #136]", + "mov x4, x20" ] }, "mov ax, ss": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x8c", "ExpectedArm64ASM": [ "ldrh w20, [x28, #140]", - "bfxil x4, x20, #0, #16" + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x4, x22" ] }, "mov eax, ss": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "0x8c", "ExpectedArm64ASM": [ - "ldrh w4, [x28, #140]" + "ldrh w20, [x28, #140]", + "mov x4, x20" ] }, "mov rax, ss": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "0x8c", "ExpectedArm64ASM": [ - "ldrh w4, [x28, #140]" + "ldrh w20, [x28, #140]", + "mov x4, x20" ] }, "mov ax, ds": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x8c", "ExpectedArm64ASM": [ "ldrh w20, [x28, #142]", - "bfxil x4, x20, #0, #16" + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x4, x22" ] }, "mov eax, ds": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "0x8c", "ExpectedArm64ASM": [ - "ldrh w4, [x28, #142]" + "ldrh w20, [x28, #142]", + "mov x4, x20" ] }, "mov rax, ds": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "0x8c", "ExpectedArm64ASM": [ - "ldrh w4, [x28, #142]" + "ldrh w20, [x28, #142]", + "mov x4, x20" ] }, "mov ax, gs": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x8c", "ExpectedArm64ASM": [ - "and x4, x4, #0xffffffffffff0000" + "mov x20, x4", + "and x21, x20, #0xffffffffffff0000", + "mov x4, x21" ] }, "mov eax, gs": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "0x8c", "ExpectedArm64ASM": [ - "mov w4, #0x0" + "mov w20, #0x0", + "mov x4, x20" ] }, "mov rax, gs": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "0x8c", "ExpectedArm64ASM": [ - "mov w4, #0x0" + "mov w20, #0x0", + "mov x4, x20" ] }, "mov ax, fs": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x8c", "ExpectedArm64ASM": [ - "and x4, x4, #0xffffffffffff0000" + "mov x20, x4", + "and x21, x20, #0xffffffffffff0000", + "mov x4, x21" ] }, "mov eax, fs": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "0x8c", "ExpectedArm64ASM": [ - "mov w4, #0x0" + "mov w20, #0x0", + "mov x4, x20" ] }, "mov rax, fs": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "0x8c", "ExpectedArm64ASM": [ - "mov w4, #0x0" + "mov w20, #0x0", + "mov x4, x20" ] }, "lea ax, [rbx+rcx*1 + 0]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 7, "Comment": "0x8d", "ExpectedArm64ASM": [ - "add x20, x7, x5", - "bfxil x4, x20, #0, #16" + "mov x20, x7", + "mov x21, x5", + "add x22, x20, x21", + "mov x20, x4", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21" ] }, "lea eax, [rbx+rcx*1 + 0]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x8d", "ExpectedArm64ASM": [ - "add x20, x7, x5", - "mov x20, x20", - "mov w4, w20" + "mov x20, x7", + "mov x21, x5", + "add x22, x20, x21", + "mov x20, x22", + "mov w21, w20", + "mov x4, x21" ] }, "lea rax, [rbx+rcx*1 + 0]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x8d", "ExpectedArm64ASM": [ - "add x4, x7, x5" + "mov x20, x7", + "mov x21, x5", + "add x22, x20, x21", + "mov x4, x22" ] }, "lea ax, [rbx+rcx*2 + 0]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 7, "Comment": "0x8d", "ExpectedArm64ASM": [ - "add x20, x7, x5, lsl #1", - "bfxil x4, x20, #0, #16" + "mov x20, x7", + "mov x21, x5", + "add x22, x20, x21, lsl #1", + "mov x20, x4", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21" ] }, "lea eax, [rbx+rcx*2 + 0]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x8d", "ExpectedArm64ASM": [ - "add x20, x7, x5, lsl #1", - "mov x20, x20", - "mov w4, w20" + "mov x20, x7", + "mov x21, x5", + "add x22, x20, x21, lsl #1", + "mov x20, x22", + "mov w21, w20", + "mov x4, x21" ] }, "lea rax, [rbx+rcx*2 + 0]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x8d", "ExpectedArm64ASM": [ - "add x4, x7, x5, lsl #1" + "mov x20, x7", + "mov x21, x5", + "add x22, x20, x21, lsl #1", + "mov x4, x22" ] }, "lea ax, [rbx+rcx*4 + 0]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 7, "Comment": "0x8d", "ExpectedArm64ASM": [ - "add x20, x7, x5, lsl #2", - "bfxil x4, x20, #0, #16" + "mov x20, x7", + "mov x21, x5", + "add x22, x20, x21, lsl #2", + "mov x20, x4", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21" ] }, "lea eax, [rbx+rcx*4 + 0]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x8d", "ExpectedArm64ASM": [ - "add x20, x7, x5, lsl #2", - "mov x20, x20", - "mov w4, w20" + "mov x20, x7", + "mov x21, x5", + "add x22, x20, x21, lsl #2", + "mov x20, x22", + "mov w21, w20", + "mov x4, x21" ] }, "lea rax, [rbx+rcx*4 + 0]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x8d", "ExpectedArm64ASM": [ - "add x4, x7, x5, lsl #2" + "mov x20, x7", + "mov x21, x5", + "add x22, x20, x21, lsl #2", + "mov x4, x22" ] }, "lea ax, [rbx+rcx*8 + 0]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 7, "Comment": "0x8d", "ExpectedArm64ASM": [ - "add x20, x7, x5, lsl #3", - "bfxil x4, x20, #0, #16" + "mov x20, x7", + "mov x21, x5", + "add x22, x20, x21, lsl #3", + "mov x20, x4", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21" ] }, "lea eax, [rbx+rcx*8 + 0]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x8d", "ExpectedArm64ASM": [ - "add x20, x7, x5, lsl #3", - "mov x20, x20", - "mov w4, w20" + "mov x20, x7", + "mov x21, x5", + "add x22, x20, x21, lsl #3", + "mov x20, x22", + "mov w21, w20", + "mov x4, x21" ] }, "lea rax, [rbx+rcx*8 + 0]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x8d", "ExpectedArm64ASM": [ - "add x4, x7, x5, lsl #3" + "mov x20, x7", + "mov x21, x5", + "add x22, x20, x21, lsl #3", + "mov x4, x22" ] }, "lea ax, [ebx+ecx*1 + 0]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 8, "Comment": "0x8d", "ExpectedArm64ASM": [ - "add x20, x7, x5", - "mov w20, w20", - "bfxil x4, x20, #0, #16" + "mov x20, x7", + "mov x21, x5", + "add x22, x20, x21", + "mov w20, w22", + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x4, x22" ] }, "lea eax, [ebx+ecx*1 + 0]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x8d", "ExpectedArm64ASM": [ - "add x20, x7, x5", - "mov w4, w20" + "mov x20, x7", + "mov x21, x5", + "add x22, x20, x21", + "mov w20, w22", + "mov x4, x20" ] }, "lea rax, [ebx+ecx*1 + 0]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x8d", "ExpectedArm64ASM": [ - "add x20, x7, x5", - "mov w4, w20" + "mov x20, x7", + "mov x21, x5", + "add x22, x20, x21", + "mov w20, w22", + "mov x4, x20" ] }, "lea ax, [ebx+ecx*2 + 0]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 8, "Comment": "0x8d", "ExpectedArm64ASM": [ - "add x20, x7, x5, lsl #1", - "mov w20, w20", - "bfxil x4, x20, #0, #16" + "mov x20, x7", + "mov x21, x5", + "add x22, x20, x21, lsl #1", + "mov w20, w22", + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x4, x22" ] }, "lea eax, [ebx+ecx*2 + 0]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x8d", "ExpectedArm64ASM": [ - "add x20, x7, x5, lsl #1", - "mov w4, w20" + "mov x20, x7", + "mov x21, x5", + "add x22, x20, x21, lsl #1", + "mov w20, w22", + "mov x4, x20" ] }, "lea rax, [ebx+ecx*2 + 0]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x8d", "ExpectedArm64ASM": [ - "add x20, x7, x5, lsl #1", - "mov w4, w20" + "mov x20, x7", + "mov x21, x5", + "add x22, x20, x21, lsl #1", + "mov w20, w22", + "mov x4, x20" ] }, "lea ax, [ebx+ecx*4 + 0]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 8, "Comment": "0x8d", "ExpectedArm64ASM": [ - "add x20, x7, x5, lsl #2", - "mov w20, w20", - "bfxil x4, x20, #0, #16" + "mov x20, x7", + "mov x21, x5", + "add x22, x20, x21, lsl #2", + "mov w20, w22", + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x4, x22" ] }, "lea eax, [ebx+ecx*4 + 0]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x8d", "ExpectedArm64ASM": [ - "add x20, x7, x5, lsl #2", - "mov w4, w20" + "mov x20, x7", + "mov x21, x5", + "add x22, x20, x21, lsl #2", + "mov w20, w22", + "mov x4, x20" ] }, "lea rax, [ebx+ecx*4 + 0]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x8d", "ExpectedArm64ASM": [ - "add x20, x7, x5, lsl #2", - "mov w4, w20" + "mov x20, x7", + "mov x21, x5", + "add x22, x20, x21, lsl #2", + "mov w20, w22", + "mov x4, x20" ] }, "lea ax, [ebx+ecx*8 + 0]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 8, "Comment": "0x8d", "ExpectedArm64ASM": [ - "add x20, x7, x5, lsl #3", - "mov w20, w20", - "bfxil x4, x20, #0, #16" + "mov x20, x7", + "mov x21, x5", + "add x22, x20, x21, lsl #3", + "mov w20, w22", + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x4, x22" ] }, "lea eax, [ebx+ecx*8 + 0]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x8d", "ExpectedArm64ASM": [ - "add x20, x7, x5, lsl #3", - "mov w4, w20" + "mov x20, x7", + "mov x21, x5", + "add x22, x20, x21, lsl #3", + "mov w20, w22", + "mov x4, x20" ] }, "lea rax, [ebx+ecx*8 + 0]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x8d", "ExpectedArm64ASM": [ - "add x20, x7, x5, lsl #3", - "mov w4, w20" + "mov x20, x7", + "mov x21, x5", + "add x22, x20, x21, lsl #3", + "mov w20, w22", + "mov x4, x20" ] }, "mov cs, ax": { @@ -2423,39 +3142,42 @@ "Comment": "0x8e" }, "mov es, ax": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 7, "Comment": "0x8e", "ExpectedArm64ASM": [ - "uxth w20, w4", - "strh w20, [x28, #136]", - "ubfx w20, w20, #3, #13", + "mov x20, x4", + "uxth w21, w20", + "strh w21, [x28, #136]", + "ubfx w20, w21, #3, #13", "add x0, x28, x20, lsl #2", - "ldr w20, [x0, #896]", - "str w20, [x28, #152]" + "ldr w21, [x0, #896]", + "str w21, [x28, #152]" ] }, "mov ss, ax": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 7, "Comment": "0x8e", "ExpectedArm64ASM": [ - "uxth w20, w4", - "strh w20, [x28, #140]", - "ubfx w20, w20, #3, #13", + "mov x20, x4", + "uxth w21, w20", + "strh w21, [x28, #140]", + "ubfx w20, w21, #3, #13", "add x0, x28, x20, lsl #2", - "ldr w20, [x0, #896]", - "str w20, [x28, #160]" + "ldr w21, [x0, #896]", + "str w21, [x28, #160]" ] }, "mov ds, ax": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 7, "Comment": "0x8e", "ExpectedArm64ASM": [ - "uxth w20, w4", - "strh w20, [x28, #142]", - "ubfx w20, w20, #3, #13", + "mov x20, x4", + "uxth w21, w20", + "strh w21, [x28, #142]", + "ubfx w20, w21, #3, #13", "add x0, x28, x20, lsl #2", - "ldr w20, [x0, #896]", - "str w20, [x28, #164]" + "ldr w21, [x0, #896]", + "str w21, [x28, #164]" ] }, "mov gs, ax": { @@ -2469,49 +3191,63 @@ "Comment": "0x8e" }, "pop word [rax]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x8f", "ExpectedArm64ASM": [ - "ldrh w20, [x8]", - "add x8, x8, #0x2 (2)", - "strh w20, [x4]" + "mov x20, x8", + "ldrh w21, [x20]", + "add x22, x20, #0x2 (2)", + "mov x8, x22", + "mov x20, x4", + "strh w21, [x20]" ] }, "pop qword [rax]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x8f", "ExpectedArm64ASM": [ - "ldr x20, [x8]", - "add x8, x8, #0x8 (8)", - "str x20, [x4]" + "mov x20, x8", + "ldr x21, [x20]", + "add x22, x20, #0x8 (8)", + "mov x8, x22", + "mov x20, x4", + "str x21, [x20]" ] }, "xchg ax, bx": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": "0x90", "ExpectedArm64ASM": [ - "mov x20, x7", - "mov x7, x20", - "bfxil x7, x4, #0, #16", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "mov x21, x7", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x7, x22", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22" ] }, "xchg eax, ebx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x90", "ExpectedArm64ASM": [ - "mov x20, x7", - "mov w7, w4", - "mov w4, w20" + "mov x20, x4", + "mov x21, x7", + "mov w22, w20", + "mov x7, x22", + "mov w20, w21", + "mov x4, x20" ] }, "xchg rax, rbx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 4, "Comment": "0x90", "ExpectedArm64ASM": [ "mov x20, x4", - "mov x4, x7", - "mov x7, x20" + "mov x21, x7", + "mov x7, x20", + "mov x4, x21" ] }, "nop": { @@ -2520,50 +3256,65 @@ "ExpectedArm64ASM": [] }, "cbw": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x98", "ExpectedArm64ASM": [ - "sxtb w20, w4", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "sxtb w21, w20", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22" ] }, "cwde": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x98", "ExpectedArm64ASM": [ - "sxth w4, w4" + "mov x20, x4", + "sxth w21, w20", + "mov x4, x21" ] }, "cdqe": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x98", "ExpectedArm64ASM": [ - "sxtw x4, w4" + "mov x20, x4", + "sxtw x21, w20", + "mov x4, x21" ] }, "cwd": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x98", "ExpectedArm64ASM": [ - "uxth w20, w4", - "sbfx x20, x20, #15, #1", - "bfxil x6, x20, #0, #16" + "mov x20, x4", + "uxth w21, w20", + "sbfx x20, x21, #15, #1", + "mov x21, x6", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x6, x22" ] }, "cdq": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x99", "ExpectedArm64ASM": [ - "mov w20, w4", - "sbfx x20, x20, #31, #1", - "mov w6, w20" + "mov x20, x4", + "mov w21, w20", + "sbfx x20, x21, #31, #1", + "mov w21, w20", + "mov x6, x21" ] }, "cqo": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x99", "ExpectedArm64ASM": [ - "asr x6, x4, #63" + "mov x20, x4", + "asr x21, x20, #63", + "mov x6, x21" ] }, "fwait": { @@ -2572,184 +3323,207 @@ "ExpectedArm64ASM": [] }, "pushf": { - "ExpectedInstructionCount": 39, + "ExpectedInstructionCount": 44, "Comment": "0x9c", "ExpectedArm64ASM": [ "cset w20, hs", - "eor w21, w27, w26", - "ubfx w21, w21, #4, #1", - "orr x20, x20, x21, lsl #4", - "ldrb w21, [x28, #712]", - "orr x20, x20, x21, lsl #8", - "ldrb w21, [x28, #713]", - "orr x20, x20, x21, lsl #9", - "ldrsb x21, [x28, #714]", - "lsr x21, x21, #63", - "orr x20, x20, x21, lsl #10", + "mov x21, x27", + "mov x22, x26", + "eor w23, w21, w22", + "ubfx w21, w23, #4, #1", + "orr x23, x20, x21, lsl #4", + "ldrb w20, [x28, #712]", + "orr x21, x23, x20, lsl #8", + "ldrb w20, [x28, #713]", + "orr x23, x21, x20, lsl #9", + "ldrsb x20, [x28, #714]", + "lsr x21, x20, #63", + "orr x20, x23, x21, lsl #10", "cset w21, vs", - "orr x20, x20, x21, lsl #11", - "ldrb w21, [x28, #716]", - "orr x20, x20, x21, lsl #12", - "ldrb w21, [x28, #718]", - "orr x20, x20, x21, lsl #14", - "ldrb w21, [x28, #720]", - "orr x20, x20, x21, lsl #16", - "ldrb w21, [x28, #721]", - "orr x20, x20, x21, lsl #17", - "ldrb w21, [x28, #722]", - "orr x20, x20, x21, lsl #18", - "ldrb w21, [x28, #723]", - "orr x20, x20, x21, lsl #19", - "ldrb w21, [x28, #724]", - "orr x20, x20, x21, lsl #20", - "ldrb w21, [x28, #725]", - "orr x20, x20, x21, lsl #21", - "eor w21, w26, w26, lsr #4", - "eor w21, w21, w21, lsr #2", - "eor w21, w21, w21, lsr #1", - "orr x21, x21, #0xfffffffffffffffe", - "orn x20, x20, x21, ror #62", + "orr x23, x20, x21, lsl #11", + "ldrb w20, [x28, #716]", + "orr x21, x23, x20, lsl #12", + "ldrb w20, [x28, #718]", + "orr x23, x21, x20, lsl #14", + "ldrb w20, [x28, #720]", + "orr x21, x23, x20, lsl #16", + "ldrb w20, [x28, #721]", + "orr x23, x21, x20, lsl #17", + "ldrb w20, [x28, #722]", + "orr x21, x23, x20, lsl #18", + "ldrb w20, [x28, #723]", + "orr x23, x21, x20, lsl #19", + "ldrb w20, [x28, #724]", + "orr x21, x23, x20, lsl #20", + "ldrb w20, [x28, #725]", + "orr x23, x21, x20, lsl #21", + "eor w20, w22, w22, lsr #4", + "eor w21, w20, w20, lsr #2", + "eor w20, w21, w21, lsr #1", + "orr x21, x20, #0xfffffffffffffffe", + "orn x20, x23, x21, ror #62", "mrs x21, nzcv", - "and x21, x21, #0xc0000000", - "orr x20, x20, x21, lsr #24", - "orr x20, x20, #0x2", - "str x20, [x8, #-8]!" + "and x22, x21, #0xc0000000", + "orr x21, x20, x22, lsr #24", + "orr x20, x21, #0x2", + "mov x21, x8", + "mov x22, x21", + "str x20, [x22, #-8]!", + "mov x8, x22" ] }, "pushfq": { - "ExpectedInstructionCount": 39, + "ExpectedInstructionCount": 44, "Comment": "0x9c", "ExpectedArm64ASM": [ "cset w20, hs", - "eor w21, w27, w26", - "ubfx w21, w21, #4, #1", - "orr x20, x20, x21, lsl #4", - "ldrb w21, [x28, #712]", - "orr x20, x20, x21, lsl #8", - "ldrb w21, [x28, #713]", - "orr x20, x20, x21, lsl #9", - "ldrsb x21, [x28, #714]", - "lsr x21, x21, #63", - "orr x20, x20, x21, lsl #10", + "mov x21, x27", + "mov x22, x26", + "eor w23, w21, w22", + "ubfx w21, w23, #4, #1", + "orr x23, x20, x21, lsl #4", + "ldrb w20, [x28, #712]", + "orr x21, x23, x20, lsl #8", + "ldrb w20, [x28, #713]", + "orr x23, x21, x20, lsl #9", + "ldrsb x20, [x28, #714]", + "lsr x21, x20, #63", + "orr x20, x23, x21, lsl #10", "cset w21, vs", - "orr x20, x20, x21, lsl #11", - "ldrb w21, [x28, #716]", - "orr x20, x20, x21, lsl #12", - "ldrb w21, [x28, #718]", - "orr x20, x20, x21, lsl #14", - "ldrb w21, [x28, #720]", - "orr x20, x20, x21, lsl #16", - "ldrb w21, [x28, #721]", - "orr x20, x20, x21, lsl #17", - "ldrb w21, [x28, #722]", - "orr x20, x20, x21, lsl #18", - "ldrb w21, [x28, #723]", - "orr x20, x20, x21, lsl #19", - "ldrb w21, [x28, #724]", - "orr x20, x20, x21, lsl #20", - "ldrb w21, [x28, #725]", - "orr x20, x20, x21, lsl #21", - "eor w21, w26, w26, lsr #4", - "eor w21, w21, w21, lsr #2", - "eor w21, w21, w21, lsr #1", - "orr x21, x21, #0xfffffffffffffffe", - "orn x20, x20, x21, ror #62", + "orr x23, x20, x21, lsl #11", + "ldrb w20, [x28, #716]", + "orr x21, x23, x20, lsl #12", + "ldrb w20, [x28, #718]", + "orr x23, x21, x20, lsl #14", + "ldrb w20, [x28, #720]", + "orr x21, x23, x20, lsl #16", + "ldrb w20, [x28, #721]", + "orr x23, x21, x20, lsl #17", + "ldrb w20, [x28, #722]", + "orr x21, x23, x20, lsl #18", + "ldrb w20, [x28, #723]", + "orr x23, x21, x20, lsl #19", + "ldrb w20, [x28, #724]", + "orr x21, x23, x20, lsl #20", + "ldrb w20, [x28, #725]", + "orr x23, x21, x20, lsl #21", + "eor w20, w22, w22, lsr #4", + "eor w21, w20, w20, lsr #2", + "eor w20, w21, w21, lsr #1", + "orr x21, x20, #0xfffffffffffffffe", + "orn x20, x23, x21, ror #62", "mrs x21, nzcv", - "and x21, x21, #0xc0000000", - "orr x20, x20, x21, lsr #24", - "orr x20, x20, #0x2", - "str x20, [x8, #-8]!" + "and x22, x21, #0xc0000000", + "orr x21, x20, x22, lsr #24", + "orr x20, x21, #0x2", + "mov x21, x8", + "mov x22, x21", + "str x20, [x22, #-8]!", + "mov x8, x22" ] }, "popf": { - "ExpectedInstructionCount": 39, + "ExpectedInstructionCount": 43, "Comment": "0x9d", "ExpectedArm64ASM": [ - "ldr x20, [x8]", - "add x8, x8, #0x8 (8)", - "mov w21, #0x202", - "orr x27, x20, x21", - "ubfx x20, x27, #0, #1", - "lsl x20, x20, #29", - "ubfx w21, w27, #2, #1", - "mov w22, #0x1", - "eor w26, w21, #0x1", - "ubfx x21, x27, #6, #1", - "orr w20, w20, w21, lsl #30", - "ubfx x21, x27, #7, #1", - "orr w20, w20, w21, lsl #31", - "ubfx w21, w27, #8, #1", - "strb w21, [x28, #712]", - "ubfx w21, w27, #9, #1", - "strb w21, [x28, #713]", - "ubfx w21, w27, #10, #1", - "sub x21, x22, x21, lsl #1", - "strb w21, [x28, #714]", - "ubfx x21, x27, #11, #1", - "orr w20, w20, w21, lsl #28", - "ubfx w21, w27, #12, #1", - "strb w21, [x28, #716]", - "ubfx w21, w27, #14, #1", - "strb w21, [x28, #718]", - "ubfx w21, w27, #16, #1", - "strb w21, [x28, #720]", - "ubfx w21, w27, #17, #1", - "strb w21, [x28, #721]", - "ubfx w21, w27, #18, #1", - "strb w21, [x28, #722]", - "ubfx w21, w27, #19, #1", - "strb w21, [x28, #723]", - "ubfx w21, w27, #20, #1", - "strb w21, [x28, #724]", - "ubfx w21, w27, #21, #1", - "strb w21, [x28, #725]", - "msr nzcv, x20" + "mov x20, x8", + "ldr x21, [x20]", + "add x22, x20, #0x8 (8)", + "mov x8, x22", + "mov w20, #0x202", + "orr x22, x21, x20", + "ubfx x20, x22, #0, #1", + "lsl x21, x20, #29", + "ubfx w20, w22, #2, #1", + "mov w23, #0x1", + "eor w24, w20, #0x1", + "mov x26, x24", + "mov x27, x22", + "ubfx x20, x22, #6, #1", + "orr w24, w21, w20, lsl #30", + "ubfx x20, x22, #7, #1", + "orr w21, w24, w20, lsl #31", + "ubfx w20, w22, #8, #1", + "strb w20, [x28, #712]", + "ubfx w20, w22, #9, #1", + "strb w20, [x28, #713]", + "ubfx w20, w22, #10, #1", + "sub x24, x23, x20, lsl #1", + "strb w24, [x28, #714]", + "ubfx x20, x22, #11, #1", + "orr w23, w21, w20, lsl #28", + "ubfx w20, w22, #12, #1", + "strb w20, [x28, #716]", + "ubfx w20, w22, #14, #1", + "strb w20, [x28, #718]", + "ubfx w20, w22, #16, #1", + "strb w20, [x28, #720]", + "ubfx w20, w22, #17, #1", + "strb w20, [x28, #721]", + "ubfx w20, w22, #18, #1", + "strb w20, [x28, #722]", + "ubfx w20, w22, #19, #1", + "strb w20, [x28, #723]", + "ubfx w20, w22, #20, #1", + "strb w20, [x28, #724]", + "ubfx w20, w22, #21, #1", + "strb w20, [x28, #725]", + "msr nzcv, x23" ] }, "sahf": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 20, "Comment": "0x9e", "ExpectedArm64ASM": [ - "ubfx w20, w4, #8, #8", - "mov w21, #0x28", - "bic x20, x20, x21", - "orr x27, x20, #0x2", - "ubfx x20, x27, #0, #1", - "mrs x21, nzcv", - "mov w0, w21", - "bfi w0, w20, #29, #1", - "mov w20, w0", - "ubfx w21, w27, #2, #1", - "eor w26, w21, #0x1", - "ubfx x21, x27, #6, #1", - "bfi w20, w21, #30, #1", - "ubfx x21, x27, #7, #1", + "mov x20, x4", + "ubfx w21, w20, #8, #8", + "mov w20, #0x28", + "bic x22, x21, x20", + "orr x20, x22, #0x2", + "ubfx x21, x20, #0, #1", + "mrs x22, nzcv", + "mov w23, w22", + "bfi w23, w21, #29, #1", + "ubfx w21, w20, #2, #1", + "eor w22, w21, #0x1", + "mov x26, x22", + "mov x27, x20", + "ubfx x21, x20, #6, #1", + "mov w22, w23", + "bfi w22, w21, #30, #1", + "ubfx x21, x20, #7, #1", + "mov w20, w22", "bfi w20, w21, #31, #1", "msr nzcv, x20" ] }, "lahf": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 19, "Comment": "0x9f", "ExpectedArm64ASM": [ "cset w20, hs", - "eor w21, w27, w26", - "ubfx w21, w21, #4, #1", - "orr x20, x20, x21, lsl #4", - "eor w21, w26, w26, lsr #4", - "eor w21, w21, w21, lsr #2", - "eor w21, w21, w21, lsr #1", - "orr x21, x21, #0xfffffffffffffffe", - "orn x20, x20, x21, ror #62", + "mov x21, x27", + "mov x22, x26", + "eor w23, w21, w22", + "ubfx w21, w23, #4, #1", + "orr x23, x20, x21, lsl #4", + "eor w20, w22, w22, lsr #4", + "eor w21, w20, w20, lsr #2", + "eor w20, w21, w21, lsr #1", + "orr x21, x20, #0xfffffffffffffffe", + "orn x20, x23, x21, ror #62", "mrs x21, nzcv", - "and x21, x21, #0xc0000000", - "orr x20, x20, x21, lsr #24", - "orr x20, x20, #0x2", - "bfi x4, x20, #8, #8" + "and x22, x21, #0xc0000000", + "orr x21, x20, x22, lsr #24", + "orr x20, x21, #0x2", + "mov x21, x4", + "mov x22, x21", + "bfi x22, x20, #8, #8", + "mov x4, x22" ] }, "db 0x48, 0xa1; dq 0x00000000e0000008": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": [ "mov rax, [0xe0000008]", "0xa1" @@ -2759,11 +3533,12 @@ "movk x20, #0x3020, lsl #16", "movk x20, #0x3678, lsl #32", "movk x20, #0x2c37, lsl #48", - "ldr x4, [x20]" + "ldr x21, [x20]", + "mov x4, x21" ] }, "db 0x67, 0xa1; dd 0xe0000000": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 4, "Comment": [ "mov eax, [0xe0000000]", "0xa1" @@ -2771,100 +3546,122 @@ "ExpectedArm64ASM": [ "mov w20, #0x6264", "movk w20, #0x3020, lsl #16", - "ldr w4, [x20]" + "ldr w21, [x20]", + "mov x4, x21" ] }, "db 0x48, 0xa3; dq 0x00000000e0000008": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": [ "mov [0xe0000008], rax", "0xa3" ], "ExpectedArm64ASM": [ - "mov x20, #0x6264", - "movk x20, #0x3020, lsl #16", - "movk x20, #0x3678, lsl #32", - "movk x20, #0x2c37, lsl #48", - "str x4, [x20]" + "mov x20, x4", + "mov x21, #0x6264", + "movk x21, #0x3020, lsl #16", + "movk x21, #0x3678, lsl #32", + "movk x21, #0x2c37, lsl #48", + "str x20, [x21]" ] }, "db 0x67, 0xa3; dd 0xe0000000": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": [ "mov [0xe0000000], eax", "0xa3" ], "ExpectedArm64ASM": [ - "mov w20, w4", - "mov w21, #0x6f6d", - "movk w21, #0x7376, lsl #16", - "str w20, [x21]" + "mov x20, x4", + "mov w21, w20", + "mov w20, #0x6f6d", + "movk w20, #0x7376, lsl #16", + "str w21, [x20]" ] }, "movsb": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": [ "0xa4" ], "ExpectedArm64ASM": [ - "ldrb w20, [x10]", - "strb w20, [x11]", - "ldrsb x20, [x28, #714]", - "add x10, x10, x20", - "add x11, x11, x20" + "mov x20, x10", + "mov x21, x11", + "ldrb w22, [x20]", + "strb w22, [x21]", + "ldrsb x22, [x28, #714]", + "add x23, x20, x22", + "add x20, x21, x22", + "mov x10, x23", + "mov x11, x20" ] }, "movsw": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": [ "0xa5" ], "ExpectedArm64ASM": [ - "ldrh w20, [x10]", - "strh w20, [x11]", - "ldrsb x20, [x28, #714]", - "lsl x20, x20, #1", - "add x10, x10, x20", - "add x11, x11, x20" + "mov x20, x10", + "mov x21, x11", + "ldrh w22, [x20]", + "strh w22, [x21]", + "ldrsb x22, [x28, #714]", + "lsl x23, x22, #1", + "add x22, x20, x23", + "add x20, x21, x23", + "mov x10, x22", + "mov x11, x20" ] }, "movsd": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": [ "0xa5" ], "ExpectedArm64ASM": [ - "ldr w20, [x10]", - "str w20, [x11]", - "ldrsb x20, [x28, #714]", - "lsl x20, x20, #2", - "add x10, x10, x20", - "add x11, x11, x20" + "mov x20, x10", + "mov x21, x11", + "ldr w22, [x20]", + "str w22, [x21]", + "ldrsb x22, [x28, #714]", + "lsl x23, x22, #2", + "add x22, x20, x23", + "add x20, x21, x23", + "mov x10, x22", + "mov x11, x20" ] }, "movsq": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": [ "0xa5" ], "ExpectedArm64ASM": [ - "ldr x20, [x10]", - "str x20, [x11]", - "ldrsb x20, [x28, #714]", - "lsl x20, x20, #3", - "add x10, x10, x20", - "add x11, x11, x20" + "mov x20, x10", + "mov x21, x11", + "ldr x22, [x20]", + "str x22, [x21]", + "ldrsb x22, [x28, #714]", + "lsl x23, x22, #3", + "add x22, x20, x23", + "add x20, x21, x23", + "mov x10, x22", + "mov x11, x20" ] }, "rep movsb": { - "ExpectedInstructionCount": 83, + "ExpectedInstructionCount": 89, "Comment": "0xa4", "ExpectedArm64ASM": [ - "ldrsb x20, [x28, #714]", - "mov x0, x5", - "mov x1, x11", - "mov x2, x10", - "tbnz w20, #1, #+0x94", + "mov x20, x10", + "mov x21, x11", + "mov x22, x5", + "ldrsb x23, [x28, #714]", + "mov x0, x22", + "mov x1, x21", + "mov x2, x20", + "tbnz w23, #1, #+0x94", "cbz x0, #+0x78", "sub x3, x1, x2", "tbz x3, #63, #+0x8", @@ -2895,11 +3692,11 @@ "strb w3, [x1], #1", "sub x0, x0, #0x1 (1)", "cbnz x0, #-0xc", - "mov x0, x11", - "mov x1, x10", - "mov x2, x5", - "add x20, x0, x2", - "add x21, x1, x2", + "mov x0, x21", + "mov x1, x20", + "mov x2, x22", + "add x24, x0, x2", + "add x25, x1, x2", "b #+0xa0", "cbz x0, #+0x88", "sub x3, x1, x2", @@ -2935,25 +3732,31 @@ "strb w3, [x1], #-1", "sub x0, x0, #0x1 (1)", "cbnz x0, #-0xc", - "mov x0, x11", - "mov x1, x10", - "mov x2, x5", - "sub x20, x0, x2", - "sub x21, x1, x2", + "mov x0, x21", + "mov x1, x20", + "mov x2, x22", + "sub x24, x0, x2", + "sub x25, x1, x2", + "mov x20, x24", + "mov x21, x25", + "mov w22, #0x0", + "mov x5, x22", "mov x11, x20", - "mov x10, x21", - "mov w5, #0x0" + "mov x10, x21" ] }, "rep movsw": { - "ExpectedInstructionCount": 83, + "ExpectedInstructionCount": 89, "Comment": "0xa5", "ExpectedArm64ASM": [ - "ldrsb x20, [x28, #714]", - "mov x0, x5", - "mov x1, x11", - "mov x2, x10", - "tbnz w20, #1, #+0x94", + "mov x20, x10", + "mov x21, x11", + "mov x22, x5", + "ldrsb x23, [x28, #714]", + "mov x0, x22", + "mov x1, x21", + "mov x2, x20", + "tbnz w23, #1, #+0x94", "cbz x0, #+0x78", "sub x3, x1, x2", "tbz x3, #63, #+0x8", @@ -2984,11 +3787,11 @@ "strh w3, [x1], #2", "sub x0, x0, #0x1 (1)", "cbnz x0, #-0xc", - "mov x0, x11", - "mov x1, x10", - "mov x2, x5", - "add x20, x0, x2, lsl #1", - "add x21, x1, x2, lsl #1", + "mov x0, x21", + "mov x1, x20", + "mov x2, x22", + "add x24, x0, x2, lsl #1", + "add x25, x1, x2, lsl #1", "b #+0xa0", "cbz x0, #+0x88", "sub x3, x1, x2", @@ -3024,25 +3827,31 @@ "strh w3, [x1], #-2", "sub x0, x0, #0x1 (1)", "cbnz x0, #-0xc", - "mov x0, x11", - "mov x1, x10", - "mov x2, x5", - "sub x20, x0, x2, lsl #1", - "sub x21, x1, x2, lsl #1", + "mov x0, x21", + "mov x1, x20", + "mov x2, x22", + "sub x24, x0, x2, lsl #1", + "sub x25, x1, x2, lsl #1", + "mov x20, x24", + "mov x21, x25", + "mov w22, #0x0", + "mov x5, x22", "mov x11, x20", - "mov x10, x21", - "mov w5, #0x0" + "mov x10, x21" ] }, "rep movsd": { - "ExpectedInstructionCount": 83, + "ExpectedInstructionCount": 89, "Comment": "0xa5", "ExpectedArm64ASM": [ - "ldrsb x20, [x28, #714]", - "mov x0, x5", - "mov x1, x11", - "mov x2, x10", - "tbnz w20, #1, #+0x94", + "mov x20, x10", + "mov x21, x11", + "mov x22, x5", + "ldrsb x23, [x28, #714]", + "mov x0, x22", + "mov x1, x21", + "mov x2, x20", + "tbnz w23, #1, #+0x94", "cbz x0, #+0x78", "sub x3, x1, x2", "tbz x3, #63, #+0x8", @@ -3073,11 +3882,11 @@ "str w3, [x1], #4", "sub x0, x0, #0x1 (1)", "cbnz x0, #-0xc", - "mov x0, x11", - "mov x1, x10", - "mov x2, x5", - "add x20, x0, x2, lsl #2", - "add x21, x1, x2, lsl #2", + "mov x0, x21", + "mov x1, x20", + "mov x2, x22", + "add x24, x0, x2, lsl #2", + "add x25, x1, x2, lsl #2", "b #+0xa0", "cbz x0, #+0x88", "sub x3, x1, x2", @@ -3113,25 +3922,31 @@ "str w3, [x1], #-4", "sub x0, x0, #0x1 (1)", "cbnz x0, #-0xc", - "mov x0, x11", - "mov x1, x10", - "mov x2, x5", - "sub x20, x0, x2, lsl #2", - "sub x21, x1, x2, lsl #2", + "mov x0, x21", + "mov x1, x20", + "mov x2, x22", + "sub x24, x0, x2, lsl #2", + "sub x25, x1, x2, lsl #2", + "mov x20, x24", + "mov x21, x25", + "mov w22, #0x0", + "mov x5, x22", "mov x11, x20", - "mov x10, x21", - "mov w5, #0x0" + "mov x10, x21" ] }, "rep movsq": { - "ExpectedInstructionCount": 83, + "ExpectedInstructionCount": 89, "Comment": "0xa5", "ExpectedArm64ASM": [ - "ldrsb x20, [x28, #714]", - "mov x0, x5", - "mov x1, x11", - "mov x2, x10", - "tbnz w20, #1, #+0x94", + "mov x20, x10", + "mov x21, x11", + "mov x22, x5", + "ldrsb x23, [x28, #714]", + "mov x0, x22", + "mov x1, x21", + "mov x2, x20", + "tbnz w23, #1, #+0x94", "cbz x0, #+0x78", "sub x3, x1, x2", "tbz x3, #63, #+0x8", @@ -3162,11 +3977,11 @@ "str x3, [x1], #8", "sub x0, x0, #0x1 (1)", "cbnz x0, #-0xc", - "mov x0, x11", - "mov x1, x10", - "mov x2, x5", - "add x20, x0, x2, lsl #3", - "add x21, x1, x2, lsl #3", + "mov x0, x21", + "mov x1, x20", + "mov x2, x22", + "add x24, x0, x2, lsl #3", + "add x25, x1, x2, lsl #3", "b #+0xa0", "cbz x0, #+0x88", "sub x3, x1, x2", @@ -3202,468 +4017,686 @@ "str x3, [x1], #-8", "sub x0, x0, #0x1 (1)", "cbnz x0, #-0xc", - "mov x0, x11", - "mov x1, x10", - "mov x2, x5", - "sub x20, x0, x2, lsl #3", - "sub x21, x1, x2, lsl #3", + "mov x0, x21", + "mov x1, x20", + "mov x2, x22", + "sub x24, x0, x2, lsl #3", + "sub x25, x1, x2, lsl #3", + "mov x20, x24", + "mov x21, x25", + "mov w22, #0x0", + "mov x5, x22", "mov x11, x20", - "mov x10, x21", - "mov w5, #0x0" + "mov x10, x21" ] }, "cmpsb": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 18, "Comment": [ "0xa6" ], "ExpectedArm64ASM": [ - "ldrb w20, [x11]", - "ldrb w21, [x10]", - "ldrsb x22, [x28, #714]", - "add x11, x11, x22", - "add x10, x10, x22", - "eor w27, w21, w20", - "lsl w0, w21, #24", - "cmp w0, w20, lsl #24", - "sub w26, w21, w20", + "mov x20, x10", + "mov x21, x11", + "ldrb w22, [x21]", + "ldrb w23, [x20]", + "ldrsb x24, [x28, #714]", + "add x25, x21, x24", + "mov x11, x25", + "add x21, x20, x24", + "mov x10, x21", + "eor w20, w23, w22", + "mov x27, x20", + "lsl w0, w23, #24", + "cmp w0, w22, lsl #24", + "sub w20, w23, w22", + "mov x26, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "cmpsw": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 19, "Comment": [ "0xa7" ], "ExpectedArm64ASM": [ - "ldrh w20, [x11]", - "ldrh w21, [x10]", - "ldrsb x22, [x28, #714]", - "lsl x22, x22, #1", - "add x11, x11, x22", - "add x10, x10, x22", - "eor w27, w21, w20", - "lsl w0, w21, #16", - "cmp w0, w20, lsl #16", - "sub w26, w21, w20", + "mov x20, x10", + "mov x21, x11", + "ldrh w22, [x21]", + "ldrh w23, [x20]", + "ldrsb x24, [x28, #714]", + "lsl x25, x24, #1", + "add x24, x21, x25", + "mov x11, x24", + "add x21, x20, x25", + "mov x10, x21", + "eor w20, w23, w22", + "mov x27, x20", + "lsl w0, w23, #16", + "cmp w0, w22, lsl #16", + "sub w20, w23, w22", + "mov x26, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "cmpsd": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 17, "Comment": [ "0xa7" ], "ExpectedArm64ASM": [ - "ldr w20, [x11]", - "ldr w21, [x10]", - "ldrsb x22, [x28, #714]", - "lsl x22, x22, #2", - "add x11, x11, x22", - "add x10, x10, x22", - "eor w27, w21, w20", - "subs w26, w21, w20", + "mov x20, x10", + "mov x21, x11", + "ldr w22, [x21]", + "ldr w23, [x20]", + "ldrsb x24, [x28, #714]", + "lsl x25, x24, #2", + "add x24, x21, x25", + "mov x11, x24", + "add x21, x20, x25", + "mov x10, x21", + "eor w20, w23, w22", + "mov x27, x20", + "subs w20, w23, w22", + "mov x26, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "cmpsq": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 17, "Comment": [ "0xa7" ], "ExpectedArm64ASM": [ - "ldr x20, [x11]", - "ldr x21, [x10]", - "ldrsb x22, [x28, #714]", - "lsl x22, x22, #3", - "add x11, x11, x22", - "add x10, x10, x22", - "eor w27, w21, w20", - "subs x26, x21, x20", + "mov x20, x10", + "mov x21, x11", + "ldr x22, [x21]", + "ldr x23, [x20]", + "ldrsb x24, [x28, #714]", + "lsl x25, x24, #3", + "add x24, x21, x25", + "mov x11, x24", + "add x21, x20, x25", + "mov x10, x21", + "eor w20, w23, w22", + "mov x27, x20", + "subs x20, x23, x22", + "mov x26, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "repz cmpsb": { - "ExpectedInstructionCount": 28, + "ExpectedInstructionCount": 48, "Comment": "0xa6", "ExpectedArm64ASM": [ - "cbz x5, #+0x70", + "mov x20, x5", + "cbz x20, #+0xbc", "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x24", - "ldrb w26, [x11]", - "ldrb w27, [x10]", - "subs x5, x5, #0x1 (1)", - "add x11, x11, #0x1 (1)", - "add x10, x10, #0x1 (1)", - "ccmp x27, x26, #nzcv, ne", - "b.eq #-0x18", - "b #+0x20", - "ldrb w26, [x11]", - "ldrb w27, [x10]", - "subs x5, x5, #0x1 (1)", - "sub x11, x11, #0x1 (1)", - "sub x10, x10, #0x1 (1)", - "ccmp x27, x26, #nzcv, ne", - "b.eq #-0x18", - "mov x20, x27", - "eor w27, w20, w26", - "lsl w0, w20, #24", - "cmp w0, w26, lsl #24", - "sub w26, w20, w26", + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x44", + "mov x20, x10", + "mov x21, x11", + "ldrb w22, [x21]", + "ldrb w23, [x20]", + "mov x26, x22", + "mov x27, x23", + "mov x24, x5", + "subs x25, x24, #0x1 (1)", + "mov x5, x25", + "add x24, x21, #0x1 (1)", + "mov x11, x24", + "add x21, x20, #0x1 (1)", + "mov x10, x21", + "ccmp x23, x22, #nzcv, ne", + "b.eq #-0x38", + "b #+0x40", + "mov x20, x10", + "mov x21, x11", + "ldrb w22, [x21]", + "ldrb w23, [x20]", + "mov x26, x22", + "mov x27, x23", + "mov x24, x5", + "subs x25, x24, #0x1 (1)", + "mov x5, x25", + "sub x24, x21, #0x1 (1)", + "mov x11, x24", + "sub x21, x20, #0x1 (1)", + "mov x10, x21", + "ccmp x23, x22, #nzcv, ne", + "b.eq #-0x38", + "mov x20, x26", + "mov x21, x27", + "eor w22, w21, w20", + "mov x27, x22", + "lsl w0, w21, #24", + "cmp w0, w20, lsl #24", + "sub w22, w21, w20", + "mov x26, x22", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "repz cmpsw": { - "ExpectedInstructionCount": 28, + "ExpectedInstructionCount": 48, "Comment": "0xa7", "ExpectedArm64ASM": [ - "cbz x5, #+0x70", + "mov x20, x5", + "cbz x20, #+0xbc", "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x24", - "ldrh w26, [x11]", - "ldrh w27, [x10]", - "subs x5, x5, #0x1 (1)", - "add x11, x11, #0x2 (2)", - "add x10, x10, #0x2 (2)", - "ccmp x27, x26, #nzcv, ne", - "b.eq #-0x18", - "b #+0x20", - "ldrh w26, [x11]", - "ldrh w27, [x10]", - "subs x5, x5, #0x1 (1)", - "sub x11, x11, #0x2 (2)", - "sub x10, x10, #0x2 (2)", - "ccmp x27, x26, #nzcv, ne", - "b.eq #-0x18", - "mov x20, x27", - "eor w27, w20, w26", - "lsl w0, w20, #16", - "cmp w0, w26, lsl #16", - "sub w26, w20, w26", + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x44", + "mov x20, x10", + "mov x21, x11", + "ldrh w22, [x21]", + "ldrh w23, [x20]", + "mov x26, x22", + "mov x27, x23", + "mov x24, x5", + "subs x25, x24, #0x1 (1)", + "mov x5, x25", + "add x24, x21, #0x2 (2)", + "mov x11, x24", + "add x21, x20, #0x2 (2)", + "mov x10, x21", + "ccmp x23, x22, #nzcv, ne", + "b.eq #-0x38", + "b #+0x40", + "mov x20, x10", + "mov x21, x11", + "ldrh w22, [x21]", + "ldrh w23, [x20]", + "mov x26, x22", + "mov x27, x23", + "mov x24, x5", + "subs x25, x24, #0x1 (1)", + "mov x5, x25", + "sub x24, x21, #0x2 (2)", + "mov x11, x24", + "sub x21, x20, #0x2 (2)", + "mov x10, x21", + "ccmp x23, x22, #nzcv, ne", + "b.eq #-0x38", + "mov x20, x26", + "mov x21, x27", + "eor w22, w21, w20", + "mov x27, x22", + "lsl w0, w21, #16", + "cmp w0, w20, lsl #16", + "sub w22, w21, w20", + "mov x26, x22", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "repz cmpsd": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 46, "Comment": "0xa7", "ExpectedArm64ASM": [ - "cbz x5, #+0x68", + "mov x20, x5", + "cbz x20, #+0xb4", "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x24", - "ldr w26, [x11]", - "ldr w27, [x10]", - "subs x5, x5, #0x1 (1)", - "add x11, x11, #0x4 (4)", - "add x10, x10, #0x4 (4)", - "ccmp x27, x26, #nzcv, ne", - "b.eq #-0x18", - "b #+0x20", - "ldr w26, [x11]", - "ldr w27, [x10]", - "subs x5, x5, #0x1 (1)", - "sub x11, x11, #0x4 (4)", - "sub x10, x10, #0x4 (4)", - "ccmp x27, x26, #nzcv, ne", - "b.eq #-0x18", - "mov x20, x27", - "eor w27, w20, w26", - "subs w26, w20, w26", + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x44", + "mov x20, x10", + "mov x21, x11", + "ldr w22, [x21]", + "ldr w23, [x20]", + "mov x26, x22", + "mov x27, x23", + "mov x24, x5", + "subs x25, x24, #0x1 (1)", + "mov x5, x25", + "add x24, x21, #0x4 (4)", + "mov x11, x24", + "add x21, x20, #0x4 (4)", + "mov x10, x21", + "ccmp x23, x22, #nzcv, ne", + "b.eq #-0x38", + "b #+0x40", + "mov x20, x10", + "mov x21, x11", + "ldr w22, [x21]", + "ldr w23, [x20]", + "mov x26, x22", + "mov x27, x23", + "mov x24, x5", + "subs x25, x24, #0x1 (1)", + "mov x5, x25", + "sub x24, x21, #0x4 (4)", + "mov x11, x24", + "sub x21, x20, #0x4 (4)", + "mov x10, x21", + "ccmp x23, x22, #nzcv, ne", + "b.eq #-0x38", + "mov x20, x26", + "mov x21, x27", + "eor w22, w21, w20", + "mov x27, x22", + "subs w22, w21, w20", + "mov x26, x22", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "repz cmpsq": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 46, "Comment": "0xa7", "ExpectedArm64ASM": [ - "cbz x5, #+0x68", + "mov x20, x5", + "cbz x20, #+0xb4", "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x24", - "ldr x26, [x11]", - "ldr x27, [x10]", - "subs x5, x5, #0x1 (1)", - "add x11, x11, #0x8 (8)", - "add x10, x10, #0x8 (8)", - "ccmp x27, x26, #nzcv, ne", - "b.eq #-0x18", - "b #+0x20", - "ldr x26, [x11]", - "ldr x27, [x10]", - "subs x5, x5, #0x1 (1)", - "sub x11, x11, #0x8 (8)", - "sub x10, x10, #0x8 (8)", - "ccmp x27, x26, #nzcv, ne", - "b.eq #-0x18", - "mov x20, x27", - "eor w27, w20, w26", - "subs x26, x20, x26", + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x44", + "mov x20, x10", + "mov x21, x11", + "ldr x22, [x21]", + "ldr x23, [x20]", + "mov x26, x22", + "mov x27, x23", + "mov x24, x5", + "subs x25, x24, #0x1 (1)", + "mov x5, x25", + "add x24, x21, #0x8 (8)", + "mov x11, x24", + "add x21, x20, #0x8 (8)", + "mov x10, x21", + "ccmp x23, x22, #nzcv, ne", + "b.eq #-0x38", + "b #+0x40", + "mov x20, x10", + "mov x21, x11", + "ldr x22, [x21]", + "ldr x23, [x20]", + "mov x26, x22", + "mov x27, x23", + "mov x24, x5", + "subs x25, x24, #0x1 (1)", + "mov x5, x25", + "sub x24, x21, #0x8 (8)", + "mov x11, x24", + "sub x21, x20, #0x8 (8)", + "mov x10, x21", + "ccmp x23, x22, #nzcv, ne", + "b.eq #-0x38", + "mov x20, x26", + "mov x21, x27", + "eor w22, w21, w20", + "mov x27, x22", + "subs x22, x21, x20", + "mov x26, x22", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "repnz cmpsb": { - "ExpectedInstructionCount": 28, + "ExpectedInstructionCount": 48, "Comment": "0xa6", "ExpectedArm64ASM": [ - "cbz x5, #+0x70", + "mov x20, x5", + "cbz x20, #+0xbc", "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x24", - "ldrb w26, [x11]", - "ldrb w27, [x10]", - "subs x5, x5, #0x1 (1)", - "add x11, x11, #0x1 (1)", - "add x10, x10, #0x1 (1)", - "ccmp x27, x26, #nZcv, ne", - "b.ne #-0x18", - "b #+0x20", - "ldrb w26, [x11]", - "ldrb w27, [x10]", - "subs x5, x5, #0x1 (1)", - "sub x11, x11, #0x1 (1)", - "sub x10, x10, #0x1 (1)", - "ccmp x27, x26, #nZcv, ne", - "b.ne #-0x18", - "mov x20, x27", - "eor w27, w20, w26", - "lsl w0, w20, #24", - "cmp w0, w26, lsl #24", - "sub w26, w20, w26", + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x44", + "mov x20, x10", + "mov x21, x11", + "ldrb w22, [x21]", + "ldrb w23, [x20]", + "mov x26, x22", + "mov x27, x23", + "mov x24, x5", + "subs x25, x24, #0x1 (1)", + "mov x5, x25", + "add x24, x21, #0x1 (1)", + "mov x11, x24", + "add x21, x20, #0x1 (1)", + "mov x10, x21", + "ccmp x23, x22, #nZcv, ne", + "b.ne #-0x38", + "b #+0x40", + "mov x20, x10", + "mov x21, x11", + "ldrb w22, [x21]", + "ldrb w23, [x20]", + "mov x26, x22", + "mov x27, x23", + "mov x24, x5", + "subs x25, x24, #0x1 (1)", + "mov x5, x25", + "sub x24, x21, #0x1 (1)", + "mov x11, x24", + "sub x21, x20, #0x1 (1)", + "mov x10, x21", + "ccmp x23, x22, #nZcv, ne", + "b.ne #-0x38", + "mov x20, x26", + "mov x21, x27", + "eor w22, w21, w20", + "mov x27, x22", + "lsl w0, w21, #24", + "cmp w0, w20, lsl #24", + "sub w22, w21, w20", + "mov x26, x22", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "repnz cmpsw": { - "ExpectedInstructionCount": 28, + "ExpectedInstructionCount": 48, "Comment": "0xa7", "ExpectedArm64ASM": [ - "cbz x5, #+0x70", + "mov x20, x5", + "cbz x20, #+0xbc", "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x24", - "ldrh w26, [x11]", - "ldrh w27, [x10]", - "subs x5, x5, #0x1 (1)", - "add x11, x11, #0x2 (2)", - "add x10, x10, #0x2 (2)", - "ccmp x27, x26, #nZcv, ne", - "b.ne #-0x18", - "b #+0x20", - "ldrh w26, [x11]", - "ldrh w27, [x10]", - "subs x5, x5, #0x1 (1)", - "sub x11, x11, #0x2 (2)", - "sub x10, x10, #0x2 (2)", - "ccmp x27, x26, #nZcv, ne", - "b.ne #-0x18", - "mov x20, x27", - "eor w27, w20, w26", - "lsl w0, w20, #16", - "cmp w0, w26, lsl #16", - "sub w26, w20, w26", + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x44", + "mov x20, x10", + "mov x21, x11", + "ldrh w22, [x21]", + "ldrh w23, [x20]", + "mov x26, x22", + "mov x27, x23", + "mov x24, x5", + "subs x25, x24, #0x1 (1)", + "mov x5, x25", + "add x24, x21, #0x2 (2)", + "mov x11, x24", + "add x21, x20, #0x2 (2)", + "mov x10, x21", + "ccmp x23, x22, #nZcv, ne", + "b.ne #-0x38", + "b #+0x40", + "mov x20, x10", + "mov x21, x11", + "ldrh w22, [x21]", + "ldrh w23, [x20]", + "mov x26, x22", + "mov x27, x23", + "mov x24, x5", + "subs x25, x24, #0x1 (1)", + "mov x5, x25", + "sub x24, x21, #0x2 (2)", + "mov x11, x24", + "sub x21, x20, #0x2 (2)", + "mov x10, x21", + "ccmp x23, x22, #nZcv, ne", + "b.ne #-0x38", + "mov x20, x26", + "mov x21, x27", + "eor w22, w21, w20", + "mov x27, x22", + "lsl w0, w21, #16", + "cmp w0, w20, lsl #16", + "sub w22, w21, w20", + "mov x26, x22", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "repnz cmpsd": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 46, "Comment": "0xa7", "ExpectedArm64ASM": [ - "cbz x5, #+0x68", + "mov x20, x5", + "cbz x20, #+0xb4", "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x24", - "ldr w26, [x11]", - "ldr w27, [x10]", - "subs x5, x5, #0x1 (1)", - "add x11, x11, #0x4 (4)", - "add x10, x10, #0x4 (4)", - "ccmp x27, x26, #nZcv, ne", - "b.ne #-0x18", - "b #+0x20", - "ldr w26, [x11]", - "ldr w27, [x10]", - "subs x5, x5, #0x1 (1)", - "sub x11, x11, #0x4 (4)", - "sub x10, x10, #0x4 (4)", - "ccmp x27, x26, #nZcv, ne", - "b.ne #-0x18", - "mov x20, x27", - "eor w27, w20, w26", - "subs w26, w20, w26", + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x44", + "mov x20, x10", + "mov x21, x11", + "ldr w22, [x21]", + "ldr w23, [x20]", + "mov x26, x22", + "mov x27, x23", + "mov x24, x5", + "subs x25, x24, #0x1 (1)", + "mov x5, x25", + "add x24, x21, #0x4 (4)", + "mov x11, x24", + "add x21, x20, #0x4 (4)", + "mov x10, x21", + "ccmp x23, x22, #nZcv, ne", + "b.ne #-0x38", + "b #+0x40", + "mov x20, x10", + "mov x21, x11", + "ldr w22, [x21]", + "ldr w23, [x20]", + "mov x26, x22", + "mov x27, x23", + "mov x24, x5", + "subs x25, x24, #0x1 (1)", + "mov x5, x25", + "sub x24, x21, #0x4 (4)", + "mov x11, x24", + "sub x21, x20, #0x4 (4)", + "mov x10, x21", + "ccmp x23, x22, #nZcv, ne", + "b.ne #-0x38", + "mov x20, x26", + "mov x21, x27", + "eor w22, w21, w20", + "mov x27, x22", + "subs w22, w21, w20", + "mov x26, x22", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "repnz cmpsq": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 46, "Comment": "0xa7", "ExpectedArm64ASM": [ - "cbz x5, #+0x68", + "mov x20, x5", + "cbz x20, #+0xb4", "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x24", - "ldr x26, [x11]", - "ldr x27, [x10]", - "subs x5, x5, #0x1 (1)", - "add x11, x11, #0x8 (8)", - "add x10, x10, #0x8 (8)", - "ccmp x27, x26, #nZcv, ne", - "b.ne #-0x18", - "b #+0x20", - "ldr x26, [x11]", - "ldr x27, [x10]", - "subs x5, x5, #0x1 (1)", - "sub x11, x11, #0x8 (8)", - "sub x10, x10, #0x8 (8)", - "ccmp x27, x26, #nZcv, ne", - "b.ne #-0x18", - "mov x20, x27", - "eor w27, w20, w26", - "subs x26, x20, x26", + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x44", + "mov x20, x10", + "mov x21, x11", + "ldr x22, [x21]", + "ldr x23, [x20]", + "mov x26, x22", + "mov x27, x23", + "mov x24, x5", + "subs x25, x24, #0x1 (1)", + "mov x5, x25", + "add x24, x21, #0x8 (8)", + "mov x11, x24", + "add x21, x20, #0x8 (8)", + "mov x10, x21", + "ccmp x23, x22, #nZcv, ne", + "b.ne #-0x38", + "b #+0x40", + "mov x20, x10", + "mov x21, x11", + "ldr x22, [x21]", + "ldr x23, [x20]", + "mov x26, x22", + "mov x27, x23", + "mov x24, x5", + "subs x25, x24, #0x1 (1)", + "mov x5, x25", + "sub x24, x21, #0x8 (8)", + "mov x11, x24", + "sub x21, x20, #0x8 (8)", + "mov x10, x21", + "ccmp x23, x22, #nZcv, ne", + "b.ne #-0x38", + "mov x20, x26", + "mov x21, x27", + "eor w22, w21, w20", + "mov x27, x22", + "subs x22, x21, x20", + "mov x26, x22", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "test al, 1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0xa8", "ExpectedArm64ASM": [ - "and w26, w4, #0x1", - "cmn wzr, w26, lsl #24" + "mov x20, x4", + "and w21, w20, #0x1", + "cmn wzr, w21, lsl #24", + "mov x26, x21" ] }, "test ax, 1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0xa9", "ExpectedArm64ASM": [ - "and w26, w4, #0x1", - "cmn wzr, w26, lsl #16" + "mov x20, x4", + "and w21, w20, #0x1", + "cmn wzr, w21, lsl #16", + "mov x26, x21" ] }, "test eax, 1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0xa9", "ExpectedArm64ASM": [ - "ands w26, w4, #0x1" + "mov x20, x4", + "ands w21, w20, #0x1", + "mov x26, x21" ] }, "test rax, 1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0xa9", "ExpectedArm64ASM": [ - "ands x26, x4, #0x1" + "mov x20, x4", + "ands x21, x20, #0x1", + "mov x26, x21" ] }, "test al, -1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0xa8", "ExpectedArm64ASM": [ - "mov x26, x4", - "cmn wzr, w26, lsl #24" + "mov x20, x4", + "mov x21, x20", + "cmn wzr, w21, lsl #24", + "mov x26, x21" ] }, "test ax, -1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0xa9", "ExpectedArm64ASM": [ - "mov x26, x4", - "cmn wzr, w26, lsl #16" + "mov x20, x4", + "mov x21, x20", + "cmn wzr, w21, lsl #16", + "mov x26, x21" ] }, "test eax, -1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0xa9", "ExpectedArm64ASM": [ - "ands w26, w4, w4" + "mov x20, x4", + "ands w21, w20, w20", + "mov x26, x21" ] }, "test rax, -1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0xa9", "ExpectedArm64ASM": [ - "ands x26, x4, x4" + "mov x20, x4", + "ands x21, x20, x20", + "mov x26, x21" ] }, "stosb": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0xaa", "ExpectedArm64ASM": [ - "strb w4, [x11]", + "mov x20, x4", + "mov x21, x11", + "strb w20, [x21]", "ldrsb x20, [x28, #714]", - "add x11, x11, x20" + "add x22, x21, x20", + "mov x11, x22" ] }, "stosw": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0xab", "ExpectedArm64ASM": [ - "strh w4, [x11]", + "mov x20, x4", + "mov x21, x11", + "strh w20, [x21]", "ldrsb x20, [x28, #714]", - "add x11, x11, x20, lsl #1" + "add x22, x21, x20, lsl #1", + "mov x11, x22" ] }, "stosd": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0xab", "ExpectedArm64ASM": [ - "str w4, [x11]", + "mov x20, x4", + "mov x21, x11", + "str w20, [x21]", "ldrsb x20, [x28, #714]", - "add x11, x11, x20, lsl #2" + "add x22, x21, x20, lsl #2", + "mov x11, x22" ] }, "stosq": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0xab", "ExpectedArm64ASM": [ - "str x4, [x11]", + "mov x20, x4", + "mov x21, x11", + "str x20, [x21]", "ldrsb x20, [x28, #714]", - "add x11, x11, x20, lsl #3" + "add x22, x21, x20, lsl #3", + "mov x11, x22" ] }, "rep stosb": { - "ExpectedInstructionCount": 55, + "ExpectedInstructionCount": 60, "Comment": "0xaa", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "ldrsb x21, [x28, #714]", - "mov x0, x5", - "mov x1, x11", - "tbnz w21, #1, #+0x64", + "mov x20, x4", + "uxtb w21, w20", + "mov x20, x11", + "mov x22, x5", + "ldrsb x23, [x28, #714]", + "mov x0, x22", + "mov x1, x20", + "tbnz w23, #1, #+0x64", "cbz x0, #+0x58", "sub x0, x0, #0x20 (32)", "tbnz x0, #63, #+0x3c", - "dup v1.16b, w20", + "dup v1.16b, w21", "sub x0, x0, #0x20 (32)", "tbnz x0, #63, #+0x14", "stp q1, q1, [x1], #32", @@ -3679,16 +4712,16 @@ "tbz x0, #63, #-0x8", "add x0, x0, #0x20 (32)", "cbz x0, #+0x10", - "strb w20, [x1], #1", + "strb w21, [x1], #1", "sub x0, x0, #0x1 (1)", "cbnz x0, #-0x8", - "add x11, x11, x5", + "add x24, x20, x22", "b #+0x68", "cbz x0, #+0x60", "sub x1, x1, #0x1f (31)", "sub x0, x0, #0x20 (32)", "tbnz x0, #63, #+0x3c", - "dup v1.16b, w20", + "dup v1.16b, w21", "sub x0, x0, #0x20 (32)", "tbnz x0, #63, #+0x14", "stp q1, q1, [x1], #-32", @@ -3705,26 +4738,31 @@ "add x0, x0, #0x20 (32)", "cbz x0, #+0x14", "add x1, x1, #0x1f (31)", - "strb w20, [x1], #-1", + "strb w21, [x1], #-1", "sub x0, x0, #0x1 (1)", "cbnz x0, #-0x8", - "sub x11, x11, x5", - "mov w5, #0x0" + "sub x24, x20, x22", + "mov w20, #0x0", + "mov x5, x20", + "mov x11, x24" ] }, "rep stosw": { - "ExpectedInstructionCount": 55, + "ExpectedInstructionCount": 60, "Comment": "0xab", "ExpectedArm64ASM": [ - "uxth w20, w4", - "ldrsb x21, [x28, #714]", - "mov x0, x5", - "mov x1, x11", - "tbnz w21, #1, #+0x64", + "mov x20, x4", + "uxth w21, w20", + "mov x20, x11", + "mov x22, x5", + "ldrsb x23, [x28, #714]", + "mov x0, x22", + "mov x1, x20", + "tbnz w23, #1, #+0x64", "cbz x0, #+0x58", "sub x0, x0, #0x10 (16)", "tbnz x0, #63, #+0x3c", - "dup v1.8h, w20", + "dup v1.8h, w21", "sub x0, x0, #0x10 (16)", "tbnz x0, #63, #+0x14", "stp q1, q1, [x1], #32", @@ -3740,16 +4778,16 @@ "tbz x0, #63, #-0x8", "add x0, x0, #0x10 (16)", "cbz x0, #+0x10", - "strh w20, [x1], #2", + "strh w21, [x1], #2", "sub x0, x0, #0x1 (1)", "cbnz x0, #-0x8", - "add x11, x11, x5, lsl #1", + "add x24, x20, x22, lsl #1", "b #+0x68", "cbz x0, #+0x60", "sub x1, x1, #0x1e (30)", "sub x0, x0, #0x10 (16)", "tbnz x0, #63, #+0x3c", - "dup v1.8h, w20", + "dup v1.8h, w21", "sub x0, x0, #0x10 (16)", "tbnz x0, #63, #+0x14", "stp q1, q1, [x1], #-32", @@ -3766,26 +4804,31 @@ "add x0, x0, #0x10 (16)", "cbz x0, #+0x14", "add x1, x1, #0x1e (30)", - "strh w20, [x1], #-2", + "strh w21, [x1], #-2", "sub x0, x0, #0x1 (1)", "cbnz x0, #-0x8", - "sub x11, x11, x5, lsl #1", - "mov w5, #0x0" + "sub x24, x20, x22, lsl #1", + "mov w20, #0x0", + "mov x5, x20", + "mov x11, x24" ] }, "rep stosd": { - "ExpectedInstructionCount": 55, + "ExpectedInstructionCount": 60, "Comment": "0xab", "ExpectedArm64ASM": [ - "mov w20, w4", - "ldrsb x21, [x28, #714]", - "mov x0, x5", - "mov x1, x11", - "tbnz w21, #1, #+0x64", + "mov x20, x4", + "mov w21, w20", + "mov x20, x11", + "mov x22, x5", + "ldrsb x23, [x28, #714]", + "mov x0, x22", + "mov x1, x20", + "tbnz w23, #1, #+0x64", "cbz x0, #+0x58", "sub x0, x0, #0x8 (8)", "tbnz x0, #63, #+0x3c", - "dup v1.4s, w20", + "dup v1.4s, w21", "sub x0, x0, #0x8 (8)", "tbnz x0, #63, #+0x14", "stp q1, q1, [x1], #32", @@ -3801,16 +4844,16 @@ "tbz x0, #63, #-0x8", "add x0, x0, #0x8 (8)", "cbz x0, #+0x10", - "str w20, [x1], #4", + "str w21, [x1], #4", "sub x0, x0, #0x1 (1)", "cbnz x0, #-0x8", - "add x11, x11, x5, lsl #2", + "add x24, x20, x22, lsl #2", "b #+0x68", "cbz x0, #+0x60", "sub x1, x1, #0x1c (28)", "sub x0, x0, #0x8 (8)", "tbnz x0, #63, #+0x3c", - "dup v1.4s, w20", + "dup v1.4s, w21", "sub x0, x0, #0x8 (8)", "tbnz x0, #63, #+0x14", "stp q1, q1, [x1], #-32", @@ -3827,29 +4870,34 @@ "add x0, x0, #0x8 (8)", "cbz x0, #+0x14", "add x1, x1, #0x1c (28)", - "str w20, [x1], #-4", + "str w21, [x1], #-4", "sub x0, x0, #0x1 (1)", "cbnz x0, #-0x8", - "sub x11, x11, x5, lsl #2", - "mov w5, #0x0" + "sub x24, x20, x22, lsl #2", + "mov w20, #0x0", + "mov x5, x20", + "mov x11, x24" ] }, "rep stosq": { - "ExpectedInstructionCount": 54, + "ExpectedInstructionCount": 59, "Comment": [ "Unrolling the loop for faster memset can be done.", "Taking advantage of ARM MOPs instructions can be done", "0xab" ], "ExpectedArm64ASM": [ - "ldrsb x20, [x28, #714]", - "mov x0, x5", - "mov x1, x11", - "tbnz w20, #1, #+0x64", + "mov x20, x4", + "mov x21, x11", + "mov x22, x5", + "ldrsb x23, [x28, #714]", + "mov x0, x22", + "mov x1, x21", + "tbnz w23, #1, #+0x64", "cbz x0, #+0x58", "sub x0, x0, #0x4 (4)", "tbnz x0, #63, #+0x3c", - "dup v1.2d, x4", + "dup v1.2d, x20", "sub x0, x0, #0x4 (4)", "tbnz x0, #63, #+0x14", "stp q1, q1, [x1], #32", @@ -3865,16 +4913,16 @@ "tbz x0, #63, #-0x8", "add x0, x0, #0x4 (4)", "cbz x0, #+0x10", - "str x4, [x1], #8", + "str x20, [x1], #8", "sub x0, x0, #0x1 (1)", "cbnz x0, #-0x8", - "add x11, x11, x5, lsl #3", + "add x24, x21, x22, lsl #3", "b #+0x68", "cbz x0, #+0x60", "sub x1, x1, #0x18 (24)", "sub x0, x0, #0x4 (4)", "tbnz x0, #63, #+0x3c", - "dup v1.2d, x4", + "dup v1.2d, x20", "sub x0, x0, #0x4 (4)", "tbnz x0, #63, #+0x14", "stp q1, q1, [x1], #-32", @@ -3891,535 +4939,775 @@ "add x0, x0, #0x4 (4)", "cbz x0, #+0x14", "add x1, x1, #0x18 (24)", - "str x4, [x1], #-8", + "str x20, [x1], #-8", "sub x0, x0, #0x1 (1)", "cbnz x0, #-0x8", - "sub x11, x11, x5, lsl #3", - "mov w5, #0x0" + "sub x24, x21, x22, lsl #3", + "mov w20, #0x0", + "mov x5, x20", + "mov x11, x24" ] }, "lodsb": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 9, "Comment": "0xac", "ExpectedArm64ASM": [ - "ldrb w20, [x10]", - "bfxil x4, x20, #0, #8", - "ldrsb x20, [x28, #714]", - "add x10, x10, x20" + "mov x20, x10", + "ldrb w21, [x20]", + "mov x22, x4", + "mov x23, x22", + "bfxil x23, x21, #0, #8", + "mov x4, x23", + "ldrsb x21, [x28, #714]", + "add x22, x20, x21", + "mov x10, x22" ] }, "lodsw": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 9, "Comment": "0xad", "ExpectedArm64ASM": [ - "ldrh w20, [x10]", - "bfxil x4, x20, #0, #16", - "ldrsb x20, [x28, #714]", - "add x10, x10, x20, lsl #1" + "mov x20, x10", + "ldrh w21, [x20]", + "mov x22, x4", + "mov x23, x22", + "bfxil x23, x21, #0, #16", + "mov x4, x23", + "ldrsb x21, [x28, #714]", + "add x22, x20, x21, lsl #1", + "mov x10, x22" ] }, "lodsd": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0xad", "ExpectedArm64ASM": [ - "ldr w4, [x10]", - "ldrsb x20, [x28, #714]", - "add x10, x10, x20, lsl #2" + "mov x20, x10", + "ldr w21, [x20]", + "mov x4, x21", + "ldrsb x21, [x28, #714]", + "add x22, x20, x21, lsl #2", + "mov x10, x22" ] }, "lodsq": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0xad", "ExpectedArm64ASM": [ - "ldr x4, [x10]", - "ldrsb x20, [x28, #714]", - "add x10, x10, x20, lsl #3" + "mov x20, x10", + "ldr x21, [x20]", + "mov x4, x21", + "ldrsb x21, [x28, #714]", + "add x22, x20, x21, lsl #3", + "mov x10, x22" ] }, "rep lodsb": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 33, "Comment": "0xac", "ExpectedArm64ASM": [ "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x20", - "cbz x5, #+0x18", - "ldrb w20, [x10]", - "bfxil x4, x20, #0, #8", - "sub x5, x5, #0x1 (1)", - "add x10, x10, #0x1 (1)", - "b #-0x14", - "b #+0x1c", - "cbz x5, #+0x18", - "ldrb w20, [x10]", - "bfxil x4, x20, #0, #8", - "sub x5, x5, #0x1 (1)", - "sub x10, x10, #0x1 (1)", - "b #-0x14" + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x40", + "mov x20, x5", + "cbz x20, #+0x34", + "mov x20, x10", + "ldrb w21, [x20]", + "mov x22, x4", + "mov x23, x22", + "bfxil x23, x21, #0, #8", + "mov x4, x23", + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "add x21, x20, #0x1 (1)", + "mov x10, x21", + "b #-0x34", + "b #+0x3c", + "mov x20, x5", + "cbz x20, #+0x34", + "mov x20, x10", + "ldrb w21, [x20]", + "mov x22, x4", + "mov x23, x22", + "bfxil x23, x21, #0, #8", + "mov x4, x23", + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "sub x21, x20, #0x1 (1)", + "mov x10, x21", + "b #-0x34" ] }, "rep lodsw": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 33, "Comment": "0xad", "ExpectedArm64ASM": [ "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x20", - "cbz x5, #+0x18", - "ldrh w20, [x10]", - "bfxil x4, x20, #0, #16", - "sub x5, x5, #0x1 (1)", - "add x10, x10, #0x2 (2)", - "b #-0x14", - "b #+0x1c", - "cbz x5, #+0x18", - "ldrh w20, [x10]", - "bfxil x4, x20, #0, #16", - "sub x5, x5, #0x1 (1)", - "sub x10, x10, #0x2 (2)", - "b #-0x14" + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x40", + "mov x20, x5", + "cbz x20, #+0x34", + "mov x20, x10", + "ldrh w21, [x20]", + "mov x22, x4", + "mov x23, x22", + "bfxil x23, x21, #0, #16", + "mov x4, x23", + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "add x21, x20, #0x2 (2)", + "mov x10, x21", + "b #-0x34", + "b #+0x3c", + "mov x20, x5", + "cbz x20, #+0x34", + "mov x20, x10", + "ldrh w21, [x20]", + "mov x22, x4", + "mov x23, x22", + "bfxil x23, x21, #0, #16", + "mov x4, x23", + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "sub x21, x20, #0x2 (2)", + "mov x10, x21", + "b #-0x34" ] }, "rep lodsd": { - "ExpectedInstructionCount": 15, + "ExpectedInstructionCount": 27, "Comment": "0xad", "ExpectedArm64ASM": [ "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x1c", - "cbz x5, #+0x14", - "ldr w4, [x10]", - "sub x5, x5, #0x1 (1)", - "add x10, x10, #0x4 (4)", - "b #-0x10", - "b #+0x18", - "cbz x5, #+0x14", - "ldr w4, [x10]", - "sub x5, x5, #0x1 (1)", - "sub x10, x10, #0x4 (4)", - "b #-0x10" + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x34", + "mov x20, x5", + "cbz x20, #+0x28", + "mov x20, x10", + "ldr w21, [x20]", + "mov x4, x21", + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "add x21, x20, #0x4 (4)", + "mov x10, x21", + "b #-0x28", + "b #+0x30", + "mov x20, x5", + "cbz x20, #+0x28", + "mov x20, x10", + "ldr w21, [x20]", + "mov x4, x21", + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "sub x21, x20, #0x4 (4)", + "mov x10, x21", + "b #-0x28" ] }, "rep lodsq": { - "ExpectedInstructionCount": 15, + "ExpectedInstructionCount": 27, "Comment": "0xad", "ExpectedArm64ASM": [ "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x1c", - "cbz x5, #+0x14", - "ldr x4, [x10]", - "sub x5, x5, #0x1 (1)", - "add x10, x10, #0x8 (8)", - "b #-0x10", - "b #+0x18", - "cbz x5, #+0x14", - "ldr x4, [x10]", - "sub x5, x5, #0x1 (1)", - "sub x10, x10, #0x8 (8)", - "b #-0x10" + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x34", + "mov x20, x5", + "cbz x20, #+0x28", + "mov x20, x10", + "ldr x21, [x20]", + "mov x4, x21", + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "add x21, x20, #0x8 (8)", + "mov x10, x21", + "b #-0x28", + "b #+0x30", + "mov x20, x5", + "cbz x20, #+0x28", + "mov x20, x10", + "ldr x21, [x20]", + "mov x4, x21", + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "sub x21, x20, #0x8 (8)", + "mov x10, x21", + "b #-0x28" ] }, "scasb": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 15, "Comment": "0xae", "ExpectedArm64ASM": [ - "ldrb w20, [x11]", - "ldrsb x21, [x28, #714]", - "add x11, x11, x21", - "eor w27, w4, w20", - "lsl w0, w4, #24", - "cmp w0, w20, lsl #24", - "sub w26, w4, w20", + "mov x20, x11", + "mov x21, x4", + "ldrb w22, [x20]", + "ldrsb x23, [x28, #714]", + "add x24, x20, x23", + "mov x11, x24", + "eor w20, w21, w22", + "mov x27, x20", + "lsl w0, w21, #24", + "cmp w0, w22, lsl #24", + "sub w20, w21, w22", + "mov x26, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "scasw": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 15, "Comment": "0xaf", "ExpectedArm64ASM": [ - "ldrh w20, [x11]", - "ldrsb x21, [x28, #714]", - "add x11, x11, x21, lsl #1", - "eor w27, w4, w20", - "lsl w0, w4, #16", - "cmp w0, w20, lsl #16", - "sub w26, w4, w20", + "mov x20, x11", + "mov x21, x4", + "ldrh w22, [x20]", + "ldrsb x23, [x28, #714]", + "add x24, x20, x23, lsl #1", + "mov x11, x24", + "eor w20, w21, w22", + "mov x27, x20", + "lsl w0, w21, #16", + "cmp w0, w22, lsl #16", + "sub w20, w21, w22", + "mov x26, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "scasd": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 13, "Comment": "0xaf", "ExpectedArm64ASM": [ - "ldr w20, [x11]", - "ldrsb x21, [x28, #714]", - "add x11, x11, x21, lsl #2", - "eor w27, w4, w20", - "subs w26, w4, w20", + "mov x20, x11", + "mov x21, x4", + "ldr w22, [x20]", + "ldrsb x23, [x28, #714]", + "add x24, x20, x23, lsl #2", + "mov x11, x24", + "eor w20, w21, w22", + "mov x27, x20", + "subs w20, w21, w22", + "mov x26, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "scasq": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 13, "Comment": "0xaf", "ExpectedArm64ASM": [ - "ldr x20, [x11]", - "ldrsb x21, [x28, #714]", - "add x11, x11, x21, lsl #3", - "eor w27, w4, w20", - "subs x26, x4, x20", + "mov x20, x11", + "mov x21, x4", + "ldr x22, [x20]", + "ldrsb x23, [x28, #714]", + "add x24, x20, x23, lsl #3", + "mov x11, x24", + "eor w20, w21, w22", + "mov x27, x20", + "subs x20, x21, x22", + "mov x26, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "repz scasb": { - "ExpectedInstructionCount": 29, + "ExpectedInstructionCount": 45, "Comment": "0xae", "ExpectedArm64ASM": [ "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x38", - "cbz x5, #+0x30", - "ldrb w20, [x11]", - "eor w27, w4, w20", - "lsl w0, w4, #24", - "cmp w0, w20, lsl #24", - "sub w26, w4, w20", - "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20", - "sub x5, x5, #0x1 (1)", - "add x11, x11, #0x1 (1)", - "b.eq #-0x2c", - "b #+0x34", - "cbz x5, #+0x30", - "ldrb w20, [x11]", - "eor w27, w4, w20", - "lsl w0, w4, #24", - "cmp w0, w20, lsl #24", - "sub w26, w4, w20", - "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20", - "sub x5, x5, #0x1 (1)", - "sub x11, x11, #0x1 (1)", - "b.eq #-0x2c" + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x58", + "mov x20, x5", + "cbz x20, #+0x4c", + "mov x20, x11", + "mov x21, x4", + "ldrb w22, [x20]", + "eor w23, w21, w22", + "mov x27, x23", + "lsl w0, w21, #24", + "cmp w0, w22, lsl #24", + "sub w23, w21, w22", + "mov x26, x23", + "mrs x21, nzcv", + "eor w22, w21, #0x20000000", + "msr nzcv, x22", + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "add x21, x20, #0x1 (1)", + "mov x11, x21", + "b.eq #-0x4c", + "b #+0x54", + "mov x20, x5", + "cbz x20, #+0x4c", + "mov x20, x11", + "mov x21, x4", + "ldrb w22, [x20]", + "eor w23, w21, w22", + "mov x27, x23", + "lsl w0, w21, #24", + "cmp w0, w22, lsl #24", + "sub w23, w21, w22", + "mov x26, x23", + "mrs x21, nzcv", + "eor w22, w21, #0x20000000", + "msr nzcv, x22", + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "sub x21, x20, #0x1 (1)", + "mov x11, x21", + "b.eq #-0x4c" ] }, "repz scasw": { - "ExpectedInstructionCount": 29, + "ExpectedInstructionCount": 45, "Comment": "0xaf", "ExpectedArm64ASM": [ "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x38", - "cbz x5, #+0x30", - "ldrh w20, [x11]", - "eor w27, w4, w20", - "lsl w0, w4, #16", - "cmp w0, w20, lsl #16", - "sub w26, w4, w20", - "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20", - "sub x5, x5, #0x1 (1)", - "add x11, x11, #0x2 (2)", - "b.eq #-0x2c", - "b #+0x34", - "cbz x5, #+0x30", - "ldrh w20, [x11]", - "eor w27, w4, w20", - "lsl w0, w4, #16", - "cmp w0, w20, lsl #16", - "sub w26, w4, w20", - "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20", - "sub x5, x5, #0x1 (1)", - "sub x11, x11, #0x2 (2)", - "b.eq #-0x2c" + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x58", + "mov x20, x5", + "cbz x20, #+0x4c", + "mov x20, x11", + "mov x21, x4", + "ldrh w22, [x20]", + "eor w23, w21, w22", + "mov x27, x23", + "lsl w0, w21, #16", + "cmp w0, w22, lsl #16", + "sub w23, w21, w22", + "mov x26, x23", + "mrs x21, nzcv", + "eor w22, w21, #0x20000000", + "msr nzcv, x22", + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "add x21, x20, #0x2 (2)", + "mov x11, x21", + "b.eq #-0x4c", + "b #+0x54", + "mov x20, x5", + "cbz x20, #+0x4c", + "mov x20, x11", + "mov x21, x4", + "ldrh w22, [x20]", + "eor w23, w21, w22", + "mov x27, x23", + "lsl w0, w21, #16", + "cmp w0, w22, lsl #16", + "sub w23, w21, w22", + "mov x26, x23", + "mrs x21, nzcv", + "eor w22, w21, #0x20000000", + "msr nzcv, x22", + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "sub x21, x20, #0x2 (2)", + "mov x11, x21", + "b.eq #-0x4c" ] }, "repz scasd": { - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 41, "Comment": "0xaf", "ExpectedArm64ASM": [ "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x30", - "cbz x5, #+0x28", - "ldr w20, [x11]", - "eor w27, w4, w20", - "subs w26, w4, w20", - "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20", - "sub x5, x5, #0x1 (1)", - "add x11, x11, #0x4 (4)", - "b.eq #-0x24", - "b #+0x2c", - "cbz x5, #+0x28", - "ldr w20, [x11]", - "eor w27, w4, w20", - "subs w26, w4, w20", - "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20", - "sub x5, x5, #0x1 (1)", - "sub x11, x11, #0x4 (4)", - "b.eq #-0x24" + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x50", + "mov x20, x5", + "cbz x20, #+0x44", + "mov x20, x11", + "mov x21, x4", + "ldr w22, [x20]", + "eor w23, w21, w22", + "mov x27, x23", + "subs w23, w21, w22", + "mov x26, x23", + "mrs x21, nzcv", + "eor w22, w21, #0x20000000", + "msr nzcv, x22", + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "add x21, x20, #0x4 (4)", + "mov x11, x21", + "b.eq #-0x44", + "b #+0x4c", + "mov x20, x5", + "cbz x20, #+0x44", + "mov x20, x11", + "mov x21, x4", + "ldr w22, [x20]", + "eor w23, w21, w22", + "mov x27, x23", + "subs w23, w21, w22", + "mov x26, x23", + "mrs x21, nzcv", + "eor w22, w21, #0x20000000", + "msr nzcv, x22", + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "sub x21, x20, #0x4 (4)", + "mov x11, x21", + "b.eq #-0x44" ] }, "repz scasq": { - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 41, "Comment": "0xaf", "ExpectedArm64ASM": [ "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x30", - "cbz x5, #+0x28", - "ldr x20, [x11]", - "eor w27, w4, w20", - "subs x26, x4, x20", - "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20", - "sub x5, x5, #0x1 (1)", - "add x11, x11, #0x8 (8)", - "b.eq #-0x24", - "b #+0x2c", - "cbz x5, #+0x28", - "ldr x20, [x11]", - "eor w27, w4, w20", - "subs x26, x4, x20", - "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20", - "sub x5, x5, #0x1 (1)", - "sub x11, x11, #0x8 (8)", - "b.eq #-0x24" + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x50", + "mov x20, x5", + "cbz x20, #+0x44", + "mov x20, x11", + "mov x21, x4", + "ldr x22, [x20]", + "eor w23, w21, w22", + "mov x27, x23", + "subs x23, x21, x22", + "mov x26, x23", + "mrs x21, nzcv", + "eor w22, w21, #0x20000000", + "msr nzcv, x22", + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "add x21, x20, #0x8 (8)", + "mov x11, x21", + "b.eq #-0x44", + "b #+0x4c", + "mov x20, x5", + "cbz x20, #+0x44", + "mov x20, x11", + "mov x21, x4", + "ldr x22, [x20]", + "eor w23, w21, w22", + "mov x27, x23", + "subs x23, x21, x22", + "mov x26, x23", + "mrs x21, nzcv", + "eor w22, w21, #0x20000000", + "msr nzcv, x22", + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "sub x21, x20, #0x8 (8)", + "mov x11, x21", + "b.eq #-0x44" ] }, "repnz scasb": { - "ExpectedInstructionCount": 29, + "ExpectedInstructionCount": 45, "Comment": "0xae", "ExpectedArm64ASM": [ "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x38", - "cbz x5, #+0x30", - "ldrb w20, [x11]", - "eor w27, w4, w20", - "lsl w0, w4, #24", - "cmp w0, w20, lsl #24", - "sub w26, w4, w20", - "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20", - "sub x5, x5, #0x1 (1)", - "add x11, x11, #0x1 (1)", - "b.ne #-0x2c", - "b #+0x34", - "cbz x5, #+0x30", - "ldrb w20, [x11]", - "eor w27, w4, w20", - "lsl w0, w4, #24", - "cmp w0, w20, lsl #24", - "sub w26, w4, w20", - "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20", - "sub x5, x5, #0x1 (1)", - "sub x11, x11, #0x1 (1)", - "b.ne #-0x2c" + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x58", + "mov x20, x5", + "cbz x20, #+0x4c", + "mov x20, x11", + "mov x21, x4", + "ldrb w22, [x20]", + "eor w23, w21, w22", + "mov x27, x23", + "lsl w0, w21, #24", + "cmp w0, w22, lsl #24", + "sub w23, w21, w22", + "mov x26, x23", + "mrs x21, nzcv", + "eor w22, w21, #0x20000000", + "msr nzcv, x22", + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "add x21, x20, #0x1 (1)", + "mov x11, x21", + "b.ne #-0x4c", + "b #+0x54", + "mov x20, x5", + "cbz x20, #+0x4c", + "mov x20, x11", + "mov x21, x4", + "ldrb w22, [x20]", + "eor w23, w21, w22", + "mov x27, x23", + "lsl w0, w21, #24", + "cmp w0, w22, lsl #24", + "sub w23, w21, w22", + "mov x26, x23", + "mrs x21, nzcv", + "eor w22, w21, #0x20000000", + "msr nzcv, x22", + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "sub x21, x20, #0x1 (1)", + "mov x11, x21", + "b.ne #-0x4c" ] }, "repnz scasw": { - "ExpectedInstructionCount": 29, + "ExpectedInstructionCount": 45, "Comment": "0xaf", "ExpectedArm64ASM": [ "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x38", - "cbz x5, #+0x30", - "ldrh w20, [x11]", - "eor w27, w4, w20", - "lsl w0, w4, #16", - "cmp w0, w20, lsl #16", - "sub w26, w4, w20", - "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20", - "sub x5, x5, #0x1 (1)", - "add x11, x11, #0x2 (2)", - "b.ne #-0x2c", - "b #+0x34", - "cbz x5, #+0x30", - "ldrh w20, [x11]", - "eor w27, w4, w20", - "lsl w0, w4, #16", - "cmp w0, w20, lsl #16", - "sub w26, w4, w20", - "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20", - "sub x5, x5, #0x1 (1)", - "sub x11, x11, #0x2 (2)", - "b.ne #-0x2c" + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x58", + "mov x20, x5", + "cbz x20, #+0x4c", + "mov x20, x11", + "mov x21, x4", + "ldrh w22, [x20]", + "eor w23, w21, w22", + "mov x27, x23", + "lsl w0, w21, #16", + "cmp w0, w22, lsl #16", + "sub w23, w21, w22", + "mov x26, x23", + "mrs x21, nzcv", + "eor w22, w21, #0x20000000", + "msr nzcv, x22", + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "add x21, x20, #0x2 (2)", + "mov x11, x21", + "b.ne #-0x4c", + "b #+0x54", + "mov x20, x5", + "cbz x20, #+0x4c", + "mov x20, x11", + "mov x21, x4", + "ldrh w22, [x20]", + "eor w23, w21, w22", + "mov x27, x23", + "lsl w0, w21, #16", + "cmp w0, w22, lsl #16", + "sub w23, w21, w22", + "mov x26, x23", + "mrs x21, nzcv", + "eor w22, w21, #0x20000000", + "msr nzcv, x22", + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "sub x21, x20, #0x2 (2)", + "mov x11, x21", + "b.ne #-0x4c" ] }, "repnz scasd": { - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 41, "Comment": "0xaf", "ExpectedArm64ASM": [ "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x30", - "cbz x5, #+0x28", - "ldr w20, [x11]", - "eor w27, w4, w20", - "subs w26, w4, w20", - "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20", - "sub x5, x5, #0x1 (1)", - "add x11, x11, #0x4 (4)", - "b.ne #-0x24", - "b #+0x2c", - "cbz x5, #+0x28", - "ldr w20, [x11]", - "eor w27, w4, w20", - "subs w26, w4, w20", - "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20", - "sub x5, x5, #0x1 (1)", - "sub x11, x11, #0x4 (4)", - "b.ne #-0x24" + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x50", + "mov x20, x5", + "cbz x20, #+0x44", + "mov x20, x11", + "mov x21, x4", + "ldr w22, [x20]", + "eor w23, w21, w22", + "mov x27, x23", + "subs w23, w21, w22", + "mov x26, x23", + "mrs x21, nzcv", + "eor w22, w21, #0x20000000", + "msr nzcv, x22", + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "add x21, x20, #0x4 (4)", + "mov x11, x21", + "b.ne #-0x44", + "b #+0x4c", + "mov x20, x5", + "cbz x20, #+0x44", + "mov x20, x11", + "mov x21, x4", + "ldr w22, [x20]", + "eor w23, w21, w22", + "mov x27, x23", + "subs w23, w21, w22", + "mov x26, x23", + "mrs x21, nzcv", + "eor w22, w21, #0x20000000", + "msr nzcv, x22", + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "sub x21, x20, #0x4 (4)", + "mov x11, x21", + "b.ne #-0x44" ] }, "repnz scasq": { - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 41, "Comment": "0xaf", "ExpectedArm64ASM": [ "ldrsb x20, [x28, #714]", - "lsr x20, x20, #63", - "cbz x20, #+0x8", - "b #+0x30", - "cbz x5, #+0x28", - "ldr x20, [x11]", - "eor w27, w4, w20", - "subs x26, x4, x20", - "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20", - "sub x5, x5, #0x1 (1)", - "add x11, x11, #0x8 (8)", - "b.ne #-0x24", - "b #+0x2c", - "cbz x5, #+0x28", - "ldr x20, [x11]", - "eor w27, w4, w20", - "subs x26, x4, x20", - "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20", - "sub x5, x5, #0x1 (1)", - "sub x11, x11, #0x8 (8)", - "b.ne #-0x24" + "lsr x21, x20, #63", + "cbz x21, #+0x8", + "b #+0x50", + "mov x20, x5", + "cbz x20, #+0x44", + "mov x20, x11", + "mov x21, x4", + "ldr x22, [x20]", + "eor w23, w21, w22", + "mov x27, x23", + "subs x23, x21, x22", + "mov x26, x23", + "mrs x21, nzcv", + "eor w22, w21, #0x20000000", + "msr nzcv, x22", + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "add x21, x20, #0x8 (8)", + "mov x11, x21", + "b.ne #-0x44", + "b #+0x4c", + "mov x20, x5", + "cbz x20, #+0x44", + "mov x20, x11", + "mov x21, x4", + "ldr x22, [x20]", + "eor w23, w21, w22", + "mov x27, x23", + "subs x23, x21, x22", + "mov x26, x23", + "mrs x21, nzcv", + "eor w22, w21, #0x20000000", + "msr nzcv, x22", + "mov x21, x5", + "sub x22, x21, #0x1 (1)", + "mov x5, x22", + "sub x21, x20, #0x8 (8)", + "mov x11, x21", + "b.ne #-0x44" ] }, "mov al, 0xff": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0xb0", "ExpectedArm64ASM": [ - "orr x4, x4, #0xff" + "mov x20, x4", + "orr x21, x20, #0xff", + "mov x4, x21" ] }, "mov al, 0x82": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0xb0", "ExpectedArm64ASM": [ "mov w20, #0x82", - "bfxil x4, x20, #0, #8" + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "mov ax, 0xffff": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0xb8", "ExpectedArm64ASM": [ - "orr x4, x4, #0xffff" + "mov x20, x4", + "orr x21, x20, #0xffff", + "mov x4, x21" ] }, "mov ax, 0x4243": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0xb8", "ExpectedArm64ASM": [ "mov w20, #0x4243", - "bfxil x4, x20, #0, #16" + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x4, x22" ] }, "mov eax, 0xffffffff": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "movz+movk doesn't turn in to bitfield move", "0xb8" ], "ExpectedArm64ASM": [ - "mov w4, #0xffffffff" + "mov w20, #0xffffffff", + "mov x4, x20" ] }, "mov eax, 0x44454647": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "0xb8", "ExpectedArm64ASM": [ - "mov w4, #0x4647", - "movk w4, #0x4445, lsl #16" + "mov w20, #0x4647", + "movk w20, #0x4445, lsl #16", + "mov x4, x20" ] }, "mov rax, 0xffffffffffffffff": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "0xb8", "ExpectedArm64ASM": [ - "mov x4, #0xffffffffffffffff" + "mov x20, #0xffffffffffffffff", + "mov x4, x20" ] }, "mov rax, 0x5152535455565758": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "0xb8", "ExpectedArm64ASM": [ - "mov x4, #0x5758", - "movk x4, #0x5556, lsl #16", - "movk x4, #0x5354, lsl #32", - "movk x4, #0x5152, lsl #48" + "mov x20, #0x5758", + "movk x20, #0x5556, lsl #16", + "movk x20, #0x5354, lsl #32", + "movk x20, #0x5152, lsl #48", + "mov x4, x20" ] }, "xlat": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0xd7", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "ldrb w20, [x7, x20, sxtx]", - "bfxil x4, x20, #0, #8" + "mov x20, x7", + "mov x21, x4", + "uxtb w22, w21", + "ldrb w23, [x20, x22, sxtx]", + "mov x20, x21", + "bfxil x20, x23, #0, #8", + "mov x4, x20" ] }, "cmc": { @@ -4427,8 +5715,8 @@ "Comment": "0xf5", "ExpectedArm64ASM": [ "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "clc": { @@ -4436,8 +5724,8 @@ "Comment": "0xf8", "ExpectedArm64ASM": [ "mrs x20, nzcv", - "and w20, w20, #0xdfffffff", - "msr nzcv, x20" + "and w21, w20, #0xdfffffff", + "msr nzcv, x21" ] }, "stc": { @@ -4445,8 +5733,8 @@ "Comment": "0xf9", "ExpectedArm64ASM": [ "mrs x20, nzcv", - "orr w20, w20, #0x20000000", - "msr nzcv, x20" + "orr w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "cli": { diff --git a/unittests/InstructionCountCI/PrimaryGroup.json b/unittests/InstructionCountCI/PrimaryGroup.json index 750f446f03..7424186cd3 100644 --- a/unittests/InstructionCountCI/PrimaryGroup.json +++ b/unittests/InstructionCountCI/PrimaryGroup.json @@ -15,1656 +15,1993 @@ ], "Instructions": { "add al, 1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "GROUP1 0x80 /0", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "lsl w0, w27, #24", + "mov x21, x4", + "mov x27, x21", + "lsl w0, w21, #24", "cmn w0, w20, lsl #24", - "add w26, w27, #0x1 (1)", - "mov x4, x27", - "bfxil x4, x26, #0, #8" + "add w20, w21, #0x1 (1)", + "mov x26, x20", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "or al, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x80 /1", "ExpectedArm64ASM": [ - "orr w26, w4, #0x1", - "bfxil x4, x26, #0, #8", - "cmn wzr, w26, lsl #24" + "mov x20, x4", + "orr w21, w20, #0x1", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22", + "mov x26, x21", + "cmn wzr, w21, lsl #24" ] }, "adc al, 1": { - "ExpectedInstructionCount": 20, + "ExpectedInstructionCount": 23, "Comment": "GROUP1 0x80 /2", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "cset w21, hs", - "adc w20, w27, w20", - "uxtb w26, w20", - "cmp w26, #0x1 (1)", - "cset x20, lo", - "cmp w26, #0x1 (1)", - "cset x22, ls", - "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #24", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "bic w21, w26, w27", - "ubfx x21, x21, #7, #1", - "orr w20, w20, w21, lsl #28", - "mov x4, x27", - "bfxil x4, x26, #0, #8", - "msr nzcv, x20" + "mov x21, x4", + "mov x27, x21", + "cset w22, hs", + "adc w23, w21, w20", + "uxtb w20, w23", + "cmp w20, #0x1 (1)", + "cset x23, lo", + "cmp w20, #0x1 (1)", + "cset x24, ls", + "cmp x22, #0x1 (1)", + "csel x25, x24, x23, eq", + "cmn wzr, w20, lsl #24", + "mrs x22, nzcv", + "orr w23, w22, w25, lsl #29", + "bic w22, w20, w21", + "ubfx x24, x22, #7, #1", + "orr w22, w23, w24, lsl #28", + "mov x26, x20", + "mov x23, x21", + "bfxil x23, x20, #0, #8", + "mov x4, x23", + "msr nzcv, x22" ] }, "sbb al, 1": { - "ExpectedInstructionCount": 21, + "ExpectedInstructionCount": 24, "Comment": "GROUP1 0x80 /3", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "cset w21, hs", - "add w20, w20, w21", - "sub w20, w27, w20", - "uxtb w26, w20", - "cmp x26, x27", + "mov x21, x4", + "mov x27, x21", + "cset w22, hs", + "add w23, w20, w22", + "sub w20, w21, w23", + "uxtb w23, w20", + "cmp x23, x21", "cset x20, hi", - "cmp x26, x27", - "cset x22, hs", - "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #24", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "bic w21, w27, w26", - "ubfx x21, x21, #7, #1", - "orr w20, w20, w21, lsl #28", - "mov x4, x27", - "bfxil x4, x26, #0, #8", + "cmp x23, x21", + "cset x24, hs", + "cmp x22, #0x1 (1)", + "csel x25, x24, x20, eq", + "cmn wzr, w23, lsl #24", + "mrs x20, nzcv", + "orr w22, w20, w25, lsl #29", + "bic w20, w21, w23", + "ubfx x24, x20, #7, #1", + "orr w20, w22, w24, lsl #28", + "mov x26, x23", + "mov x22, x21", + "bfxil x22, x23, #0, #8", + "mov x4, x22", "msr nzcv, x20" ] }, "and al, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x80 /4", "ExpectedArm64ASM": [ - "and w26, w4, #0x1", - "cmn wzr, w26, lsl #24", - "bfxil x4, x26, #0, #8" + "mov x20, x4", + "and w21, w20, #0x1", + "cmn wzr, w21, lsl #24", + "mov x26, x21", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22" ] }, "sub al, 1": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 13, "Comment": "GROUP1 0x80 /5", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "lsl w0, w27, #24", + "mov x21, x4", + "mov x27, x21", + "lsl w0, w21, #24", "cmp w0, w20, lsl #24", - "sub w26, w27, #0x1 (1)", - "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x4, x27", - "bfxil x4, x26, #0, #8", - "msr nzcv, x20" + "sub w20, w21, #0x1 (1)", + "mov x26, x20", + "mrs x22, nzcv", + "eor w23, w22, #0x20000000", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22", + "msr nzcv, x23" ] }, "xor al, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x80 /6", "ExpectedArm64ASM": [ - "eor w26, w4, #0x1", - "bfxil x4, x26, #0, #8", - "cmn wzr, w26, lsl #24" + "mov x20, x4", + "eor w21, w20, #0x1", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22", + "mov x26, x21", + "cmn wzr, w21, lsl #24" ] }, "cmp al, 1": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 10, "Comment": "GROUP1 0x80 /7", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "lsl w0, w27, #24", + "mov x21, x4", + "mov x27, x21", + "lsl w0, w21, #24", "cmp w0, w20, lsl #24", - "sub w26, w27, #0x1 (1)", + "sub w20, w21, #0x1 (1)", + "mov x26, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "add al, -1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 11, "Comment": "GROUP1 0x80 /0", "ExpectedArm64ASM": [ "mov w20, #0xff", - "mvn w27, w4", - "lsl w0, w4, #24", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "lsl w0, w21, #24", "cmn w0, w20, lsl #24", - "add w26, w4, #0xff (255)", - "bfxil x4, x26, #0, #8" + "add w20, w21, #0xff (255)", + "mov x26, x20", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "or al, -1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x80 /1", "ExpectedArm64ASM": [ - "orr w26, w4, #0xff", - "bfxil x4, x26, #0, #8", - "cmn wzr, w26, lsl #24" + "mov x20, x4", + "orr w21, w20, #0xff", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22", + "mov x26, x21", + "cmn wzr, w21, lsl #24" ] }, "adc al, -1": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 24, "Comment": "GROUP1 0x80 /2", "ExpectedArm64ASM": [ "mov w20, #0xff", - "mvn w27, w4", - "cset w21, hs", - "adc w20, w4, w20", - "uxtb w26, w20", - "cmp w26, #0xff (255)", - "cset x20, lo", - "cmp w26, #0xff (255)", - "cset x22, ls", - "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #24", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "bic w21, w4, w26", - "ubfx x21, x21, #7, #1", - "orr w20, w20, w21, lsl #28", - "bfxil x4, x26, #0, #8", - "msr nzcv, x20" + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "cset w22, hs", + "adc w23, w21, w20", + "uxtb w20, w23", + "cmp w20, #0xff (255)", + "cset x23, lo", + "cmp w20, #0xff (255)", + "cset x24, ls", + "cmp x22, #0x1 (1)", + "csel x25, x24, x23, eq", + "cmn wzr, w20, lsl #24", + "mrs x22, nzcv", + "orr w23, w22, w25, lsl #29", + "bic w22, w21, w20", + "ubfx x24, x22, #7, #1", + "orr w22, w23, w24, lsl #28", + "mov x26, x20", + "mov x23, x21", + "bfxil x23, x20, #0, #8", + "mov x4, x23", + "msr nzcv, x22" ] }, "sbb al, -1": { - "ExpectedInstructionCount": 20, + "ExpectedInstructionCount": 25, "Comment": "GROUP1 0x80 /3", "ExpectedArm64ASM": [ "mov w20, #0xff", - "mvn w27, w4", - "cset w21, hs", - "add w20, w20, w21", - "sub w20, w4, w20", - "uxtb w26, w20", - "cmp x26, x4", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "cset w22, hs", + "add w23, w20, w22", + "sub w20, w21, w23", + "uxtb w23, w20", + "cmp x23, x21", "cset x20, hi", - "cmp x26, x4", - "cset x22, hs", - "cmp x21, #0x1 (1)", - "csel x20, x22, x20, eq", - "cmn wzr, w26, lsl #24", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "bic w21, w26, w4", - "ubfx x21, x21, #7, #1", - "orr w20, w20, w21, lsl #28", - "bfxil x4, x26, #0, #8", + "cmp x23, x21", + "cset x24, hs", + "cmp x22, #0x1 (1)", + "csel x25, x24, x20, eq", + "cmn wzr, w23, lsl #24", + "mrs x20, nzcv", + "orr w22, w20, w25, lsl #29", + "bic w20, w23, w21", + "ubfx x24, x20, #7, #1", + "orr w20, w22, w24, lsl #28", + "mov x26, x23", + "mov x22, x21", + "bfxil x22, x23, #0, #8", + "mov x4, x22", "msr nzcv, x20" ] }, "and al, -1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x80 /4", "ExpectedArm64ASM": [ - "and w26, w4, #0xff", - "cmn wzr, w26, lsl #24", - "bfxil x4, x26, #0, #8" + "mov x20, x4", + "and w21, w20, #0xff", + "cmn wzr, w21, lsl #24", + "mov x26, x21", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22" ] }, "sub al, -1": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 14, "Comment": "GROUP1 0x80 /5", "ExpectedArm64ASM": [ "mov w20, #0xff", - "mvn w27, w4", - "lsl w0, w4, #24", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "lsl w0, w21, #24", "cmp w0, w20, lsl #24", - "sub w26, w4, #0xff (255)", - "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "bfxil x4, x26, #0, #8", - "msr nzcv, x20" + "sub w20, w21, #0xff (255)", + "mov x26, x20", + "mrs x22, nzcv", + "eor w23, w22, #0x20000000", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22", + "msr nzcv, x23" ] }, "xor al, -1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x80 /6", "ExpectedArm64ASM": [ - "eor w26, w4, #0xff", - "bfxil x4, x26, #0, #8", - "cmn wzr, w26, lsl #24" + "mov x20, x4", + "eor w21, w20, #0xff", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22", + "mov x26, x21", + "cmn wzr, w21, lsl #24" ] }, "cmp al, -1": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 11, "Comment": "GROUP1 0x80 /7", "ExpectedArm64ASM": [ "mov w20, #0xff", - "mvn w27, w4", - "lsl w0, w4, #24", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "lsl w0, w21, #24", "cmp w0, w20, lsl #24", - "sub w26, w4, #0xff (255)", + "sub w20, w21, #0xff (255)", + "mov x26, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "add ax, 256": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "GROUP1 0x81 /0", "ExpectedArm64ASM": [ "mov w20, #0x100", - "mov x27, x4", - "lsl w0, w27, #16", + "mov x21, x4", + "mov x27, x21", + "lsl w0, w21, #16", "cmn w0, w20, lsl #16", - "add w26, w27, #0x100 (256)", - "mov x4, x27", - "bfxil x4, x26, #0, #16" + "add w20, w21, #0x100 (256)", + "mov x26, x20", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x4, x22" ] }, "add eax, 256": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x81 /0", "ExpectedArm64ASM": [ - "mov x27, x4", - "adds w26, w27, #0x100 (256)", - "mov x4, x26" + "mov x20, x4", + "mov x27, x20", + "adds w21, w20, #0x100 (256)", + "mov x26, x21", + "mov x4, x21" ] }, "add rax, 256": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x81 /0", "ExpectedArm64ASM": [ - "mov x27, x4", - "adds x26, x27, #0x100 (256)", - "mov x4, x26" + "mov x20, x4", + "mov x27, x20", + "adds x21, x20, #0x100 (256)", + "mov x26, x21", + "mov x4, x21" ] }, "or eax, 256": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x81 /1", "ExpectedArm64ASM": [ - "orr w4, w4, #0x100", - "mov x26, x4", - "tst w4, w4" + "mov x20, x4", + "orr w21, w20, #0x100", + "mov x4, x21", + "mov x26, x21", + "tst w21, w21" ] }, "or rax, 256": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x81 /1", "ExpectedArm64ASM": [ - "orr x4, x4, #0x100", - "mov x26, x4", - "tst x4, x4" + "mov x20, x4", + "orr x21, x20, #0x100", + "mov x4, x21", + "mov x26, x21", + "tst x21, x21" ] }, "adc eax, 256": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /2", "ExpectedArm64ASM": [ "mov w20, #0x100", - "mov x27, x4", - "adcs w26, w27, w20", - "mov x4, x26" + "mov x21, x4", + "mov x27, x21", + "adcs w22, w21, w20", + "mov x26, x22", + "mov x4, x22" ] }, "adc rax, 256": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /2", "ExpectedArm64ASM": [ "mov w20, #0x100", - "mov x27, x4", - "adcs x26, x27, x20", - "mov x4, x26" + "mov x21, x4", + "mov x27, x21", + "adcs x22, x21, x20", + "mov x26, x22", + "mov x4, x22" ] }, "sbb eax, 256": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 12, "Comment": "GROUP1 0x81 /3", "ExpectedArm64ASM": [ "mov w20, #0x100", - "mov x27, x4", - "mrs x21, nzcv", - "eor w21, w21, #0x20000000", - "msr nzcv, x21", - "sbcs w26, w27, w20", + "mov x21, x4", + "mov x27, x21", + "mrs x22, nzcv", + "eor w23, w22, #0x20000000", + "msr nzcv, x23", + "sbcs w22, w21, w20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x4, x26", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "mov x26, x22", + "mov x4, x22", + "msr nzcv, x21" ] }, "sbb rax, 256": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 12, "Comment": "GROUP1 0x81 /3", "ExpectedArm64ASM": [ "mov w20, #0x100", - "mov x27, x4", - "mrs x21, nzcv", - "eor w21, w21, #0x20000000", - "msr nzcv, x21", - "sbcs x26, x27, x20", + "mov x21, x4", + "mov x27, x21", + "mrs x22, nzcv", + "eor w23, w22, #0x20000000", + "msr nzcv, x23", + "sbcs x22, x21, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x4, x26", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "mov x26, x22", + "mov x4, x22", + "msr nzcv, x21" ] }, "and eax, 256": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP1 0x81 /4", "ExpectedArm64ASM": [ - "ands w26, w4, #0x100", - "mov x4, x26" + "mov x20, x4", + "ands w21, w20, #0x100", + "mov x26, x21", + "mov x4, x21" ] }, "and rax, 256": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP1 0x81 /4", "ExpectedArm64ASM": [ - "ands x26, x4, #0x100", - "mov x4, x26" + "mov x20, x4", + "ands x21, x20, #0x100", + "mov x26, x21", + "mov x4, x21" ] }, "sub eax, 256": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "GROUP1 0x81 /5", "ExpectedArm64ASM": [ - "mov x27, x4", - "subs w26, w27, #0x100 (256)", + "mov x20, x4", + "mov x27, x20", + "subs w21, w20, #0x100 (256)", + "mov x26, x21", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x4, x26", - "msr nzcv, x20" + "eor w22, w20, #0x20000000", + "mov x4, x21", + "msr nzcv, x22" ] }, "sub rax, 256": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "GROUP1 0x81 /5", "ExpectedArm64ASM": [ - "mov x27, x4", - "subs x26, x27, #0x100 (256)", + "mov x20, x4", + "mov x27, x20", + "subs x21, x20, #0x100 (256)", + "mov x26, x21", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x4, x26", - "msr nzcv, x20" + "eor w22, w20, #0x20000000", + "mov x4, x21", + "msr nzcv, x22" ] }, "xor eax, 256": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x81 /6", "ExpectedArm64ASM": [ - "eor w4, w4, #0x100", - "mov x26, x4", - "tst w4, w4" + "mov x20, x4", + "eor w21, w20, #0x100", + "mov x4, x21", + "mov x26, x21", + "tst w21, w21" ] }, "xor rax, 256": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x81 /6", "ExpectedArm64ASM": [ - "eor x4, x4, #0x100", - "mov x26, x4", - "tst x4, x4" + "mov x20, x4", + "eor x21, x20, #0x100", + "mov x4, x21", + "mov x26, x21", + "tst x21, x21" ] }, "cmp eax, 256": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x81 /7", "ExpectedArm64ASM": [ - "mov x27, x4", - "subs w26, w27, #0x100 (256)", + "mov x20, x4", + "mov x27, x20", + "subs w21, w20, #0x100 (256)", + "mov x26, x21", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "cmp rax, 256": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x81 /7", "ExpectedArm64ASM": [ - "mov x27, x4", - "subs x26, x27, #0x100 (256)", + "mov x20, x4", + "mov x27, x20", + "subs x21, x20, #0x100 (256)", + "mov x26, x21", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "add ax, -256": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "GROUP1 0x81 /0", "ExpectedArm64ASM": [ "mov w20, #0xff00", - "mov x27, x4", - "lsl w0, w27, #16", + "mov x21, x4", + "mov x27, x21", + "lsl w0, w21, #16", "cmn w0, w20, lsl #16", - "add w26, w27, w20", - "mov x4, x27", - "bfxil x4, x26, #0, #16" + "add w22, w21, w20", + "mov x26, x22", + "mov x20, x21", + "bfxil x20, x22, #0, #16", + "mov x4, x20" ] }, "add eax, -256": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /0", "ExpectedArm64ASM": [ "mov w20, #0xffffff00", - "mov x27, x4", - "adds w26, w27, w20", - "mov x4, x26" + "mov x21, x4", + "mov x27, x21", + "adds w22, w21, w20", + "mov x26, x22", + "mov x4, x22" ] }, "add rax, -256": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x81 /0", "ExpectedArm64ASM": [ - "mov x27, x4", - "subs x26, x27, #0x100 (256)", - "mov x4, x26" + "mov x20, x4", + "mov x27, x20", + "subs x21, x20, #0x100 (256)", + "mov x26, x21", + "mov x4, x21" ] }, "or eax, -256": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x81 /1", "ExpectedArm64ASM": [ - "orr w4, w4, #0xffffff00", - "mov x26, x4", - "tst w4, w4" + "mov x20, x4", + "orr w21, w20, #0xffffff00", + "mov x4, x21", + "mov x26, x21", + "tst w21, w21" ] }, "or rax, -256": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x81 /1", "ExpectedArm64ASM": [ - "orr x4, x4, #0xffffffffffffff00", - "mov x26, x4", - "tst x4, x4" + "mov x20, x4", + "orr x21, x20, #0xffffffffffffff00", + "mov x4, x21", + "mov x26, x21", + "tst x21, x21" ] }, "adc eax, -256": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /2", "ExpectedArm64ASM": [ "mov w20, #0xffffff00", - "mov x27, x4", - "adcs w26, w27, w20", - "mov x4, x26" + "mov x21, x4", + "mov x27, x21", + "adcs w22, w21, w20", + "mov x26, x22", + "mov x4, x22" ] }, "adc rax, -256": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x81 /2", "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffff00", - "mov x27, x4", - "adcs x26, x27, x20", - "mov x4, x26" + "mov x21, x4", + "mov x27, x21", + "adcs x22, x21, x20", + "mov x26, x22", + "mov x4, x22" ] }, "sbb eax, -256": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 12, "Comment": "GROUP1 0x81 /3", "ExpectedArm64ASM": [ "mov w20, #0xffffff00", - "mov x27, x4", - "mrs x21, nzcv", - "eor w21, w21, #0x20000000", - "msr nzcv, x21", - "sbcs w26, w27, w20", + "mov x21, x4", + "mov x27, x21", + "mrs x22, nzcv", + "eor w23, w22, #0x20000000", + "msr nzcv, x23", + "sbcs w22, w21, w20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x4, x26", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "mov x26, x22", + "mov x4, x22", + "msr nzcv, x21" ] }, "sbb rax, -256": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 12, "Comment": "GROUP1 0x81 /3", "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffff00", - "mov x27, x4", - "mrs x21, nzcv", - "eor w21, w21, #0x20000000", - "msr nzcv, x21", - "sbcs x26, x27, x20", + "mov x21, x4", + "mov x27, x21", + "mrs x22, nzcv", + "eor w23, w22, #0x20000000", + "msr nzcv, x23", + "sbcs x22, x21, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x4, x26", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "mov x26, x22", + "mov x4, x22", + "msr nzcv, x21" ] }, "and eax, -256": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP1 0x81 /4", "ExpectedArm64ASM": [ - "ands w26, w4, #0xffffff00", - "mov x4, x26" + "mov x20, x4", + "ands w21, w20, #0xffffff00", + "mov x26, x21", + "mov x4, x21" ] }, "and rax, -256": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP1 0x81 /4", "ExpectedArm64ASM": [ - "ands x26, x4, #0xffffffffffffff00", - "mov x4, x26" + "mov x20, x4", + "ands x21, x20, #0xffffffffffffff00", + "mov x26, x21", + "mov x4, x21" ] }, "sub eax, -256": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "GROUP1 0x81 /5", "ExpectedArm64ASM": [ "mov w20, #0xffffff00", - "mov x27, x4", - "subs w26, w27, w20", + "mov x21, x4", + "mov x27, x21", + "subs w22, w21, w20", + "mov x26, x22", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x4, x26", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "mov x4, x22", + "msr nzcv, x21" ] }, "sub rax, -256": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "GROUP1 0x81 /5", "ExpectedArm64ASM": [ - "mov x27, x4", - "adds x26, x27, #0x100 (256)", + "mov x20, x4", + "mov x27, x20", + "adds x21, x20, #0x100 (256)", + "mov x26, x21", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x4, x26", - "msr nzcv, x20" + "eor w22, w20, #0x20000000", + "mov x4, x21", + "msr nzcv, x22" ] }, "xor eax, -256": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x81 /6", "ExpectedArm64ASM": [ - "eor w4, w4, #0xffffff00", - "mov x26, x4", - "tst w4, w4" + "mov x20, x4", + "eor w21, w20, #0xffffff00", + "mov x4, x21", + "mov x26, x21", + "tst w21, w21" ] }, "xor rax, -256": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x81 /6", "ExpectedArm64ASM": [ - "eor x4, x4, #0xffffffffffffff00", - "mov x26, x4", - "tst x4, x4" + "mov x20, x4", + "eor x21, x20, #0xffffffffffffff00", + "mov x4, x21", + "mov x26, x21", + "tst x21, x21" ] }, "cmp eax, -256": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "GROUP1 0x81 /7", "ExpectedArm64ASM": [ "mov w20, #0xffffff00", - "mov x27, x4", - "subs w26, w27, w20", + "mov x21, x4", + "mov x27, x21", + "subs w22, w21, w20", + "mov x26, x22", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "cmp rax, -256": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x81 /7", "ExpectedArm64ASM": [ - "mov x27, x4", - "adds x26, x27, #0x100 (256)", + "mov x20, x4", + "mov x27, x20", + "adds x21, x20, #0x100 (256)", + "mov x26, x21", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "add ax, 1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "GROUP1 0x83 /0", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "lsl w0, w27, #16", + "mov x21, x4", + "mov x27, x21", + "lsl w0, w21, #16", "cmn w0, w20, lsl #16", - "add w26, w27, #0x1 (1)", - "mov x4, x27", - "bfxil x4, x26, #0, #16" + "add w20, w21, #0x1 (1)", + "mov x26, x20", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x4, x22" ] }, "add eax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x83 /0", "ExpectedArm64ASM": [ - "mov x27, x4", - "adds w26, w27, #0x1 (1)", - "mov x4, x26" + "mov x20, x4", + "mov x27, x20", + "adds w21, w20, #0x1 (1)", + "mov x26, x21", + "mov x4, x21" ] }, "add rax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x83 /0", "ExpectedArm64ASM": [ - "mov x27, x4", - "adds x26, x27, #0x1 (1)", - "mov x4, x26" + "mov x20, x4", + "mov x27, x20", + "adds x21, x20, #0x1 (1)", + "mov x26, x21", + "mov x4, x21" ] }, "or eax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x83 /1", "ExpectedArm64ASM": [ - "orr w4, w4, #0x1", - "mov x26, x4", - "tst w4, w4" + "mov x20, x4", + "orr w21, w20, #0x1", + "mov x4, x21", + "mov x26, x21", + "tst w21, w21" ] }, "or rax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x83 /1", "ExpectedArm64ASM": [ - "orr x4, x4, #0x1", - "mov x26, x4", - "tst x4, x4" + "mov x20, x4", + "orr x21, x20, #0x1", + "mov x4, x21", + "mov x26, x21", + "tst x21, x21" ] }, "adc eax, 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /2", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "adcs w26, w27, w20", - "mov x4, x26" + "mov x21, x4", + "mov x27, x21", + "adcs w22, w21, w20", + "mov x26, x22", + "mov x4, x22" ] }, "adc rax, 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /2", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "adcs x26, x27, x20", - "mov x4, x26" + "mov x21, x4", + "mov x27, x21", + "adcs x22, x21, x20", + "mov x26, x22", + "mov x4, x22" ] }, "sbb eax, 1": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 12, "Comment": "GROUP1 0x83 /3", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "mrs x21, nzcv", - "eor w21, w21, #0x20000000", - "msr nzcv, x21", - "sbcs w26, w27, w20", + "mov x21, x4", + "mov x27, x21", + "mrs x22, nzcv", + "eor w23, w22, #0x20000000", + "msr nzcv, x23", + "sbcs w22, w21, w20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x4, x26", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "mov x26, x22", + "mov x4, x22", + "msr nzcv, x21" ] }, "sbb rax, 1": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 12, "Comment": "GROUP1 0x83 /3", "ExpectedArm64ASM": [ "mov w20, #0x1", - "mov x27, x4", - "mrs x21, nzcv", - "eor w21, w21, #0x20000000", - "msr nzcv, x21", - "sbcs x26, x27, x20", + "mov x21, x4", + "mov x27, x21", + "mrs x22, nzcv", + "eor w23, w22, #0x20000000", + "msr nzcv, x23", + "sbcs x22, x21, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x4, x26", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "mov x26, x22", + "mov x4, x22", + "msr nzcv, x21" ] }, "and eax, 1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP1 0x83 /4", "ExpectedArm64ASM": [ - "ands w26, w4, #0x1", - "mov x4, x26" + "mov x20, x4", + "ands w21, w20, #0x1", + "mov x26, x21", + "mov x4, x21" ] }, "and rax, 1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP1 0x83 /4", "ExpectedArm64ASM": [ - "ands x26, x4, #0x1", - "mov x4, x26" + "mov x20, x4", + "ands x21, x20, #0x1", + "mov x26, x21", + "mov x4, x21" ] }, "sub eax, 1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "GROUP1 0x83 /5", "ExpectedArm64ASM": [ - "mov x27, x4", - "subs w26, w27, #0x1 (1)", + "mov x20, x4", + "mov x27, x20", + "subs w21, w20, #0x1 (1)", + "mov x26, x21", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x4, x26", - "msr nzcv, x20" + "eor w22, w20, #0x20000000", + "mov x4, x21", + "msr nzcv, x22" ] }, "sub rax, 1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "GROUP1 0x83 /5", "ExpectedArm64ASM": [ - "mov x27, x4", - "subs x26, x27, #0x1 (1)", + "mov x20, x4", + "mov x27, x20", + "subs x21, x20, #0x1 (1)", + "mov x26, x21", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x4, x26", - "msr nzcv, x20" + "eor w22, w20, #0x20000000", + "mov x4, x21", + "msr nzcv, x22" ] }, "xor eax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x83 /6", "ExpectedArm64ASM": [ - "eor w4, w4, #0x1", - "mov x26, x4", - "tst w4, w4" + "mov x20, x4", + "eor w21, w20, #0x1", + "mov x4, x21", + "mov x26, x21", + "tst w21, w21" ] }, "xor rax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x83 /6", "ExpectedArm64ASM": [ - "eor x4, x4, #0x1", - "mov x26, x4", - "tst x4, x4" + "mov x20, x4", + "eor x21, x20, #0x1", + "mov x4, x21", + "mov x26, x21", + "tst x21, x21" ] }, "cmp eax, 1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x83 /7", "ExpectedArm64ASM": [ - "mov x27, x4", - "subs w26, w27, #0x1 (1)", + "mov x20, x4", + "mov x27, x20", + "subs w21, w20, #0x1 (1)", + "mov x26, x21", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "cmp rax, 1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x83 /7", "ExpectedArm64ASM": [ - "mov x27, x4", - "subs x26, x27, #0x1 (1)", + "mov x20, x4", + "mov x27, x20", + "subs x21, x20, #0x1 (1)", + "mov x26, x21", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "add ax, -1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 11, "Comment": "GROUP1 0x83 /0", "ExpectedArm64ASM": [ "mov w20, #0xffff", - "mvn w27, w4", - "lsl w0, w4, #16", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "lsl w0, w21, #16", "cmn w0, w20, lsl #16", - "add w26, w4, w20", - "bfxil x4, x26, #0, #16" + "add w22, w21, w20", + "mov x26, x22", + "mov x20, x21", + "bfxil x20, x22, #0, #16", + "mov x4, x20" ] }, "add eax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x83 /0", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "mvn w27, w4", - "adds w26, w4, w20", - "mov x4, x26" + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "adds w22, w21, w20", + "mov x26, x22", + "mov x4, x22" ] }, "add rax, -1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /0", "ExpectedArm64ASM": [ - "mvn w27, w4", - "subs x26, x4, #0x1 (1)", - "mov x4, x26" + "mov x20, x4", + "mvn w21, w20", + "mov x27, x21", + "subs x21, x20, #0x1 (1)", + "mov x26, x21", + "mov x4, x21" ] }, "or eax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /-1", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "orr w4, w4, w20", - "mov x26, x4", - "tst w4, w4" + "mov x21, x4", + "orr w22, w21, w20", + "mov x4, x22", + "mov x26, x22", + "tst w22, w22" ] }, "or rax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /-1", "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", - "orr x4, x4, x20", - "mov x26, x4", - "tst x4, x4" + "mov x21, x4", + "orr x22, x21, x20", + "mov x4, x22", + "mov x26, x22", + "tst x22, x22" ] }, "adc eax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x83 /2", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "mvn w27, w4", - "adcs w26, w4, w20", - "mov x4, x26" + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "adcs w22, w21, w20", + "mov x26, x22", + "mov x4, x22" ] }, "adc rax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "GROUP1 0x83 /2", "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", - "mvn w27, w4", - "adcs x26, x4, x20", - "mov x4, x26" + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "adcs x22, x21, x20", + "mov x26, x22", + "mov x4, x22" ] }, "sbb eax, -1": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 13, "Comment": "GROUP1 0x83 /3", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "mvn w27, w4", - "mrs x21, nzcv", - "eor w21, w21, #0x20000000", - "msr nzcv, x21", - "sbcs w26, w4, w20", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "mrs x22, nzcv", + "eor w23, w22, #0x20000000", + "msr nzcv, x23", + "sbcs w22, w21, w20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x4, x26", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "mov x26, x22", + "mov x4, x22", + "msr nzcv, x21" ] }, "sbb rax, -1": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 13, "Comment": "GROUP1 0x83 /3", "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", - "mvn w27, w4", - "mrs x21, nzcv", - "eor w21, w21, #0x20000000", - "msr nzcv, x21", - "sbcs x26, x4, x20", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "mrs x22, nzcv", + "eor w23, w22, #0x20000000", + "msr nzcv, x23", + "sbcs x22, x21, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x4, x26", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "mov x26, x22", + "mov x4, x22", + "msr nzcv, x21" ] }, "and eax, -1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x83 /4", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "ands w26, w4, w20", - "mov x4, x26" + "mov x21, x4", + "ands w22, w21, w20", + "mov x26, x22", + "mov x4, x22" ] }, "and rax, -1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "GROUP1 0x83 /4", "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", - "ands x26, x4, x20", - "mov x4, x26" + "mov x21, x4", + "ands x22, x21, x20", + "mov x26, x22", + "mov x4, x22" ] }, "sub eax, -1": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "GROUP1 0x83 /5", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "mvn w27, w4", - "subs w26, w4, w20", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "subs w22, w21, w20", + "mov x26, x22", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x4, x26", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "mov x4, x22", + "msr nzcv, x21" ] }, "sub rax, -1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": "GROUP1 0x83 /5", "ExpectedArm64ASM": [ - "mvn w27, w4", - "adds x26, x4, #0x1 (1)", + "mov x20, x4", + "mvn w21, w20", + "mov x27, x21", + "adds x21, x20, #0x1 (1)", + "mov x26, x21", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x4, x26", - "msr nzcv, x20" + "eor w22, w20, #0x20000000", + "mov x4, x21", + "msr nzcv, x22" ] }, "xor eax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /6", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "eor w4, w4, w20", - "mov x26, x4", - "tst w4, w4" + "mov x21, x4", + "eor w22, w21, w20", + "mov x4, x22", + "mov x26, x22", + "tst w22, w22" ] }, "xor rax, -1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP1 0x83 /6", "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", - "eor x4, x4, x20", - "mov x26, x4", - "tst x4, x4" + "mov x21, x4", + "eor x22, x21, x20", + "mov x4, x22", + "mov x26, x22", + "tst x22, x22" ] }, "cmp eax, -1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": "GROUP1 0x83 /7", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "mvn w27, w4", - "subs w26, w4, w20", + "mov x21, x4", + "mvn w22, w21", + "mov x27, x22", + "subs w22, w21, w20", + "mov x26, x22", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "cmp rax, -1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "GROUP1 0x83 /7", "ExpectedArm64ASM": [ - "mvn w27, w4", - "adds x26, x4, #0x1 (1)", + "mov x20, x4", + "mvn w21, w20", + "mov x27, x21", + "adds x21, x20, #0x1 (1)", + "mov x26, x21", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "rol al, 2": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 14, "Comment": "GROUP2 0xC0 /0", "ExpectedArm64ASM": [ - "mov w20, w4", - "bfi w20, w4, #8, #8", - "bfi w20, w20, #16, #16", - "ror w20, w20, #30", - "bfxil x4, x20, #0, #8", - "mrs x21, nzcv", - "and w21, w21, #0xc0000000", - "ubfx x20, x20, #0, #1", - "orr w20, w21, w20, lsl #29", - "msr nzcv, x20" + "mov x20, x4", + "mov w21, w20", + "bfi w21, w20, #8, #8", + "mov w22, w21", + "bfi w22, w21, #16, #16", + "ror w21, w22, #30", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22", + "mrs x20, nzcv", + "and w22, w20, #0xc0000000", + "ubfx x20, x21, #0, #1", + "orr w21, w22, w20, lsl #29", + "msr nzcv, x21" ] }, "ror al, 2": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 12, "Comment": "GROUP2 0xC0 /1", "ExpectedArm64ASM": [ - "mov w20, w4", - "bfi w20, w4, #8, #8", - "ror w20, w20, #2", - "bfxil x4, x20, #0, #8", - "mrs x21, nzcv", - "and w21, w21, #0xc0000000", - "ubfx x20, x20, #7, #1", - "orr w20, w21, w20, lsl #29", - "msr nzcv, x20" + "mov x20, x4", + "mov w21, w20", + "bfi w21, w20, #8, #8", + "ror w22, w21, #2", + "mov x21, x20", + "bfxil x21, x22, #0, #8", + "mov x4, x21", + "mrs x20, nzcv", + "and w21, w20, #0xc0000000", + "ubfx x20, x22, #7, #1", + "orr w22, w21, w20, lsl #29", + "msr nzcv, x22" ] }, "rcl al, 2": { - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 36, "Comment": "GROUP2 0xC0 /2", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "mov w21, #0x0", - "cset w22, hs", - "bfi x21, x20, #55, #8", - "bfi x21, x22, #63, #1", - "bfi x21, x20, #46, #8", - "bfi x21, x22, #54, #1", - "bfi x21, x20, #37, #8", - "bfi x21, x22, #45, #1", - "bfi x21, x20, #28, #8", - "bfi x21, x22, #36, #1", - "bfi x21, x20, #19, #8", - "bfi x21, x22, #27, #1", - "mov x0, x21", - "bfxil x0, x20, #0, #8", - "mov x20, x0", - "ror x21, x20, #62", - "bfxil x4, x21, #0, #8", - "ror x20, x20, #61", - "ubfx x20, x20, #0, #1", - "mrs x21, nzcv", - "mov w0, w21", - "bfi w0, w20, #29, #1", - "mov w20, w0", - "msr nzcv, x20" + "mov x20, x4", + "uxtb w21, w20", + "mov w22, #0x0", + "cset w23, hs", + "mov x24, x22", + "bfi x24, x21, #55, #8", + "mov x22, x24", + "bfi x22, x23, #63, #1", + "mov x24, x22", + "bfi x24, x21, #46, #8", + "mov x22, x24", + "bfi x22, x23, #54, #1", + "mov x24, x22", + "bfi x24, x21, #37, #8", + "mov x22, x24", + "bfi x22, x23, #45, #1", + "mov x24, x22", + "bfi x24, x21, #28, #8", + "mov x22, x24", + "bfi x22, x23, #36, #1", + "mov x24, x22", + "bfi x24, x21, #19, #8", + "mov x22, x24", + "bfi x22, x23, #27, #1", + "mov x23, x22", + "bfxil x23, x21, #0, #8", + "ror x21, x23, #62", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22", + "ror x20, x23, #61", + "ubfx x21, x20, #0, #1", + "mrs x20, nzcv", + "mov w22, w20", + "bfi w22, w21, #29, #1", + "msr nzcv, x22" ] }, "rcr al, 2": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 20, "Comment": "GROUP2 0xC0 /3", "ExpectedArm64ASM": [ "cset w20, hs", - "uxtb w21, w4", - "mov x0, x21", - "bfi x0, x20, #8, #1", - "mov x20, x0", - "bfi x20, x20, #9, #9", - "bfi x20, x20, #18, #18", - "bfi x20, x20, #36, #9", - "lsr w21, w20, #2", - "bfxil x4, x21, #0, #8", - "ubfx x20, x20, #1, #1", - "mrs x21, nzcv", - "mov w0, w21", - "bfi w0, w20, #29, #1", - "mov w20, w0", - "msr nzcv, x20" + "mov x21, x4", + "uxtb w22, w21", + "mov x23, x22", + "bfi x23, x20, #8, #1", + "mov x20, x23", + "bfi x20, x23, #9, #9", + "mov x22, x20", + "bfi x22, x20, #18, #18", + "mov x20, x22", + "bfi x20, x22, #36, #9", + "lsr w22, w20, #2", + "mov x23, x21", + "bfxil x23, x22, #0, #8", + "mov x4, x23", + "ubfx x21, x20, #1, #1", + "mrs x20, nzcv", + "mov w22, w20", + "bfi w22, w21, #29, #1", + "msr nzcv, x22" ] }, "shl al, 2": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 12, "Comment": "GROUP2 0xC0 /4", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "lsl w26, w20, #2", - "bfxil x4, x26, #0, #8", - "cmn wzr, w26, lsl #24", - "ubfx x20, x20, #6, #1", + "mov x20, x4", + "uxtb w21, w20", + "lsl w22, w21, #2", + "mov x23, x20", + "bfxil x23, x22, #0, #8", + "mov x4, x23", + "cmn wzr, w22, lsl #24", + "ubfx x20, x21, #6, #1", "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "msr nzcv, x20" + "orr w23, w21, w20, lsl #29", + "mov x26, x22", + "msr nzcv, x23" ] }, "shr al, 2": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 12, "Comment": "GROUP2 0xC0 /5", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "lsr w26, w20, #2", - "bfxil x4, x26, #0, #8", - "cmn wzr, w26, lsl #24", - "ubfx x20, x20, #1, #1", + "mov x20, x4", + "uxtb w21, w20", + "lsr w22, w21, #2", + "mov x23, x20", + "bfxil x23, x22, #0, #8", + "mov x4, x23", + "cmn wzr, w22, lsl #24", + "ubfx x20, x21, #1, #1", "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "msr nzcv, x20" + "orr w23, w21, w20, lsl #29", + "mov x26, x22", + "msr nzcv, x23" ] }, "sar al, 2": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 13, "Comment": "GROUP2 0xC0 /7", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "sxtb x20, w20", - "asr x26, x20, #2", - "bfxil x4, x26, #0, #8", - "cmn wzr, w26, lsl #24", - "ubfx x20, x20, #1, #1", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "msr nzcv, x20" + "mov x20, x4", + "uxtb w21, w20", + "sxtb x22, w21", + "asr x21, x22, #2", + "mov x23, x20", + "bfxil x23, x21, #0, #8", + "mov x4, x23", + "cmn wzr, w21, lsl #24", + "ubfx x20, x22, #1, #1", + "mrs x22, nzcv", + "orr w23, w22, w20, lsl #29", + "mov x26, x21", + "msr nzcv, x23" ] }, "rol ax, 2": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 12, "Comment": "GROUP2 0xC1 /0", "ExpectedArm64ASM": [ - "mov w20, w4", - "bfi w20, w4, #16, #16", - "ror w20, w20, #30", - "bfxil x4, x20, #0, #16", - "mrs x21, nzcv", - "and w21, w21, #0xc0000000", - "ubfx x20, x20, #0, #1", - "orr w20, w21, w20, lsl #29", - "msr nzcv, x20" + "mov x20, x4", + "mov w21, w20", + "bfi w21, w20, #16, #16", + "ror w22, w21, #30", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21", + "mrs x20, nzcv", + "and w21, w20, #0xc0000000", + "ubfx x20, x22, #0, #1", + "orr w22, w21, w20, lsl #29", + "msr nzcv, x22" ] }, "rol eax, 2": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "GROUP2 0xC1 /0", "ExpectedArm64ASM": [ - "ror w4, w4, #30", + "mov x20, x4", + "ror w21, w20, #30", + "mov x4, x21", "mrs x20, nzcv", - "and w20, w20, #0xc0000000", - "ubfx x21, x4, #0, #1", - "orr w20, w20, w21, lsl #29", - "msr nzcv, x20" + "and w22, w20, #0xc0000000", + "ubfx x20, x21, #0, #1", + "orr w21, w22, w20, lsl #29", + "msr nzcv, x21" ] }, "rol rax, 2": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "GROUP2 0xC1 /0", "ExpectedArm64ASM": [ - "ror x4, x4, #62", + "mov x20, x4", + "ror x21, x20, #62", + "mov x4, x21", "mrs x20, nzcv", - "and w20, w20, #0xc0000000", - "ubfx x21, x4, #0, #1", - "orr w20, w20, w21, lsl #29", - "msr nzcv, x20" + "and w22, w20, #0xc0000000", + "ubfx x20, x21, #0, #1", + "orr w21, w22, w20, lsl #29", + "msr nzcv, x21" ] }, "ror ax, 2": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 12, "Comment": "GROUP2 0xC1 /1", "ExpectedArm64ASM": [ - "mov w20, w4", - "bfi w20, w4, #16, #16", - "ror w20, w20, #2", - "bfxil x4, x20, #0, #16", - "mrs x21, nzcv", - "and w21, w21, #0xc0000000", - "ubfx x20, x20, #15, #1", - "orr w20, w21, w20, lsl #29", - "msr nzcv, x20" + "mov x20, x4", + "mov w21, w20", + "bfi w21, w20, #16, #16", + "ror w22, w21, #2", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21", + "mrs x20, nzcv", + "and w21, w20, #0xc0000000", + "ubfx x20, x22, #15, #1", + "orr w22, w21, w20, lsl #29", + "msr nzcv, x22" ] }, "ror eax, 2": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "GROUP2 0xC1 /1", "ExpectedArm64ASM": [ - "ror w4, w4, #2", + "mov x20, x4", + "ror w21, w20, #2", + "mov x4, x21", "mrs x20, nzcv", - "and w20, w20, #0xc0000000", - "ubfx x21, x4, #31, #1", - "orr w20, w20, w21, lsl #29", - "msr nzcv, x20" + "and w22, w20, #0xc0000000", + "ubfx x20, x21, #31, #1", + "orr w21, w22, w20, lsl #29", + "msr nzcv, x21" ] }, "ror rax, 2": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "GROUP2 0xC1 /1", "ExpectedArm64ASM": [ - "ror x4, x4, #2", + "mov x20, x4", + "ror x21, x20, #2", + "mov x4, x21", "mrs x20, nzcv", - "and w20, w20, #0xc0000000", - "lsr x21, x4, #63", - "orr w20, w20, w21, lsl #29", - "msr nzcv, x20" + "and w22, w20, #0xc0000000", + "lsr x20, x21, #63", + "orr w21, w22, w20, lsl #29", + "msr nzcv, x21" ] }, "rcl ax, 2": { - "ExpectedInstructionCount": 21, + "ExpectedInstructionCount": 28, "Comment": "GROUP2 0xC1 /2", "ExpectedArm64ASM": [ - "uxth w20, w4", - "mov w21, #0x0", - "cset w22, hs", - "bfi x21, x20, #47, #16", - "bfi x21, x22, #63, #1", - "bfi x21, x20, #30, #16", - "bfi x21, x22, #46, #1", - "bfi x21, x20, #13, #16", - "bfi x21, x22, #29, #1", - "mov x0, x21", - "bfxil x0, x20, #0, #16", - "mov x20, x0", - "ror x21, x20, #62", - "bfxil x4, x21, #0, #16", - "ror x20, x20, #61", - "ubfx x20, x20, #0, #1", - "mrs x21, nzcv", - "mov w0, w21", - "bfi w0, w20, #29, #1", - "mov w20, w0", - "msr nzcv, x20" + "mov x20, x4", + "uxth w21, w20", + "mov w22, #0x0", + "cset w23, hs", + "mov x24, x22", + "bfi x24, x21, #47, #16", + "mov x22, x24", + "bfi x22, x23, #63, #1", + "mov x24, x22", + "bfi x24, x21, #30, #16", + "mov x22, x24", + "bfi x22, x23, #46, #1", + "mov x24, x22", + "bfi x24, x21, #13, #16", + "mov x22, x24", + "bfi x22, x23, #29, #1", + "mov x23, x22", + "bfxil x23, x21, #0, #16", + "ror x21, x23, #62", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22", + "ror x20, x23, #61", + "ubfx x21, x20, #0, #1", + "mrs x20, nzcv", + "mov w22, w20", + "bfi w22, w21, #29, #1", + "msr nzcv, x22" ] }, "rcl eax, 2": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "GROUP2 0xC1 /2", "ExpectedArm64ASM": [ - "lsl w20, w4, #2", - "cset w21, hs", - "orr w20, w20, w4, lsr #31", - "ubfx x22, x4, #30, #1", - "lsl x22, x22, #29", - "orr w4, w20, w21, lsl #1", - "msr nzcv, x22" + "mov x20, x4", + "lsl w21, w20, #2", + "cset w22, hs", + "orr w23, w21, w20, lsr #31", + "ubfx x21, x20, #30, #1", + "lsl x20, x21, #29", + "orr w21, w23, w22, lsl #1", + "mov x4, x21", + "msr nzcv, x20" ] }, "rcl rax, 2": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "GROUP2 0xC1 /2", "ExpectedArm64ASM": [ - "lsl x20, x4, #2", - "cset w21, hs", - "orr x20, x20, x4, lsr #63", - "ubfx x22, x4, #62, #1", - "lsl x22, x22, #29", - "orr x4, x20, x21, lsl #1", - "msr nzcv, x22" + "mov x20, x4", + "lsl x21, x20, #2", + "cset w22, hs", + "orr x23, x21, x20, lsr #63", + "ubfx x21, x20, #62, #1", + "lsl x20, x21, #29", + "orr x21, x23, x22, lsl #1", + "mov x4, x21", + "msr nzcv, x20" ] }, "rcr ax, 2": { - "ExpectedInstructionCount": 15, + "ExpectedInstructionCount": 18, "Comment": "GROUP2 0xC1 /3", "ExpectedArm64ASM": [ "cset w20, hs", - "uxth w21, w4", - "mov x0, x21", - "bfi x0, x20, #16, #1", - "mov x20, x0", - "bfi x20, x20, #17, #17", - "bfi x20, x20, #34, #17", - "lsr w21, w20, #2", - "bfxil x4, x21, #0, #16", - "ubfx x20, x20, #1, #1", + "mov x21, x4", + "uxth w22, w21", + "mov x23, x22", + "bfi x23, x20, #16, #1", + "mov x20, x23", + "bfi x20, x23, #17, #17", + "mov x22, x20", + "bfi x22, x20, #34, #17", + "lsr w20, w22, #2", + "mov x23, x21", + "bfxil x23, x20, #0, #16", + "mov x4, x23", + "ubfx x20, x22, #1, #1", "mrs x21, nzcv", - "mov w0, w21", - "bfi w0, w20, #29, #1", - "mov w20, w0", - "msr nzcv, x20" + "mov w22, w21", + "bfi w22, w20, #29, #1", + "msr nzcv, x22" ] }, "rcr eax, 2": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "GROUP2 0xC1 /3", "ExpectedArm64ASM": [ - "lsr w20, w4, #2", - "cset w21, hs", - "orr w20, w20, w4, lsl #31", - "ubfx x22, x4, #1, #1", - "lsl x22, x22, #29", - "orr w4, w20, w21, lsl #30", - "msr nzcv, x22" + "mov x20, x4", + "lsr w21, w20, #2", + "cset w22, hs", + "orr w23, w21, w20, lsl #31", + "ubfx x21, x20, #1, #1", + "lsl x20, x21, #29", + "orr w21, w23, w22, lsl #30", + "mov x4, x21", + "msr nzcv, x20" ] }, "rcr rax, 2": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "GROUP2 0xC1 /3", "ExpectedArm64ASM": [ - "lsr x20, x4, #2", - "cset w21, hs", - "orr x20, x20, x4, lsl #63", - "ubfx x22, x4, #1, #1", - "lsl x22, x22, #29", - "orr x4, x20, x21, lsl #62", - "msr nzcv, x22" + "mov x20, x4", + "lsr x21, x20, #2", + "cset w22, hs", + "orr x23, x21, x20, lsl #63", + "ubfx x21, x20, #1, #1", + "lsl x20, x21, #29", + "orr x21, x23, x22, lsl #62", + "mov x4, x21", + "msr nzcv, x20" ] }, "shl ax, 2": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 12, "Comment": "GROUP2 0xC1 /4", "ExpectedArm64ASM": [ - "uxth w20, w4", - "lsl w26, w20, #2", - "bfxil x4, x26, #0, #16", - "cmn wzr, w26, lsl #16", - "ubfx x20, x20, #14, #1", + "mov x20, x4", + "uxth w21, w20", + "lsl w22, w21, #2", + "mov x23, x20", + "bfxil x23, x22, #0, #16", + "mov x4, x23", + "cmn wzr, w22, lsl #16", + "ubfx x20, x21, #14, #1", "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "msr nzcv, x20" + "orr w23, w21, w20, lsl #29", + "mov x26, x22", + "msr nzcv, x23" ] }, "shl eax, 2": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 10, "Comment": "GROUP2 0xC1 /4", "ExpectedArm64ASM": [ - "mov w20, w4", - "lsl w4, w20, #2", - "tst w4, w4", - "ubfx x20, x20, #30, #1", + "mov x20, x4", + "mov w21, w20", + "lsl w20, w21, #2", + "mov x4, x20", + "tst w20, w20", + "ubfx x22, x21, #30, #1", "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "mov x26, x4", - "msr nzcv, x20" + "orr w23, w21, w22, lsl #29", + "mov x26, x20", + "msr nzcv, x23" ] }, "shl rax, 2": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 9, "Comment": "GROUP2 0xC1 /4", "ExpectedArm64ASM": [ "mov x20, x4", - "lsl x4, x20, #2", - "tst x4, x4", - "ubfx x20, x20, #62, #1", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "mov x26, x4", - "msr nzcv, x20" + "lsl x21, x20, #2", + "mov x4, x21", + "tst x21, x21", + "ubfx x22, x20, #62, #1", + "mrs x20, nzcv", + "orr w23, w20, w22, lsl #29", + "mov x26, x21", + "msr nzcv, x23" ] }, "shr ax, 2": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 12, "Comment": "GROUP2 0xC1 /5", "ExpectedArm64ASM": [ - "uxth w20, w4", - "lsr w26, w20, #2", - "bfxil x4, x26, #0, #16", - "cmn wzr, w26, lsl #16", - "ubfx x20, x20, #1, #1", + "mov x20, x4", + "uxth w21, w20", + "lsr w22, w21, #2", + "mov x23, x20", + "bfxil x23, x22, #0, #16", + "mov x4, x23", + "cmn wzr, w22, lsl #16", + "ubfx x20, x21, #1, #1", "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "msr nzcv, x20" + "orr w23, w21, w20, lsl #29", + "mov x26, x22", + "msr nzcv, x23" ] }, "shr eax, 2": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 10, "Comment": "GROUP2 0xC1 /5", "ExpectedArm64ASM": [ - "mov w20, w4", - "lsr w4, w20, #2", - "tst w4, w4", - "ubfx x20, x20, #1, #1", + "mov x20, x4", + "mov w21, w20", + "lsr w20, w21, #2", + "mov x4, x20", + "tst w20, w20", + "ubfx x22, x21, #1, #1", "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "mov x26, x4", - "msr nzcv, x20" + "orr w23, w21, w22, lsl #29", + "mov x26, x20", + "msr nzcv, x23" ] }, "shr rax, 2": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 9, "Comment": "GROUP2 0xC1 /5", "ExpectedArm64ASM": [ "mov x20, x4", - "lsr x4, x20, #2", - "tst x4, x4", - "ubfx x20, x20, #1, #1", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "mov x26, x4", - "msr nzcv, x20" + "lsr x21, x20, #2", + "mov x4, x21", + "tst x21, x21", + "ubfx x22, x20, #1, #1", + "mrs x20, nzcv", + "orr w23, w20, w22, lsl #29", + "mov x26, x21", + "msr nzcv, x23" ] }, "sar ax, 2": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 13, "Comment": "GROUP2 0xC1 /7", "ExpectedArm64ASM": [ - "uxth w20, w4", - "sxth x20, w20", - "asr x26, x20, #2", - "bfxil x4, x26, #0, #16", - "cmn wzr, w26, lsl #16", - "ubfx x20, x20, #1, #1", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "msr nzcv, x20" + "mov x20, x4", + "uxth w21, w20", + "sxth x22, w21", + "asr x21, x22, #2", + "mov x23, x20", + "bfxil x23, x21, #0, #16", + "mov x4, x23", + "cmn wzr, w21, lsl #16", + "ubfx x20, x22, #1, #1", + "mrs x22, nzcv", + "orr w23, w22, w20, lsl #29", + "mov x26, x21", + "msr nzcv, x23" ] }, "sar eax, 2": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 10, "Comment": "GROUP2 0xC1 /7", "ExpectedArm64ASM": [ - "mov w20, w4", - "asr w4, w20, #2", - "tst w4, w4", - "ubfx x20, x20, #1, #1", + "mov x20, x4", + "mov w21, w20", + "asr w20, w21, #2", + "mov x4, x20", + "tst w20, w20", + "ubfx x22, x21, #1, #1", "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "mov x26, x4", - "msr nzcv, x20" + "orr w23, w21, w22, lsl #29", + "mov x26, x20", + "msr nzcv, x23" ] }, "sar rax, 2": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 9, "Comment": "GROUP2 0xC1 /7", "ExpectedArm64ASM": [ "mov x20, x4", - "asr x4, x20, #2", - "tst x4, x4", - "ubfx x20, x20, #1, #1", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "mov x26, x4", - "msr nzcv, x20" + "asr x21, x20, #2", + "mov x4, x21", + "tst x21, x21", + "ubfx x22, x20, #1, #1", + "mrs x20, nzcv", + "orr w23, w20, w22, lsl #29", + "mov x26, x21", + "msr nzcv, x23" ] }, "rol al, 1": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 17, "Comment": "GROUP2 0xd0 /0", "ExpectedArm64ASM": [ - "mov w20, w4", - "bfi w20, w4, #8, #8", - "bfi w20, w20, #16, #16", - "ror w20, w20, #31", - "bfxil x4, x20, #0, #8", - "mrs x21, nzcv", - "and w21, w21, #0xc0000000", - "ubfx x22, x20, #0, #1", - "orr w21, w21, w22, lsl #29", - "eor w20, w20, w20, lsr #7", - "ubfx x20, x20, #0, #1", - "orr w20, w21, w20, lsl #28", + "mov x20, x4", + "mov w21, w20", + "bfi w21, w20, #8, #8", + "mov w22, w21", + "bfi w22, w21, #16, #16", + "ror w21, w22, #31", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22", + "mrs x20, nzcv", + "and w22, w20, #0xc0000000", + "ubfx x20, x21, #0, #1", + "orr w23, w22, w20, lsl #29", + "eor w20, w21, w21, lsr #7", + "ubfx x21, x20, #0, #1", + "orr w20, w23, w21, lsl #28", "msr nzcv, x20" ] }, "ror al, 1": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 15, "Comment": "GROUP2 0xd0 /1", "ExpectedArm64ASM": [ - "mov w20, w4", - "bfi w20, w4, #8, #8", - "ror w20, w20, #1", - "bfxil x4, x20, #0, #8", - "mrs x21, nzcv", - "and w21, w21, #0xc0000000", - "ubfx x22, x20, #7, #1", - "orr w21, w21, w22, lsl #29", - "eor w20, w20, w20, lsr #1", - "ubfx x20, x20, #6, #1", - "orr w20, w21, w20, lsl #28", + "mov x20, x4", + "mov w21, w20", + "bfi w21, w20, #8, #8", + "ror w22, w21, #1", + "mov x21, x20", + "bfxil x21, x22, #0, #8", + "mov x4, x21", + "mrs x20, nzcv", + "and w21, w20, #0xc0000000", + "ubfx x20, x22, #7, #1", + "orr w23, w21, w20, lsl #29", + "eor w20, w22, w22, lsr #1", + "ubfx x21, x20, #6, #1", + "orr w20, w23, w21, lsl #28", "msr nzcv, x20" ] }, "rcl al, 1": { - "ExpectedInstructionCount": 15, + "ExpectedInstructionCount": 16, "Comment": "GROUP2 0xd0 /2", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "cset w21, hs", - "orr w21, w21, w20, lsl #1", - "bfxil x4, x21, #0, #8", - "ubfx x22, x20, #7, #1", - "mrs x23, nzcv", - "mov w0, w23", - "bfi w0, w22, #29, #1", - "mov w22, w0", - "eor w20, w21, w20", - "ubfx x20, x20, #7, #1", - "mov w0, w22", - "bfi w0, w20, #28, #1", - "mov w20, w0", + "mov x20, x4", + "uxtb w21, w20", + "cset w22, hs", + "orr w23, w22, w21, lsl #1", + "mov x22, x20", + "bfxil x22, x23, #0, #8", + "mov x4, x22", + "ubfx x20, x21, #7, #1", + "mrs x22, nzcv", + "mov w24, w22", + "bfi w24, w20, #29, #1", + "eor w20, w23, w21", + "ubfx x21, x20, #7, #1", + "mov w20, w24", + "bfi w20, w21, #28, #1", "msr nzcv, x20" ] }, "rcr al, 1": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 18, "Comment": "GROUP2 0xd0 /3", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "cset w21, hs", - "ubfx x22, x20, #0, #1", - "mrs x23, nzcv", - "mov w0, w23", - "bfi w0, w22, #29, #1", - "mov w22, w0", - "ubfx w20, w20, #1, #7", - "bfi w20, w21, #7, #1", - "bfxil x4, x20, #0, #8", - "eor w20, w20, w20, lsr #1", - "ubfx x20, x20, #6, #1", - "mov w0, w22", - "bfi w0, w20, #28, #1", - "mov w20, w0", + "mov x20, x4", + "uxtb w21, w20", + "cset w22, hs", + "ubfx x23, x21, #0, #1", + "mrs x24, nzcv", + "mov w25, w24", + "bfi w25, w23, #29, #1", + "ubfx w23, w21, #1, #7", + "mov w21, w23", + "bfi w21, w22, #7, #1", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22", + "eor w20, w21, w21, lsr #1", + "ubfx x21, x20, #6, #1", + "mov w20, w25", + "bfi w20, w21, #28, #1", "msr nzcv, x20" ] }, "shl al, 1": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 15, "Comment": "GROUP2 0xd0 /4", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "lsl w26, w20, #1", - "bfxil x4, x26, #0, #8", - "cmn wzr, w26, lsl #24", + "mov x20, x4", + "uxtb w21, w20", + "lsl w22, w21, #1", + "mov x23, x20", + "bfxil x23, x22, #0, #8", + "mov x4, x23", + "cmn wzr, w22, lsl #24", + "ubfx x20, x21, #7, #1", + "mrs x23, nzcv", + "orr w24, w23, w20, lsl #29", + "mov x26, x22", + "eor w20, w22, w21", "ubfx x21, x20, #7, #1", - "mrs x22, nzcv", - "orr w21, w22, w21, lsl #29", - "eor w20, w26, w20", - "ubfx x20, x20, #7, #1", - "orr w20, w21, w20, lsl #28", + "orr w20, w24, w21, lsl #28", "msr nzcv, x20" ] }, "shr al, 1": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 14, "Comment": "GROUP2 0xd0 /5", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "lsr w26, w20, #1", - "bfxil x4, x26, #0, #8", - "cmn wzr, w26, lsl #24", - "ubfx x21, x20, #0, #1", - "mrs x22, nzcv", - "orr w21, w22, w21, lsl #29", - "ubfx x20, x20, #7, #1", - "orr w20, w21, w20, lsl #28", - "msr nzcv, x20" + "mov x20, x4", + "uxtb w21, w20", + "lsr w22, w21, #1", + "mov x23, x20", + "bfxil x23, x22, #0, #8", + "mov x4, x23", + "cmn wzr, w22, lsl #24", + "ubfx x20, x21, #0, #1", + "mrs x23, nzcv", + "orr w24, w23, w20, lsl #29", + "mov x26, x22", + "ubfx x20, x21, #7, #1", + "orr w21, w24, w20, lsl #28", + "msr nzcv, x21" ] }, "sar al, 1": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 13, "Comment": "GROUP2 0xd0 /7", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "sxtb x20, w20", - "asr x26, x20, #1", - "bfxil x4, x26, #0, #8", - "cmn wzr, w26, lsl #24", - "ubfx x20, x20, #0, #1", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "msr nzcv, x20" + "mov x20, x4", + "uxtb w21, w20", + "sxtb x22, w21", + "asr x21, x22, #1", + "mov x23, x20", + "bfxil x23, x21, #0, #8", + "mov x4, x23", + "cmn wzr, w21, lsl #24", + "ubfx x20, x22, #0, #1", + "mrs x22, nzcv", + "orr w23, w22, w20, lsl #29", + "mov x26, x21", + "msr nzcv, x23" ] }, "rol ax, 1": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 15, "Comment": "GROUP2 0xd1 /0", "ExpectedArm64ASM": [ - "mov w20, w4", - "bfi w20, w4, #16, #16", - "ror w20, w20, #31", - "bfxil x4, x20, #0, #16", - "mrs x21, nzcv", - "and w21, w21, #0xc0000000", - "ubfx x22, x20, #0, #1", - "orr w21, w21, w22, lsl #29", - "eor w20, w20, w20, lsr #15", - "ubfx x20, x20, #0, #1", - "orr w20, w21, w20, lsl #28", + "mov x20, x4", + "mov w21, w20", + "bfi w21, w20, #16, #16", + "ror w22, w21, #31", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21", + "mrs x20, nzcv", + "and w21, w20, #0xc0000000", + "ubfx x20, x22, #0, #1", + "orr w23, w21, w20, lsl #29", + "eor w20, w22, w22, lsr #15", + "ubfx x21, x20, #0, #1", + "orr w20, w23, w21, lsl #28", "msr nzcv, x20" ] }, "rol eax, 1": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 11, "Comment": "GROUP2 0xd1 /0", "ExpectedArm64ASM": [ - "ror w4, w4, #31", + "mov x20, x4", + "ror w21, w20, #31", + "mov x4, x21", "mrs x20, nzcv", - "and w20, w20, #0xc0000000", - "ubfx x21, x4, #0, #1", - "orr w20, w20, w21, lsl #29", - "eor w21, w4, w4, lsr #31", - "ubfx x21, x21, #0, #1", - "orr w20, w20, w21, lsl #28", + "and w22, w20, #0xc0000000", + "ubfx x20, x21, #0, #1", + "orr w23, w22, w20, lsl #29", + "eor w20, w21, w21, lsr #31", + "ubfx x21, x20, #0, #1", + "orr w20, w23, w21, lsl #28", "msr nzcv, x20" ] }, "rol rax, 1": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 11, "Comment": "GROUP2 0xd1 /0", "ExpectedArm64ASM": [ - "ror x4, x4, #63", + "mov x20, x4", + "ror x21, x20, #63", + "mov x4, x21", "mrs x20, nzcv", - "and w20, w20, #0xc0000000", - "ubfx x21, x4, #0, #1", - "orr w20, w20, w21, lsl #29", - "eor x21, x4, x4, lsr #63", - "ubfx x21, x21, #0, #1", - "orr w20, w20, w21, lsl #28", + "and w22, w20, #0xc0000000", + "ubfx x20, x21, #0, #1", + "orr w23, w22, w20, lsl #29", + "eor x20, x21, x21, lsr #63", + "ubfx x21, x20, #0, #1", + "orr w20, w23, w21, lsl #28", "msr nzcv, x20" ] }, "ror ax, 1": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 15, "Comment": "GROUP2 0xd1 /1", "ExpectedArm64ASM": [ - "mov w20, w4", - "bfi w20, w4, #16, #16", - "ror w20, w20, #1", - "bfxil x4, x20, #0, #16", - "mrs x21, nzcv", - "and w21, w21, #0xc0000000", - "ubfx x22, x20, #15, #1", - "orr w21, w21, w22, lsl #29", - "eor w20, w20, w20, lsr #1", - "ubfx x20, x20, #14, #1", - "orr w20, w21, w20, lsl #28", + "mov x20, x4", + "mov w21, w20", + "bfi w21, w20, #16, #16", + "ror w22, w21, #1", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21", + "mrs x20, nzcv", + "and w21, w20, #0xc0000000", + "ubfx x20, x22, #15, #1", + "orr w23, w21, w20, lsl #29", + "eor w20, w22, w22, lsr #1", + "ubfx x21, x20, #14, #1", + "orr w20, w23, w21, lsl #28", "msr nzcv, x20" ] }, "ror eax, 1": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 11, "Comment": "GROUP2 0xd1 /1", "ExpectedArm64ASM": [ - "ror w4, w4, #1", + "mov x20, x4", + "ror w21, w20, #1", + "mov x4, x21", "mrs x20, nzcv", - "and w20, w20, #0xc0000000", - "ubfx x21, x4, #31, #1", - "orr w20, w20, w21, lsl #29", - "eor w21, w4, w4, lsr #1", - "ubfx x21, x21, #30, #1", - "orr w20, w20, w21, lsl #28", + "and w22, w20, #0xc0000000", + "ubfx x20, x21, #31, #1", + "orr w23, w22, w20, lsl #29", + "eor w20, w21, w21, lsr #1", + "ubfx x21, x20, #30, #1", + "orr w20, w23, w21, lsl #28", "msr nzcv, x20" ] }, "ror rax, 1": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 11, "Comment": "GROUP2 0xd1 /1", "ExpectedArm64ASM": [ - "ror x4, x4, #1", + "mov x20, x4", + "ror x21, x20, #1", + "mov x4, x21", "mrs x20, nzcv", - "and w20, w20, #0xc0000000", - "lsr x21, x4, #63", - "orr w20, w20, w21, lsl #29", - "eor x21, x4, x4, lsr #1", - "ubfx x21, x21, #62, #1", - "orr w20, w20, w21, lsl #28", + "and w22, w20, #0xc0000000", + "lsr x20, x21, #63", + "orr w23, w22, w20, lsl #29", + "eor x20, x21, x21, lsr #1", + "ubfx x21, x20, #62, #1", + "orr w20, w23, w21, lsl #28", "msr nzcv, x20" ] }, "rcl ax, 1": { - "ExpectedInstructionCount": 15, + "ExpectedInstructionCount": 16, "Comment": "GROUP2 0xd1 /2", "ExpectedArm64ASM": [ - "uxth w20, w4", - "cset w21, hs", - "orr w21, w21, w20, lsl #1", - "bfxil x4, x21, #0, #16", - "ubfx x22, x20, #15, #1", - "mrs x23, nzcv", - "mov w0, w23", - "bfi w0, w22, #29, #1", - "mov w22, w0", - "eor w20, w21, w20", - "ubfx x20, x20, #15, #1", - "mov w0, w22", - "bfi w0, w20, #28, #1", - "mov w20, w0", + "mov x20, x4", + "uxth w21, w20", + "cset w22, hs", + "orr w23, w22, w21, lsl #1", + "mov x22, x20", + "bfxil x22, x23, #0, #16", + "mov x4, x22", + "ubfx x20, x21, #15, #1", + "mrs x22, nzcv", + "mov w24, w22", + "bfi w24, w20, #29, #1", + "eor w20, w23, w21", + "ubfx x21, x20, #15, #1", + "mov w20, w24", + "bfi w20, w21, #28, #1", "msr nzcv, x20" ] }, @@ -1672,60 +2009,60 @@ "ExpectedInstructionCount": 14, "Comment": "GROUP2 0xd1 /2", "ExpectedArm64ASM": [ - "mov w20, w4", - "cset w21, hs", - "orr w4, w21, w20, lsl #1", + "mov x20, x4", + "mov w21, w20", + "cset w20, hs", + "orr w22, w20, w21, lsl #1", + "mov x4, x22", + "ubfx x20, x21, #31, #1", + "mrs x23, nzcv", + "mov w24, w23", + "bfi w24, w20, #29, #1", + "eor w20, w22, w21", "ubfx x21, x20, #31, #1", - "mrs x22, nzcv", - "mov w0, w22", - "bfi w0, w21, #29, #1", - "mov w21, w0", - "eor w20, w4, w20", - "ubfx x20, x20, #31, #1", - "mov w0, w21", - "bfi w0, w20, #28, #1", - "mov w20, w0", + "mov w20, w24", + "bfi w20, w21, #28, #1", "msr nzcv, x20" ] }, "rcl rax, 1": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 13, "Comment": "GROUP2 0xd1 /2", "ExpectedArm64ASM": [ "mov x20, x4", "cset w21, hs", - "orr x4, x21, x20, lsl #1", + "orr x22, x21, x20, lsl #1", + "mov x4, x22", "lsr x21, x20, #63", - "mrs x22, nzcv", - "mov w0, w22", - "bfi w0, w21, #29, #1", - "mov w21, w0", - "eor x20, x4, x20", - "lsr x20, x20, #63", - "mov w0, w21", - "bfi w0, w20, #28, #1", - "mov w20, w0", - "msr nzcv, x20" + "mrs x23, nzcv", + "mov w24, w23", + "bfi w24, w21, #29, #1", + "eor x21, x22, x20", + "lsr x20, x21, #63", + "mov w21, w24", + "bfi w21, w20, #28, #1", + "msr nzcv, x21" ] }, "rcr ax, 1": { - "ExpectedInstructionCount": 15, + "ExpectedInstructionCount": 16, "Comment": "GROUP2 0xd1 /3", "ExpectedArm64ASM": [ - "cset w20, hs", - "ubfx x21, x4, #0, #1", - "mrs x22, nzcv", - "mov w0, w22", - "bfi w0, w21, #29, #1", - "mov w21, w0", - "ubfx w22, w4, #1, #15", - "orr w20, w22, w20, lsl #15", - "bfxil x4, x20, #0, #16", - "eor x20, x20, x20, lsr #1", - "ubfx x20, x20, #14, #1", - "mov w0, w21", - "bfi w0, w20, #28, #1", - "mov w20, w0", + "mov x20, x4", + "cset w21, hs", + "ubfx x22, x20, #0, #1", + "mrs x23, nzcv", + "mov w24, w23", + "bfi w24, w22, #29, #1", + "ubfx w22, w20, #1, #15", + "orr w23, w22, w21, lsl #15", + "mov x21, x20", + "bfxil x21, x23, #0, #16", + "mov x4, x21", + "eor x20, x23, x23, lsr #1", + "ubfx x21, x20, #14, #1", + "mov w20, w24", + "bfi w20, w21, #28, #1", "msr nzcv, x20" ] }, @@ -1733,18 +2070,18 @@ "ExpectedInstructionCount": 13, "Comment": "GROUP2 0xd1 /3", "ExpectedArm64ASM": [ - "cset w20, hs", - "ubfx x21, x4, #0, #1", - "mrs x22, nzcv", - "mov w0, w22", - "bfi w0, w21, #29, #1", - "mov w21, w0", - "extr w4, w20, w4, #1", - "eor x20, x4, x4, lsr #1", - "ubfx x20, x20, #30, #1", - "mov w0, w21", - "bfi w0, w20, #28, #1", - "mov w20, w0", + "mov x20, x4", + "cset w21, hs", + "ubfx x22, x20, #0, #1", + "mrs x23, nzcv", + "mov w24, w23", + "bfi w24, w22, #29, #1", + "extr w22, w21, w20, #1", + "mov x4, x22", + "eor x20, x22, x22, lsr #1", + "ubfx x21, x20, #30, #1", + "mov w20, w24", + "bfi w20, w21, #28, #1", "msr nzcv, x20" ] }, @@ -1752,1449 +2089,1781 @@ "ExpectedInstructionCount": 13, "Comment": "GROUP2 0xd1 /3", "ExpectedArm64ASM": [ - "cset w20, hs", - "ubfx x21, x4, #0, #1", - "mrs x22, nzcv", - "mov w0, w22", - "bfi w0, w21, #29, #1", - "mov w21, w0", - "extr x4, x20, x4, #1", - "eor x20, x4, x4, lsr #1", - "ubfx x20, x20, #62, #1", - "mov w0, w21", - "bfi w0, w20, #28, #1", - "mov w20, w0", + "mov x20, x4", + "cset w21, hs", + "ubfx x22, x20, #0, #1", + "mrs x23, nzcv", + "mov w24, w23", + "bfi w24, w22, #29, #1", + "extr x22, x21, x20, #1", + "mov x4, x22", + "eor x20, x22, x22, lsr #1", + "ubfx x21, x20, #62, #1", + "mov w20, w24", + "bfi w20, w21, #28, #1", "msr nzcv, x20" ] }, "shl ax, 1": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 15, "Comment": "GROUP2 0xd1 /4", "ExpectedArm64ASM": [ - "uxth w20, w4", - "lsl w26, w20, #1", - "bfxil x4, x26, #0, #16", - "cmn wzr, w26, lsl #16", + "mov x20, x4", + "uxth w21, w20", + "lsl w22, w21, #1", + "mov x23, x20", + "bfxil x23, x22, #0, #16", + "mov x4, x23", + "cmn wzr, w22, lsl #16", + "ubfx x20, x21, #15, #1", + "mrs x23, nzcv", + "orr w24, w23, w20, lsl #29", + "mov x26, x22", + "eor w20, w22, w21", "ubfx x21, x20, #15, #1", - "mrs x22, nzcv", - "orr w21, w22, w21, lsl #29", - "eor w20, w26, w20", - "ubfx x20, x20, #15, #1", - "orr w20, w21, w20, lsl #28", + "orr w20, w24, w21, lsl #28", "msr nzcv, x20" ] }, "shl eax, 1": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 13, "Comment": "GROUP2 0xd1 /4", "ExpectedArm64ASM": [ - "mov w20, w4", - "lsl w4, w20, #1", - "tst w4, w4", - "ubfx x21, x20, #31, #1", - "mrs x22, nzcv", - "orr w21, w22, w21, lsl #29", - "mov x26, x4", - "eor w20, w4, w20", - "ubfx x20, x20, #31, #1", - "orr w20, w21, w20, lsl #28", - "msr nzcv, x20" + "mov x20, x4", + "mov w21, w20", + "lsl w20, w21, #1", + "mov x4, x20", + "tst w20, w20", + "ubfx x22, x21, #31, #1", + "mrs x23, nzcv", + "orr w24, w23, w22, lsl #29", + "mov x26, x20", + "eor w22, w20, w21", + "ubfx x20, x22, #31, #1", + "orr w21, w24, w20, lsl #28", + "msr nzcv, x21" ] }, "shl rax, 1": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 12, "Comment": "GROUP2 0xd1 /4", "ExpectedArm64ASM": [ "mov x20, x4", - "lsl x4, x20, #1", - "tst x4, x4", - "lsr x21, x20, #63", - "mrs x22, nzcv", - "orr w21, w22, w21, lsl #29", - "mov x26, x4", - "eor x20, x4, x20", - "lsr x20, x20, #63", - "orr w20, w21, w20, lsl #28", - "msr nzcv, x20" + "lsl x21, x20, #1", + "mov x4, x21", + "tst x21, x21", + "lsr x22, x20, #63", + "mrs x23, nzcv", + "orr w24, w23, w22, lsl #29", + "mov x26, x21", + "eor x22, x21, x20", + "lsr x20, x22, #63", + "orr w21, w24, w20, lsl #28", + "msr nzcv, x21" ] }, "shr ax, 1": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 14, "Comment": "GROUP2 0xd1 /5", "ExpectedArm64ASM": [ - "uxth w20, w4", - "lsr w26, w20, #1", - "bfxil x4, x26, #0, #16", - "cmn wzr, w26, lsl #16", - "ubfx x21, x20, #0, #1", - "mrs x22, nzcv", - "orr w21, w22, w21, lsl #29", - "ubfx x20, x20, #15, #1", - "orr w20, w21, w20, lsl #28", - "msr nzcv, x20" + "mov x20, x4", + "uxth w21, w20", + "lsr w22, w21, #1", + "mov x23, x20", + "bfxil x23, x22, #0, #16", + "mov x4, x23", + "cmn wzr, w22, lsl #16", + "ubfx x20, x21, #0, #1", + "mrs x23, nzcv", + "orr w24, w23, w20, lsl #29", + "mov x26, x22", + "ubfx x20, x21, #15, #1", + "orr w21, w24, w20, lsl #28", + "msr nzcv, x21" ] }, "shr eax, 1": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 12, "Comment": "GROUP2 0xd1 /5", "ExpectedArm64ASM": [ - "mov w20, w4", - "lsr w4, w20, #1", - "tst w4, w4", - "ubfx x21, x20, #0, #1", - "mrs x22, nzcv", - "orr w21, w22, w21, lsl #29", - "mov x26, x4", - "ubfx x20, x20, #31, #1", - "orr w20, w21, w20, lsl #28", - "msr nzcv, x20" + "mov x20, x4", + "mov w21, w20", + "lsr w20, w21, #1", + "mov x4, x20", + "tst w20, w20", + "ubfx x22, x21, #0, #1", + "mrs x23, nzcv", + "orr w24, w23, w22, lsl #29", + "mov x26, x20", + "ubfx x20, x21, #31, #1", + "orr w21, w24, w20, lsl #28", + "msr nzcv, x21" ] }, "shr rax, 1": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 11, "Comment": "GROUP2 0xd1 /5", "ExpectedArm64ASM": [ "mov x20, x4", - "lsr x4, x20, #1", - "tst x4, x4", - "ubfx x21, x20, #0, #1", - "mrs x22, nzcv", - "orr w21, w22, w21, lsl #29", - "mov x26, x4", - "lsr x20, x20, #63", - "orr w20, w21, w20, lsl #28", + "lsr x21, x20, #1", + "mov x4, x21", + "tst x21, x21", + "ubfx x22, x20, #0, #1", + "mrs x23, nzcv", + "orr w24, w23, w22, lsl #29", + "mov x26, x21", + "lsr x21, x20, #63", + "orr w20, w24, w21, lsl #28", "msr nzcv, x20" ] }, "sar ax, 1": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 13, "Comment": "GROUP2 0xd1 /7", "ExpectedArm64ASM": [ - "uxth w20, w4", - "sxth x20, w20", - "asr x26, x20, #1", - "bfxil x4, x26, #0, #16", - "cmn wzr, w26, lsl #16", - "ubfx x20, x20, #0, #1", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "msr nzcv, x20" + "mov x20, x4", + "uxth w21, w20", + "sxth x22, w21", + "asr x21, x22, #1", + "mov x23, x20", + "bfxil x23, x21, #0, #16", + "mov x4, x23", + "cmn wzr, w21, lsl #16", + "ubfx x20, x22, #0, #1", + "mrs x22, nzcv", + "orr w23, w22, w20, lsl #29", + "mov x26, x21", + "msr nzcv, x23" ] }, "sar eax, 1": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 10, "Comment": "GROUP2 0xd1 /7", "ExpectedArm64ASM": [ - "mov w20, w4", - "asr w4, w20, #1", - "tst w4, w4", - "ubfx x20, x20, #0, #1", + "mov x20, x4", + "mov w21, w20", + "asr w20, w21, #1", + "mov x4, x20", + "tst w20, w20", + "ubfx x22, x21, #0, #1", "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "mov x26, x4", - "msr nzcv, x20" + "orr w23, w21, w22, lsl #29", + "mov x26, x20", + "msr nzcv, x23" ] }, "sar rax, 1": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 9, "Comment": "GROUP2 0xd1 /7", "ExpectedArm64ASM": [ "mov x20, x4", - "asr x4, x20, #1", - "tst x4, x4", - "ubfx x20, x20, #0, #1", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "mov x26, x4", - "msr nzcv, x20" + "asr x21, x20, #1", + "mov x4, x21", + "tst x21, x21", + "ubfx x22, x20, #0, #1", + "mrs x20, nzcv", + "orr w23, w20, w22, lsl #29", + "mov x26, x21", + "msr nzcv, x23" ] }, "rol al, cl": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 22, "Comment": "GROUP2 0xd2 /0", "ExpectedArm64ASM": [ - "and x20, x5, #0x1f", - "cbz x20, #+0x3c", - "mov w20, w4", - "bfi w20, w4, #8, #8", - "bfi w20, w20, #16, #16", - "neg w21, w5", - "ror w20, w20, w21", - "bfxil x4, x20, #0, #8", + "mov x20, x5", + "and x21, x20, #0x1f", + "cbz x21, #+0x50", + "mov x20, x5", + "mov x21, x4", + "mov w22, w21", + "bfi w22, w21, #8, #8", + "mov w23, w22", + "bfi w23, w22, #16, #16", + "neg w22, w20", + "ror w20, w23, w22", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22", "mrs x21, nzcv", - "and w21, w21, #0xc0000000", - "ubfx x22, x20, #0, #1", - "orr w21, w21, w22, lsl #29", - "eor w20, w20, w20, lsr #7", - "ubfx x20, x20, #0, #1", - "orr w20, w21, w20, lsl #28", - "msr nzcv, x20" + "and w22, w21, #0xc0000000", + "ubfx x21, x20, #0, #1", + "orr w23, w22, w21, lsl #29", + "eor w21, w20, w20, lsr #7", + "ubfx x20, x21, #0, #1", + "orr w21, w23, w20, lsl #28", + "msr nzcv, x21" ] }, "ror al, cl": { - "ExpectedInstructionCount": 15, + "ExpectedInstructionCount": 21, "Comment": "GROUP2 0xd2 /1", "ExpectedArm64ASM": [ - "and x20, x5, #0x1f", - "cbz x20, #+0x38", - "mov w20, w4", - "bfi w20, w4, #8, #8", - "bfi w20, w20, #16, #16", - "ror w20, w20, w5", - "bfxil x4, x20, #0, #8", - "mrs x21, nzcv", - "and w21, w21, #0xc0000000", - "ubfx x22, x20, #7, #1", - "orr w21, w21, w22, lsl #29", - "eor w20, w20, w20, lsr #1", - "ubfx x20, x20, #6, #1", - "orr w20, w21, w20, lsl #28", + "mov x20, x5", + "and x21, x20, #0x1f", + "cbz x21, #+0x4c", + "mov x20, x5", + "mov x21, x4", + "mov w22, w21", + "bfi w22, w21, #8, #8", + "mov w23, w22", + "bfi w23, w22, #16, #16", + "ror w22, w23, w20", + "mov x20, x21", + "bfxil x20, x22, #0, #8", + "mov x4, x20", + "mrs x20, nzcv", + "and w21, w20, #0xc0000000", + "ubfx x20, x22, #7, #1", + "orr w23, w21, w20, lsl #29", + "eor w20, w22, w22, lsr #1", + "ubfx x21, x20, #6, #1", + "orr w20, w23, w21, lsl #28", "msr nzcv, x20" ] }, "rcl al, cl": { - "ExpectedInstructionCount": 36, + "ExpectedInstructionCount": 48, "Comment": "GROUP2 0xd2 /2", "ExpectedArm64ASM": [ - "and w20, w5, #0x1f", - "cbz x20, #+0x8c", - "and w20, w5, #0x1f", - "uxtb w21, w4", - "mov w22, #0x0", - "cset w23, hs", - "bfi x22, x21, #55, #8", - "bfi x22, x23, #63, #1", - "bfi x22, x21, #46, #8", - "bfi x22, x23, #54, #1", - "bfi x22, x21, #37, #8", - "bfi x22, x23, #45, #1", - "bfi x22, x21, #28, #8", - "bfi x22, x23, #36, #1", - "bfi x22, x21, #19, #8", - "bfi x22, x23, #27, #1", - "mov x0, x22", - "bfxil x0, x21, #0, #8", - "mov x21, x0", - "neg w22, w20", - "ror x22, x21, x22", - "bfxil x4, x22, #0, #8", - "mov w23, #0x3f", - "sub x20, x23, x20", - "ror x20, x21, x20", + "mov x20, x5", + "and w21, w20, #0x1f", + "cbz x21, #+0xb8", + "mov x20, x5", + "and w21, w20, #0x1f", + "mov x20, x4", + "uxtb w22, w20", + "mov w23, #0x0", + "cset w24, hs", + "mov x25, x23", + "bfi x25, x22, #55, #8", + "mov x23, x25", + "bfi x23, x24, #63, #1", + "mov x25, x23", + "bfi x25, x22, #46, #8", + "mov x23, x25", + "bfi x23, x24, #54, #1", + "mov x25, x23", + "bfi x25, x22, #37, #8", + "mov x23, x25", + "bfi x23, x24, #45, #1", + "mov x25, x23", + "bfi x25, x22, #28, #8", + "mov x23, x25", + "bfi x23, x24, #36, #1", + "mov x25, x23", + "bfi x25, x22, #19, #8", + "mov x23, x25", + "bfi x23, x24, #27, #1", + "mov x24, x23", + "bfxil x24, x22, #0, #8", + "neg w22, w21", + "ror x23, x24, x22", + "mov x22, x20", + "bfxil x22, x23, #0, #8", + "mov x4, x22", + "mov w20, #0x3f", + "sub x22, x20, x21", + "ror x20, x24, x22", "ubfx x21, x20, #0, #1", - "mrs x23, nzcv", - "mov w0, w23", - "bfi w0, w21, #29, #1", - "mov w21, w0", - "eor x20, x20, x22, lsr #7", - "ubfx x20, x20, #0, #1", - "mov w0, w21", - "bfi w0, w20, #28, #1", - "mov w20, w0", - "msr nzcv, x20" + "mrs x22, nzcv", + "mov w24, w22", + "bfi w24, w21, #29, #1", + "eor x21, x20, x23, lsr #7", + "ubfx x20, x21, #0, #1", + "mov w21, w24", + "bfi w21, w20, #28, #1", + "msr nzcv, x21" ] }, "rcr al, cl": { - "ExpectedInstructionCount": 23, + "ExpectedInstructionCount": 30, "Comment": "GROUP2 0xd2 /3", "ExpectedArm64ASM": [ - "and w20, w5, #0x1f", - "cbz x20, #+0x58", - "cset w20, hs", - "uxtb w21, w4", - "mov x0, x21", - "bfi x0, x20, #8, #1", - "mov x20, x0", - "bfi x20, x20, #9, #9", - "bfi x20, x20, #18, #18", - "bfi x20, x20, #36, #9", - "lsr w21, w20, w5", - "bfxil x4, x21, #0, #8", - "sub w22, w5, #0x1 (1)", - "lsr w20, w20, w22", - "ubfx x20, x20, #0, #1", - "mrs x22, nzcv", - "mov w0, w22", - "bfi w0, w20, #29, #1", - "mov w20, w0", - "eor w21, w21, w21, lsr #1", - "ubfx x21, x21, #6, #1", + "mov x20, x5", + "and w21, w20, #0x1f", + "cbz x21, #+0x70", + "mov x20, x5", + "cset w21, hs", + "mov x22, x4", + "uxtb w23, w22", + "mov x24, x23", + "bfi x24, x21, #8, #1", + "mov x21, x24", + "bfi x21, x24, #9, #9", + "mov x23, x21", + "bfi x23, x21, #18, #18", + "mov x21, x23", + "bfi x21, x23, #36, #9", + "lsr w23, w21, w20", + "mov x24, x22", + "bfxil x24, x23, #0, #8", + "mov x4, x24", + "sub w22, w20, #0x1 (1)", + "lsr w20, w21, w22", + "ubfx x21, x20, #0, #1", + "mrs x20, nzcv", + "mov w22, w20", + "bfi w22, w21, #29, #1", + "eor w20, w23, w23, lsr #1", + "ubfx x21, x20, #6, #1", + "mov w20, w22", "bfi w20, w21, #28, #1", "msr nzcv, x20" ] }, "shl al, cl": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 23, "Comment": "GROUP2 0xd2 /4", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "uxtb w21, w5", - "lsl w22, w20, w21", - "bfxil x4, x22, #0, #8", - "cbz w21, #+0x30", + "mov x20, x4", + "uxtb w21, w20", + "mov x22, x5", + "uxtb w23, w22", + "lsl w22, w21, w23", + "mov x24, x20", + "bfxil x24, x22, #0, #8", + "mov x4, x24", + "mov x20, x26", + "mov x24, x20", + "cbz w23, #+0x30", "cmn wzr, w22, lsl #24", - "mov x26, x22", + "mov x24, x22", "mov w0, #0x8", - "sub w0, w0, w21", - "lsr w0, w20, w0", - "eor w2, w20, w22", + "sub w0, w0, w23", + "lsr w0, w21, w0", + "eor w2, w21, w22", "mrs x1, nzcv", "bfi w1, w0, #29, #1", "lsr w2, w2, #7", "bfi w1, w2, #28, #1", - "msr nzcv, x1" + "msr nzcv, x1", + "mov x26, x24" ] }, "shr al, cl": { - "ExpectedInstructionCount": 15, + "ExpectedInstructionCount": 22, "Comment": "GROUP2 0xd2 /5", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "uxtb w21, w5", - "lsr w22, w20, w21", - "bfxil x4, x22, #0, #8", - "cbz w21, #+0x2c", + "mov x20, x4", + "uxtb w21, w20", + "mov x22, x5", + "uxtb w23, w22", + "lsr w22, w21, w23", + "mov x24, x20", + "bfxil x24, x22, #0, #8", + "mov x4, x24", + "mov x20, x26", + "mov x24, x20", + "cbz w23, #+0x2c", "cmn wzr, w22, lsl #24", - "mov x26, x22", - "sub x0, x21, #0x1 (1)", - "lsr w0, w20, w0", - "eor w2, w20, w22", + "mov x24, x22", + "sub x0, x23, #0x1 (1)", + "lsr w0, w21, w0", + "eor w2, w21, w22", "mrs x1, nzcv", "bfi w1, w0, #29, #1", "lsr w2, w2, #7", "bfi w1, w2, #28, #1", - "msr nzcv, x1" + "msr nzcv, x1", + "mov x26, x24" ] }, "sar al, cl": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 20, "Comment": "GROUP2 0xd2 /7", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "uxtb w21, w5", - "sxtb x20, w20", - "asr w22, w20, w21", - "bfxil x4, x22, #0, #8", - "cbz w21, #+0x20", - "cmn wzr, w22, lsl #24", - "mov x26, x22", - "sub x0, x21, #0x1 (1)", - "lsr w0, w20, w0", + "mov x20, x4", + "uxtb w21, w20", + "mov x22, x5", + "uxtb w23, w22", + "sxtb x22, w21", + "asr w21, w22, w23", + "mov x24, x20", + "bfxil x24, x21, #0, #8", + "mov x4, x24", + "mov x20, x26", + "mov x24, x20", + "cbz w23, #+0x20", + "cmn wzr, w21, lsl #24", + "mov x24, x21", + "sub x0, x23, #0x1 (1)", + "lsr w0, w22, w0", "mrs x1, nzcv", "bfi w1, w0, #29, #1", - "msr nzcv, x1" + "msr nzcv, x1", + "mov x26, x24" ] }, "rol ax, cl": { - "ExpectedInstructionCount": 15, + "ExpectedInstructionCount": 20, "Comment": "GROUP2 0xd3 /0", "ExpectedArm64ASM": [ - "and x20, x5, #0x1f", - "cbz x20, #+0x38", - "mov w20, w4", - "bfi w20, w4, #16, #16", - "neg w21, w5", - "ror w20, w20, w21", - "bfxil x4, x20, #0, #16", - "mrs x21, nzcv", - "and w21, w21, #0xc0000000", - "ubfx x22, x20, #0, #1", - "orr w21, w21, w22, lsl #29", - "eor w20, w20, w20, lsr #15", - "ubfx x20, x20, #0, #1", - "orr w20, w21, w20, lsl #28", - "msr nzcv, x20" + "mov x20, x5", + "and x21, x20, #0x1f", + "cbz x21, #+0x48", + "mov x20, x5", + "mov x21, x4", + "mov w22, w21", + "bfi w22, w21, #16, #16", + "neg w23, w20", + "ror w20, w22, w23", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x4, x22", + "mrs x21, nzcv", + "and w22, w21, #0xc0000000", + "ubfx x21, x20, #0, #1", + "orr w23, w22, w21, lsl #29", + "eor w21, w20, w20, lsr #15", + "ubfx x20, x21, #0, #1", + "orr w21, w23, w20, lsl #28", + "msr nzcv, x21" ] }, "rol eax, cl": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 16, "Comment": "GROUP2 0xd3 /0", "ExpectedArm64ASM": [ - "and x20, x5, #0x1f", - "cbz x20, #+0x2c", - "neg w20, w5", - "ror w4, w4, w20", - "mrs x20, nzcv", - "and w20, w20, #0xc0000000", - "ubfx x21, x4, #0, #1", - "orr w20, w20, w21, lsl #29", - "eor w21, w4, w4, lsr #31", - "ubfx x21, x21, #0, #1", - "orr w20, w20, w21, lsl #28", - "msr nzcv, x20" + "mov x20, x5", + "and x21, x20, #0x1f", + "cbz x21, #+0x38", + "mov x20, x5", + "mov x21, x4", + "neg w22, w20", + "ror w20, w21, w22", + "mov x4, x20", + "mrs x21, nzcv", + "and w22, w21, #0xc0000000", + "ubfx x21, x20, #0, #1", + "orr w23, w22, w21, lsl #29", + "eor w21, w20, w20, lsr #31", + "ubfx x20, x21, #0, #1", + "orr w21, w23, w20, lsl #28", + "msr nzcv, x21" ] }, "rol rax, cl": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 16, "Comment": "GROUP2 0xd3 /0", "ExpectedArm64ASM": [ - "and x20, x5, #0x3f", - "cbz x20, #+0x2c", - "neg x20, x5", - "ror x4, x4, x20", - "mrs x20, nzcv", - "and w20, w20, #0xc0000000", - "ubfx x21, x4, #0, #1", - "orr w20, w20, w21, lsl #29", - "eor x21, x4, x4, lsr #63", - "ubfx x21, x21, #0, #1", - "orr w20, w20, w21, lsl #28", - "msr nzcv, x20" + "mov x20, x5", + "and x21, x20, #0x3f", + "cbz x21, #+0x38", + "mov x20, x5", + "mov x21, x4", + "neg x22, x20", + "ror x20, x21, x22", + "mov x4, x20", + "mrs x21, nzcv", + "and w22, w21, #0xc0000000", + "ubfx x21, x20, #0, #1", + "orr w23, w22, w21, lsl #29", + "eor x21, x20, x20, lsr #63", + "ubfx x20, x21, #0, #1", + "orr w21, w23, w20, lsl #28", + "msr nzcv, x21" ] }, "ror ax, cl": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 19, "Comment": "GROUP2 0xd3 /1", "ExpectedArm64ASM": [ - "and x20, x5, #0x1f", - "cbz x20, #+0x34", - "mov w20, w4", - "bfi w20, w4, #16, #16", - "ror w20, w20, w5", - "bfxil x4, x20, #0, #16", - "mrs x21, nzcv", - "and w21, w21, #0xc0000000", - "ubfx x22, x20, #15, #1", - "orr w21, w21, w22, lsl #29", - "eor w20, w20, w20, lsr #1", - "ubfx x20, x20, #14, #1", - "orr w20, w21, w20, lsl #28", + "mov x20, x5", + "and x21, x20, #0x1f", + "cbz x21, #+0x44", + "mov x20, x5", + "mov x21, x4", + "mov w22, w21", + "bfi w22, w21, #16, #16", + "ror w23, w22, w20", + "mov x20, x21", + "bfxil x20, x23, #0, #16", + "mov x4, x20", + "mrs x20, nzcv", + "and w21, w20, #0xc0000000", + "ubfx x20, x23, #15, #1", + "orr w22, w21, w20, lsl #29", + "eor w20, w23, w23, lsr #1", + "ubfx x21, x20, #14, #1", + "orr w20, w22, w21, lsl #28", "msr nzcv, x20" ] }, "ror eax, cl": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 15, "Comment": "GROUP2 0xd3 /1", "ExpectedArm64ASM": [ - "and x20, x5, #0x1f", - "cbz x20, #+0x28", - "ror w4, w4, w5", + "mov x20, x5", + "and x21, x20, #0x1f", + "cbz x21, #+0x34", + "mov x20, x5", + "mov x21, x4", + "ror w22, w21, w20", + "mov x4, x22", "mrs x20, nzcv", - "and w20, w20, #0xc0000000", - "ubfx x21, x4, #31, #1", - "orr w20, w20, w21, lsl #29", - "eor w21, w4, w4, lsr #1", - "ubfx x21, x21, #30, #1", - "orr w20, w20, w21, lsl #28", + "and w21, w20, #0xc0000000", + "ubfx x20, x22, #31, #1", + "orr w23, w21, w20, lsl #29", + "eor w20, w22, w22, lsr #1", + "ubfx x21, x20, #30, #1", + "orr w20, w23, w21, lsl #28", "msr nzcv, x20" ] }, "ror rax, cl": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 15, "Comment": "GROUP2 0xd3 /1", "ExpectedArm64ASM": [ - "and x20, x5, #0x3f", - "cbz x20, #+0x28", - "ror x4, x4, x5", + "mov x20, x5", + "and x21, x20, #0x3f", + "cbz x21, #+0x34", + "mov x20, x5", + "mov x21, x4", + "ror x22, x21, x20", + "mov x4, x22", "mrs x20, nzcv", - "and w20, w20, #0xc0000000", - "lsr x21, x4, #63", - "orr w20, w20, w21, lsl #29", - "eor x21, x4, x4, lsr #1", - "ubfx x21, x21, #62, #1", - "orr w20, w20, w21, lsl #28", + "and w21, w20, #0xc0000000", + "lsr x20, x22, #63", + "orr w23, w21, w20, lsl #29", + "eor x20, x22, x22, lsr #1", + "ubfx x21, x20, #62, #1", + "orr w20, w23, w21, lsl #28", "msr nzcv, x20" ] }, "rcl ax, cl": { - "ExpectedInstructionCount": 32, + "ExpectedInstructionCount": 40, "Comment": "GROUP2 0xd3 /2", "ExpectedArm64ASM": [ - "and w20, w5, #0x1f", - "cbz x20, #+0x7c", - "and w20, w5, #0x1f", - "uxth w21, w4", - "mov w22, #0x0", - "cset w23, hs", - "bfi x22, x21, #47, #16", - "bfi x22, x23, #63, #1", - "bfi x22, x21, #30, #16", - "bfi x22, x23, #46, #1", - "bfi x22, x21, #13, #16", - "bfi x22, x23, #29, #1", - "mov x0, x22", - "bfxil x0, x21, #0, #16", - "mov x21, x0", - "neg w22, w20", - "ror x22, x21, x22", - "bfxil x4, x22, #0, #16", - "mov w23, #0x3f", - "sub x20, x23, x20", - "ror x20, x21, x20", + "mov x20, x5", + "and w21, w20, #0x1f", + "cbz x21, #+0x98", + "mov x20, x5", + "and w21, w20, #0x1f", + "mov x20, x4", + "uxth w22, w20", + "mov w23, #0x0", + "cset w24, hs", + "mov x25, x23", + "bfi x25, x22, #47, #16", + "mov x23, x25", + "bfi x23, x24, #63, #1", + "mov x25, x23", + "bfi x25, x22, #30, #16", + "mov x23, x25", + "bfi x23, x24, #46, #1", + "mov x25, x23", + "bfi x25, x22, #13, #16", + "mov x23, x25", + "bfi x23, x24, #29, #1", + "mov x24, x23", + "bfxil x24, x22, #0, #16", + "neg w22, w21", + "ror x23, x24, x22", + "mov x22, x20", + "bfxil x22, x23, #0, #16", + "mov x4, x22", + "mov w20, #0x3f", + "sub x22, x20, x21", + "ror x20, x24, x22", "ubfx x21, x20, #0, #1", - "mrs x23, nzcv", - "mov w0, w23", - "bfi w0, w21, #29, #1", - "mov w21, w0", - "eor x20, x20, x22, lsr #15", - "ubfx x20, x20, #0, #1", - "mov w0, w21", - "bfi w0, w20, #28, #1", - "mov w20, w0", - "msr nzcv, x20" + "mrs x22, nzcv", + "mov w24, w22", + "bfi w24, w21, #29, #1", + "eor x21, x20, x23, lsr #15", + "ubfx x20, x21, #0, #1", + "mov w21, w24", + "bfi w21, w20, #28, #1", + "msr nzcv, x21" ] }, "rcl eax, cl": { - "ExpectedInstructionCount": 22, + "ExpectedInstructionCount": 24, "Comment": "GROUP2 0xd3 /2", "ExpectedArm64ASM": [ - "and w20, w5, #0x1f", - "cbz x20, #+0x54", - "lsl w20, w4, w5", - "cset w21, hs", - "neg w22, w5", - "lsr w23, w4, w22", - "orr w20, w20, w23, lsr #1", - "lsr w22, w4, w22", - "ubfx x23, x22, #0, #1", + "mov x20, x5", + "and w21, w20, #0x1f", + "cbz x21, #+0x58", + "mov x20, x5", + "mov x21, x4", + "lsl w22, w21, w20", + "cset w23, hs", + "neg w24, w20", + "lsr w25, w21, w24", + "orr w30, w22, w25, lsr #1", + "lsr w22, w21, w24", + "ubfx x21, x22, #0, #1", "mrs x24, nzcv", - "mov w0, w24", - "bfi w0, w23, #29, #1", - "mov w23, w0", - "sub w24, w5, #0x1 (1)", - "lsl w21, w21, w24", - "orr w4, w20, w21", - "eor w20, w4, w22, lsl #31", - "ubfx x20, x20, #31, #1", - "mov w0, w23", - "bfi w0, w20, #28, #1", - "mov w20, w0", + "mov w25, w24", + "bfi w25, w21, #29, #1", + "sub w21, w20, #0x1 (1)", + "lsl w20, w23, w21", + "orr w21, w30, w20", + "eor w20, w21, w22, lsl #31", + "ubfx x22, x20, #31, #1", + "mov w20, w25", + "bfi w20, w22, #28, #1", + "mov x4, x21", "msr nzcv, x20" ] }, "rcl rax, cl": { - "ExpectedInstructionCount": 22, + "ExpectedInstructionCount": 24, "Comment": "GROUP2 0xd3 /2", "ExpectedArm64ASM": [ - "and x20, x5, #0x3f", - "cbz x20, #+0x54", - "lsl x20, x4, x5", - "cset w21, hs", - "neg x22, x5", - "lsr x23, x4, x22", - "orr x20, x20, x23, lsr #1", - "lsr x22, x4, x22", - "ubfx x23, x22, #0, #1", + "mov x20, x5", + "and x21, x20, #0x3f", + "cbz x21, #+0x58", + "mov x20, x5", + "mov x21, x4", + "lsl x22, x21, x20", + "cset w23, hs", + "neg x24, x20", + "lsr x25, x21, x24", + "orr x30, x22, x25, lsr #1", + "lsr x22, x21, x24", + "ubfx x21, x22, #0, #1", "mrs x24, nzcv", - "mov w0, w24", - "bfi w0, w23, #29, #1", - "mov w23, w0", - "sub x24, x5, #0x1 (1)", - "lsl x21, x21, x24", - "orr x4, x20, x21", - "eor x20, x4, x22, lsl #63", - "lsr x20, x20, #63", - "mov w0, w23", - "bfi w0, w20, #28, #1", - "mov w20, w0", + "mov w25, w24", + "bfi w25, w21, #29, #1", + "sub x21, x20, #0x1 (1)", + "lsl x20, x23, x21", + "orr x21, x30, x20", + "eor x20, x21, x22, lsl #63", + "lsr x22, x20, #63", + "mov w20, w25", + "bfi w20, w22, #28, #1", + "mov x4, x21", "msr nzcv, x20" ] }, "rcr ax, cl": { - "ExpectedInstructionCount": 22, + "ExpectedInstructionCount": 28, "Comment": "GROUP2 0xd3 /3", "ExpectedArm64ASM": [ - "and w20, w5, #0x1f", - "cbz x20, #+0x54", - "cset w20, hs", - "uxth w21, w4", - "mov x0, x21", - "bfi x0, x20, #16, #1", - "mov x20, x0", - "bfi x20, x20, #17, #17", - "bfi x20, x20, #34, #17", - "lsr w21, w20, w5", - "bfxil x4, x21, #0, #16", - "sub w22, w5, #0x1 (1)", - "lsr w20, w20, w22", - "ubfx x20, x20, #0, #1", - "mrs x22, nzcv", - "mov w0, w22", - "bfi w0, w20, #29, #1", - "mov w20, w0", - "eor w21, w21, w21, lsr #1", - "ubfx x21, x21, #14, #1", + "mov x20, x5", + "and w21, w20, #0x1f", + "cbz x21, #+0x68", + "mov x20, x5", + "cset w21, hs", + "mov x22, x4", + "uxth w23, w22", + "mov x24, x23", + "bfi x24, x21, #16, #1", + "mov x21, x24", + "bfi x21, x24, #17, #17", + "mov x23, x21", + "bfi x23, x21, #34, #17", + "lsr w21, w23, w20", + "mov x24, x22", + "bfxil x24, x21, #0, #16", + "mov x4, x24", + "sub w22, w20, #0x1 (1)", + "lsr w20, w23, w22", + "ubfx x22, x20, #0, #1", + "mrs x20, nzcv", + "mov w23, w20", + "bfi w23, w22, #29, #1", + "eor w20, w21, w21, lsr #1", + "ubfx x21, x20, #14, #1", + "mov w20, w23", "bfi w20, w21, #28, #1", "msr nzcv, x20" ] }, "rcr eax, cl": { - "ExpectedInstructionCount": 22, + "ExpectedInstructionCount": 24, "Comment": "GROUP2 0xd3 /3", "ExpectedArm64ASM": [ - "and w20, w5, #0x1f", - "cbz x20, #+0x54", - "lsr w20, w4, w5", - "cset w21, hs", - "neg w22, w5", - "lsl w23, w4, w22", - "orr w20, w20, w23, lsl #1", - "sub w23, w5, #0x1 (1)", - "lsr w23, w4, w23", - "ubfx x23, x23, #0, #1", - "mrs x24, nzcv", - "mov w0, w24", - "bfi w0, w23, #29, #1", - "mov w23, w0", - "lsl w21, w21, w22", - "orr w4, w20, w21", - "eor w20, w4, w4, lsr #1", - "ubfx x20, x20, #30, #1", - "mov w0, w23", - "bfi w0, w20, #28, #1", - "mov w20, w0", + "mov x20, x5", + "and w21, w20, #0x1f", + "cbz x21, #+0x58", + "mov x20, x5", + "mov x21, x4", + "lsr w22, w21, w20", + "cset w23, hs", + "neg w24, w20", + "lsl w25, w21, w24", + "orr w30, w22, w25, lsl #1", + "sub w22, w20, #0x1 (1)", + "lsr w20, w21, w22", + "ubfx x21, x20, #0, #1", + "mrs x20, nzcv", + "mov w22, w20", + "bfi w22, w21, #29, #1", + "lsl w20, w23, w24", + "orr w21, w30, w20", + "eor w20, w21, w21, lsr #1", + "ubfx x23, x20, #30, #1", + "mov w20, w22", + "bfi w20, w23, #28, #1", + "mov x4, x21", "msr nzcv, x20" ] }, "rcr rax, cl": { - "ExpectedInstructionCount": 22, + "ExpectedInstructionCount": 24, "Comment": "GROUP2 0xd3 /3", "ExpectedArm64ASM": [ - "and x20, x5, #0x3f", - "cbz x20, #+0x54", - "lsr x20, x4, x5", - "cset w21, hs", - "neg x22, x5", - "lsl x23, x4, x22", - "orr x20, x20, x23, lsl #1", - "sub x23, x5, #0x1 (1)", - "lsr x23, x4, x23", - "ubfx x23, x23, #0, #1", - "mrs x24, nzcv", - "mov w0, w24", - "bfi w0, w23, #29, #1", - "mov w23, w0", - "lsl x21, x21, x22", - "orr x4, x20, x21", - "eor x20, x4, x4, lsr #1", - "ubfx x20, x20, #62, #1", - "mov w0, w23", - "bfi w0, w20, #28, #1", - "mov w20, w0", + "mov x20, x5", + "and x21, x20, #0x3f", + "cbz x21, #+0x58", + "mov x20, x5", + "mov x21, x4", + "lsr x22, x21, x20", + "cset w23, hs", + "neg x24, x20", + "lsl x25, x21, x24", + "orr x30, x22, x25, lsl #1", + "sub x22, x20, #0x1 (1)", + "lsr x20, x21, x22", + "ubfx x21, x20, #0, #1", + "mrs x20, nzcv", + "mov w22, w20", + "bfi w22, w21, #29, #1", + "lsl x20, x23, x24", + "orr x21, x30, x20", + "eor x20, x21, x21, lsr #1", + "ubfx x23, x20, #62, #1", + "mov w20, w22", + "bfi w20, w23, #28, #1", + "mov x4, x21", "msr nzcv, x20" ] }, "shl ax, cl": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 23, "Comment": "GROUP2 0xd3 /4", "ExpectedArm64ASM": [ - "uxth w20, w4", - "uxth w21, w5", - "lsl w22, w20, w21", - "bfxil x4, x22, #0, #16", - "cbz w21, #+0x30", + "mov x20, x4", + "uxth w21, w20", + "mov x22, x5", + "uxth w23, w22", + "lsl w22, w21, w23", + "mov x24, x20", + "bfxil x24, x22, #0, #16", + "mov x4, x24", + "mov x20, x26", + "mov x24, x20", + "cbz w23, #+0x30", "cmn wzr, w22, lsl #16", - "mov x26, x22", + "mov x24, x22", "mov w0, #0x10", - "sub w0, w0, w21", - "lsr w0, w20, w0", - "eor w2, w20, w22", + "sub w0, w0, w23", + "lsr w0, w21, w0", + "eor w2, w21, w22", "mrs x1, nzcv", "bfi w1, w0, #29, #1", "lsr w2, w2, #15", "bfi w1, w2, #28, #1", - "msr nzcv, x1" + "msr nzcv, x1", + "mov x26, x24" ] }, "shl eax, cl": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 19, "Comment": "GROUP2 0xd3 /4", "ExpectedArm64ASM": [ - "mov w20, w4", - "mov w21, w5", - "lsl w4, w20, w21", - "cbz w21, #+0x28", - "ands w26, w4, w4", - "neg w0, w21", - "lsr w0, w20, w0", - "eor w2, w20, w4", + "mov x20, x4", + "mov w21, w20", + "mov x20, x5", + "mov w22, w20", + "lsl w20, w21, w22", + "mov x4, x20", + "mov x23, x26", + "mov x24, x23", + "cbz w22, #+0x28", + "ands w24, w20, w20", + "neg w0, w22", + "lsr w0, w21, w0", + "eor w2, w21, w20", "mrs x1, nzcv", "bfi w1, w0, #29, #1", "lsr w2, w2, #31", "bfi w1, w2, #28, #1", - "msr nzcv, x1" + "msr nzcv, x1", + "mov x26, x24" ] }, "shl rax, cl": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 17, "Comment": "GROUP2 0xd3 /4", "ExpectedArm64ASM": [ "mov x20, x4", - "lsl x4, x20, x5", - "cbz x5, #+0x28", - "ands x26, x4, x4", - "neg x0, x5", + "mov x21, x5", + "lsl x22, x20, x21", + "mov x4, x22", + "mov x23, x26", + "mov x24, x23", + "cbz x21, #+0x28", + "ands x24, x22, x22", + "neg x0, x21", "lsr x0, x20, x0", - "eor x2, x20, x4", + "eor x2, x20, x22", "mrs x1, nzcv", "bfi w1, w0, #29, #1", "lsr x2, x2, #63", "bfi w1, w2, #28, #1", - "msr nzcv, x1" + "msr nzcv, x1", + "mov x26, x24" ] }, "shr ax, cl": { - "ExpectedInstructionCount": 15, + "ExpectedInstructionCount": 22, "Comment": "GROUP2 0xd3 /5", "ExpectedArm64ASM": [ - "uxth w20, w4", - "uxth w21, w5", - "lsr w22, w20, w21", - "bfxil x4, x22, #0, #16", - "cbz w21, #+0x2c", + "mov x20, x4", + "uxth w21, w20", + "mov x22, x5", + "uxth w23, w22", + "lsr w22, w21, w23", + "mov x24, x20", + "bfxil x24, x22, #0, #16", + "mov x4, x24", + "mov x20, x26", + "mov x24, x20", + "cbz w23, #+0x2c", "cmn wzr, w22, lsl #16", - "mov x26, x22", - "sub x0, x21, #0x1 (1)", - "lsr w0, w20, w0", - "eor w2, w20, w22", + "mov x24, x22", + "sub x0, x23, #0x1 (1)", + "lsr w0, w21, w0", + "eor w2, w21, w22", "mrs x1, nzcv", "bfi w1, w0, #29, #1", "lsr w2, w2, #15", "bfi w1, w2, #28, #1", - "msr nzcv, x1" + "msr nzcv, x1", + "mov x26, x24" ] }, "shr eax, cl": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 19, "Comment": "GROUP2 0xd3 /5", "ExpectedArm64ASM": [ - "mov w20, w4", - "mov w21, w5", - "lsr w4, w20, w21", - "cbz w21, #+0x28", - "ands w26, w4, w4", - "sub x0, x21, #0x1 (1)", - "lsr w0, w20, w0", - "eor w2, w20, w4", + "mov x20, x4", + "mov w21, w20", + "mov x20, x5", + "mov w22, w20", + "lsr w20, w21, w22", + "mov x4, x20", + "mov x23, x26", + "mov x24, x23", + "cbz w22, #+0x28", + "ands w24, w20, w20", + "sub x0, x22, #0x1 (1)", + "lsr w0, w21, w0", + "eor w2, w21, w20", "mrs x1, nzcv", "bfi w1, w0, #29, #1", "lsr w2, w2, #31", "bfi w1, w2, #28, #1", - "msr nzcv, x1" + "msr nzcv, x1", + "mov x26, x24" ] }, "shr rax, cl": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 17, "Comment": "GROUP2 0xd3 /5", "ExpectedArm64ASM": [ "mov x20, x4", - "lsr x4, x20, x5", - "cbz x5, #+0x28", - "ands x26, x4, x4", - "sub x0, x5, #0x1 (1)", + "mov x21, x5", + "lsr x22, x20, x21", + "mov x4, x22", + "mov x23, x26", + "mov x24, x23", + "cbz x21, #+0x28", + "ands x24, x22, x22", + "sub x0, x21, #0x1 (1)", "lsr x0, x20, x0", - "eor x2, x20, x4", + "eor x2, x20, x22", "mrs x1, nzcv", "bfi w1, w0, #29, #1", "lsr x2, x2, #63", "bfi w1, w2, #28, #1", - "msr nzcv, x1" + "msr nzcv, x1", + "mov x26, x24" ] }, "sar ax, cl": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 20, "Comment": "GROUP2 0xd3 /7", "ExpectedArm64ASM": [ - "uxth w20, w4", - "uxth w21, w5", - "sxth x20, w20", - "asr w22, w20, w21", - "bfxil x4, x22, #0, #16", - "cbz w21, #+0x20", - "cmn wzr, w22, lsl #16", - "mov x26, x22", - "sub x0, x21, #0x1 (1)", - "lsr w0, w20, w0", + "mov x20, x4", + "uxth w21, w20", + "mov x22, x5", + "uxth w23, w22", + "sxth x22, w21", + "asr w21, w22, w23", + "mov x24, x20", + "bfxil x24, x21, #0, #16", + "mov x4, x24", + "mov x20, x26", + "mov x24, x20", + "cbz w23, #+0x20", + "cmn wzr, w21, lsl #16", + "mov x24, x21", + "sub x0, x23, #0x1 (1)", + "lsr w0, w22, w0", "mrs x1, nzcv", "bfi w1, w0, #29, #1", - "msr nzcv, x1" + "msr nzcv, x1", + "mov x26, x24" ] }, "sar eax, cl": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 16, "Comment": "GROUP2 0xd3 /7", "ExpectedArm64ASM": [ - "mov w20, w4", - "mov w21, w5", - "asr w4, w20, w21", - "cbz w21, #+0x1c", - "ands w26, w4, w4", - "sub x0, x21, #0x1 (1)", - "lsr w0, w20, w0", + "mov x20, x4", + "mov w21, w20", + "mov x20, x5", + "mov w22, w20", + "asr w20, w21, w22", + "mov x4, x20", + "mov x23, x26", + "mov x24, x23", + "cbz w22, #+0x1c", + "ands w24, w20, w20", + "sub x0, x22, #0x1 (1)", + "lsr w0, w21, w0", "mrs x1, nzcv", "bfi w1, w0, #29, #1", - "msr nzcv, x1" + "msr nzcv, x1", + "mov x26, x24" ] }, "sar rax, cl": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 14, "Comment": "GROUP2 0xd3 /7", "ExpectedArm64ASM": [ "mov x20, x4", - "asr x4, x20, x5", - "cbz x5, #+0x1c", - "ands x26, x4, x4", - "sub x0, x5, #0x1 (1)", + "mov x21, x5", + "asr x22, x20, x21", + "mov x4, x22", + "mov x23, x26", + "mov x24, x23", + "cbz x21, #+0x1c", + "ands x24, x22, x22", + "sub x0, x21, #0x1 (1)", "lsr x0, x20, x0", "mrs x1, nzcv", "bfi w1, w0, #29, #1", - "msr nzcv, x1" + "msr nzcv, x1", + "mov x26, x24" ] }, "test bl, 1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP2 0xf6 /0", "ExpectedArm64ASM": [ - "and w26, w7, #0x1", - "cmn wzr, w26, lsl #24" + "mov x20, x7", + "and w21, w20, #0x1", + "cmn wzr, w21, lsl #24", + "mov x26, x21" ] }, "not bl": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "GROUP2 0xf6 /2", "ExpectedArm64ASM": [ - "eor x7, x7, #0xff" + "mov x20, x7", + "eor x21, x20, #0xff", + "mov x7, x21" ] }, "not bh": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "GROUP2 0xf6 /2", "ExpectedArm64ASM": [ - "eor x7, x7, #0xff00" + "mov x20, x7", + "eor x21, x20, #0xff00", + "mov x7, x21" ] }, "neg bl": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 11, "Comment": "GROUP2 0xf6 /3", "ExpectedArm64ASM": [ - "mov x27, x7", - "cmp wzr, w27, lsl #24", - "neg w26, w27", - "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x7, x27", - "bfxil x7, x26, #0, #8", - "msr nzcv, x20" + "mov x20, x7", + "mov x27, x20", + "cmp wzr, w20, lsl #24", + "neg w21, w20", + "mov x26, x21", + "mrs x22, nzcv", + "eor w23, w22, #0x20000000", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x7, x22", + "msr nzcv, x23" ] }, "mul bl": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "GROUP2 0xf6 /4", "ExpectedArm64ASM": [ - "uxtb x20, w7", - "uxtb x21, w4", - "mul x20, x20, x21", - "bfxil x4, x20, #0, #16", - "ubfx x20, x20, #8, #8", + "mov x20, x7", + "mov x21, x4", + "uxtb x22, w20", + "uxtb x20, w21", + "mul x23, x22, x20", + "mov x20, x21", + "bfxil x20, x23, #0, #16", + "mov x4, x20", + "ubfx x20, x23, #8, #8", "cmp x20, #0x0 (0)", "ccmn xzr, #0, #nzCV, eq" ] }, "imul bl": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 12, "Comment": "GROUP2 0xf6 /5", "ExpectedArm64ASM": [ - "sxtb x20, w7", - "sxtb x21, w4", - "mul x20, x20, x21", - "bfxil x4, x20, #0, #16", - "sbfx x21, x20, #8, #8", - "sbfx x20, x20, #7, #1", - "cmp x21, x20", + "mov x20, x7", + "mov x21, x4", + "sxtb x22, w20", + "sxtb x20, w21", + "mul x23, x22, x20", + "mov x20, x21", + "bfxil x20, x23, #0, #16", + "mov x4, x20", + "sbfx x20, x23, #8, #8", + "sbfx x21, x23, #7, #1", + "cmp x20, x21", "ccmn xzr, #0, #nzCV, eq" ] }, "div bl": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 16, "Comment": "GROUP2 0xf6 /6", "ExpectedArm64ASM": [ - "uxtb w20, w7", - "uxth w21, w4", - "uxth w0, w21", - "uxth w1, w20", - "udiv w22, w0, w1", - "uxth w0, w21", - "uxth w1, w20", + "mov x20, x7", + "uxtb w21, w20", + "mov x20, x4", + "uxth w22, w20", + "uxth w0, w22", + "uxth w1, w21", + "udiv w23, w0, w1", + "uxth w0, w22", + "uxth w1, w21", "udiv w2, w0, w1", - "msub w20, w2, w1, w0", - "mov x0, x22", - "bfi x0, x20, #8, #8", - "mov x20, x0", - "bfxil x4, x20, #0, #16" + "msub w24, w2, w1, w0", + "mov x21, x23", + "bfi x21, x24, #8, #8", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22" ] }, "idiv bl": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 14, "Comment": "GROUP2 0xf6 /7", "ExpectedArm64ASM": [ - "uxtb w20, w7", - "uxth w21, w4", - "sxth x21, w21", - "sxtb x20, w20", - "sdiv x22, x21, x20", - "sdiv x0, x21, x20", - "msub x20, x0, x20, x21", - "mov x0, x22", - "bfi x0, x20, #8, #8", - "mov x20, x0", - "bfxil x4, x20, #0, #16" + "mov x20, x7", + "uxtb w21, w20", + "mov x20, x4", + "uxth w22, w20", + "sxth x23, w22", + "sxtb x22, w21", + "sdiv x21, x23, x22", + "sdiv x0, x23, x22", + "msub x24, x0, x22, x23", + "mov x22, x21", + "bfi x22, x24, #8, #8", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21" ] }, "test bx, 1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP2 0xf7 /0", "ExpectedArm64ASM": [ - "and w26, w7, #0x1", - "cmn wzr, w26, lsl #16" + "mov x20, x7", + "and w21, w20, #0x1", + "cmn wzr, w21, lsl #16", + "mov x26, x21" ] }, "test ebx, 1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "GROUP2 0xf7 /0", "ExpectedArm64ASM": [ - "ands w26, w7, #0x1" + "mov x20, x7", + "ands w21, w20, #0x1", + "mov x26, x21" ] }, "test rbx, 1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "GROUP2 0xf7 /0", "ExpectedArm64ASM": [ - "ands x26, x7, #0x1" + "mov x20, x7", + "ands x21, x20, #0x1", + "mov x26, x21" ] }, "test bx, -1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "GROUP2 0xf7 /0", "ExpectedArm64ASM": [ - "mov x26, x7", - "cmn wzr, w26, lsl #16" + "mov x20, x7", + "mov x21, x20", + "cmn wzr, w21, lsl #16", + "mov x26, x21" ] }, "test ebx, -1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "GROUP2 0xf7 /0", "ExpectedArm64ASM": [ - "ands w26, w7, w7" + "mov x20, x7", + "ands w21, w20, w20", + "mov x26, x21" ] }, "test rbx, -1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "GROUP2 0xf7 /0", "ExpectedArm64ASM": [ - "ands x26, x7, x7" + "mov x20, x7", + "ands x21, x20, x20", + "mov x26, x21" ] }, "not bx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "GROUP2 0xf7 /1", "ExpectedArm64ASM": [ - "eor x7, x7, #0xffff" + "mov x20, x7", + "eor x21, x20, #0xffff", + "mov x7, x21" ] }, "not ebx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "GROUP2 0xf7 /1", "ExpectedArm64ASM": [ - "mvn w7, w7" + "mov x20, x7", + "mvn w21, w20", + "mov x7, x21" ] }, "not rbx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "GROUP2 0xf7 /1", "ExpectedArm64ASM": [ - "mvn x7, x7" + "mov x20, x7", + "mvn x21, x20", + "mov x7, x21" ] }, "neg bx": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 11, "Comment": "GROUP2 0xf7 /2", "ExpectedArm64ASM": [ - "mov x27, x7", - "cmp wzr, w27, lsl #16", - "neg w26, w27", - "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x7, x27", - "bfxil x7, x26, #0, #16", - "msr nzcv, x20" + "mov x20, x7", + "mov x27, x20", + "cmp wzr, w20, lsl #16", + "neg w21, w20", + "mov x26, x21", + "mrs x22, nzcv", + "eor w23, w22, #0x20000000", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x7, x22", + "msr nzcv, x23" ] }, "neg ebx": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "GROUP2 0xf7 /2", "ExpectedArm64ASM": [ - "mov x27, x7", - "negs w26, w27", + "mov x20, x7", + "mov x27, x20", + "negs w21, w20", + "mov x26, x21", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x7, x26", - "msr nzcv, x20" + "eor w22, w20, #0x20000000", + "mov x7, x21", + "msr nzcv, x22" ] }, "neg rbx": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "GROUP2 0xf7 /2", "ExpectedArm64ASM": [ - "mov x27, x7", - "negs x26, x27", + "mov x20, x7", + "mov x27, x20", + "negs x21, x20", + "mov x26, x21", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "mov x7, x26", - "msr nzcv, x20" + "eor w22, w20, #0x20000000", + "mov x7, x21", + "msr nzcv, x22" ] }, "mul bx": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 15, "Comment": "GROUP2 0xf7 /3", "ExpectedArm64ASM": [ - "uxth x20, w7", - "uxth x21, w4", - "mul x20, x20, x21", - "bfxil x4, x20, #0, #16", - "ubfx x20, x20, #16, #16", - "bfxil x6, x20, #0, #16", + "mov x20, x7", + "mov x21, x4", + "uxth x22, w20", + "uxth x20, w21", + "mul x23, x22, x20", + "mov x20, x21", + "bfxil x20, x23, #0, #16", + "mov x4, x20", + "ubfx x20, x23, #16, #16", + "mov x21, x6", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x6, x22", "cmp x20, #0x0 (0)", "ccmn xzr, #0, #nzCV, eq" ] }, "mul ebx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "GROUP2 0xf7 /3", "ExpectedArm64ASM": [ - "mov w20, w7", - "mov w21, w4", - "mul x20, x20, x21", - "mov w4, w20", - "lsr x6, x20, #32", - "cmp x6, #0x0 (0)", + "mov x20, x7", + "mov x21, x4", + "mov w22, w20", + "mov w20, w21", + "mul x21, x22, x20", + "mov w20, w21", + "lsr x22, x21, #32", + "mov x4, x20", + "mov x6, x22", + "cmp x22, #0x0 (0)", "ccmn xzr, #0, #nzCV, eq" ] }, "mul rbx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "GROUP2 0xf7 /3", "ExpectedArm64ASM": [ - "mov x20, x4", - "mul x4, x7, x20", - "umulh x6, x7, x20", - "cmp x6, #0x0 (0)", + "mov x20, x7", + "mov x21, x4", + "mul x22, x20, x21", + "umulh x23, x20, x21", + "mov x4, x22", + "mov x6, x23", + "cmp x23, #0x0 (0)", "ccmn xzr, #0, #nzCV, eq" ] }, "imul bx": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 16, "Comment": "GROUP2 0xf7 /5", "ExpectedArm64ASM": [ - "sxth x20, w7", - "sxth x21, w4", - "mul x20, x20, x21", - "bfxil x4, x20, #0, #16", - "sbfx x21, x20, #16, #16", - "bfxil x6, x21, #0, #16", - "sbfx x20, x20, #15, #1", - "cmp x21, x20", + "mov x20, x7", + "mov x21, x4", + "sxth x22, w20", + "sxth x20, w21", + "mul x23, x22, x20", + "mov x20, x21", + "bfxil x20, x23, #0, #16", + "mov x4, x20", + "sbfx x20, x23, #16, #16", + "mov x21, x6", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x6, x22", + "sbfx x21, x23, #15, #1", + "cmp x20, x21", "ccmn xzr, #0, #nzCV, eq" ] }, "imul ebx": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 14, "Comment": "GROUP2 0xf7 /5", "ExpectedArm64ASM": [ - "sxtw x20, w7", - "sxtw x21, w4", - "mul x20, x20, x21", - "mov w4, w20", - "lsr x6, x20, #32", - "asr x21, x20, #32", - "sxtw x20, w20", - "sbfx x20, x20, #31, #1", - "cmp x21, x20", + "mov x20, x7", + "mov x21, x4", + "sxtw x22, w20", + "sxtw x20, w21", + "mul x21, x22, x20", + "mov w20, w21", + "lsr x22, x21, #32", + "asr x23, x21, #32", + "sxtw x24, w21", + "mov x4, x20", + "mov x6, x22", + "sbfx x20, x24, #31, #1", + "cmp x23, x20", "ccmn xzr, #0, #nzCV, eq" ] }, "imul rbx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": "GROUP2 0xf7 /5", "ExpectedArm64ASM": [ - "smulh x6, x7, x4", - "mul x4, x7, x4", - "asr x20, x4, #63", - "cmp x6, x20", + "mov x20, x7", + "mov x21, x4", + "smulh x22, x20, x21", + "mul x23, x20, x21", + "mov x4, x23", + "mov x6, x22", + "asr x20, x23, #63", + "cmp x22, x20", "ccmn xzr, #0, #nzCV, eq" ] }, "div bx": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 19, "Comment": "GROUP2 0xf7 /6", "ExpectedArm64ASM": [ - "uxth w20, w7", - "uxth w21, w4", - "uxth w22, w6", - "uxth w0, w21", - "bfi w0, w22, #16, #16", - "udiv w23, w0, w20", - "uxth w0, w21", - "bfi w0, w22, #16, #16", - "udiv w1, w0, w20", - "msub w20, w1, w20, w0", - "bfxil x4, x23, #0, #16", - "bfxil x6, x20, #0, #16" + "mov x20, x7", + "uxth w21, w20", + "mov x20, x4", + "uxth w22, w20", + "mov x23, x6", + "uxth w24, w23", + "uxth w0, w22", + "bfi w0, w24, #16, #16", + "udiv w25, w0, w21", + "uxth w0, w22", + "bfi w0, w24, #16, #16", + "udiv w1, w0, w21", + "msub w30, w1, w21, w0", + "mov x21, x20", + "bfxil x21, x25, #0, #16", + "mov x4, x21", + "mov x20, x23", + "bfxil x20, x30, #0, #16", + "mov x6, x20" ] }, "div ebx": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 17, "Comment": "GROUP2 0xf7 /6", "ExpectedArm64ASM": [ - "mov w20, w7", - "mov w21, w4", - "mov w22, w6", - "mov x0, x21", - "bfi x0, x22, #32, #32", - "udiv x23, x0, x20", - "mov w4, w23", - "mov x0, x21", - "bfi x0, x22, #32, #32", - "udiv x1, x0, x20", - "msub x20, x1, x20, x0", - "mov w6, w20" + "mov x20, x7", + "mov w21, w20", + "mov x20, x4", + "mov w22, w20", + "mov x20, x6", + "mov w23, w20", + "mov x0, x22", + "bfi x0, x23, #32, #32", + "udiv x20, x0, x21", + "mov w24, w20", + "mov x0, x22", + "bfi x0, x23, #32, #32", + "udiv x1, x0, x21", + "msub x20, x1, x21, x0", + "mov w21, w20", + "mov x4, x24", + "mov x6, x21" ] }, "div rbx": { - "ExpectedInstructionCount": 24, + "ExpectedInstructionCount": 28, "Comment": "GROUP2 0xf7 /6", "ExpectedArm64ASM": [ - "mov x20, x4", - "cbz x6, #+0x28", - "mov x0, x6", - "mov x1, x20", - "mov x2, x7", + "mov x20, x7", + "mov x21, x4", + "mov x22, x6", + "cbz x22, #+0x28", + "mov x0, x22", + "mov x1, x21", + "mov x2, x20", "ldr x3, [x28, #2432]", "str x30, [sp, #-16]!", "blr x3", "ldr x30, [sp], #16", - "mov x4, x0", + "mov x23, x0", "b #+0x8", - "udiv x4, x20, x7", - "cbz x6, #+0x28", - "mov x0, x6", - "mov x1, x20", - "mov x2, x7", + "udiv x23, x21, x20", + "cbz x22, #+0x28", + "mov x0, x22", + "mov x1, x21", + "mov x2, x20", "ldr x3, [x28, #2448]", "str x30, [sp, #-16]!", "blr x3", "ldr x30, [sp], #16", - "mov x6, x0", + "mov x24, x0", "b #+0xc", - "udiv x0, x20, x7", - "msub x6, x0, x7, x20" + "udiv x0, x21, x20", + "msub x24, x0, x20, x21", + "mov x4, x23", + "mov x6, x24" ] }, "idiv bx": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 21, "Comment": "GROUP2 0xf7 /7", "ExpectedArm64ASM": [ - "uxth w20, w7", - "uxth w21, w4", - "uxth w22, w6", - "uxth w0, w21", - "bfi w0, w22, #16, #16", - "sxth w1, w20", - "sdiv w23, w0, w1", - "uxth w0, w21", - "bfi w0, w22, #16, #16", - "sxth w1, w20", + "mov x20, x7", + "uxth w21, w20", + "mov x20, x4", + "uxth w22, w20", + "mov x23, x6", + "uxth w24, w23", + "uxth w0, w22", + "bfi w0, w24, #16, #16", + "sxth w1, w21", + "sdiv w25, w0, w1", + "uxth w0, w22", + "bfi w0, w24, #16, #16", + "sxth w1, w21", "sdiv w2, w0, w1", - "msub w20, w2, w1, w0", - "bfxil x4, x23, #0, #16", - "bfxil x6, x20, #0, #16" + "msub w30, w2, w1, w0", + "mov x21, x20", + "bfxil x21, x25, #0, #16", + "mov x4, x21", + "mov x20, x23", + "bfxil x20, x30, #0, #16", + "mov x6, x20" ] }, "idiv ebx": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 19, "Comment": "GROUP2 0xf7 /7", "ExpectedArm64ASM": [ - "mov w20, w7", - "mov w21, w4", - "mov w22, w6", - "mov x0, x21", - "bfi x0, x22, #32, #32", - "sxtw x1, w20", - "sdiv x23, x0, x1", - "mov w4, w23", - "mov x0, x21", - "bfi x0, x22, #32, #32", - "sxtw x2, w20", + "mov x20, x7", + "mov w21, w20", + "mov x20, x4", + "mov w22, w20", + "mov x20, x6", + "mov w23, w20", + "mov x0, x22", + "bfi x0, x23, #32, #32", + "sxtw x1, w21", + "sdiv x20, x0, x1", + "mov w24, w20", + "mov x0, x22", + "bfi x0, x23, #32, #32", + "sxtw x2, w21", "sdiv x1, x0, x2", "msub x20, x1, x2, x0", - "mov w6, w20" + "mov w21, w20", + "mov x4, x24", + "mov x6, x21" ] }, "idiv rbx": { - "ExpectedInstructionCount": 28, + "ExpectedInstructionCount": 32, "Comment": "GROUP2 0xf7 /7", "ExpectedArm64ASM": [ - "mov x20, x4", - "asr x0, x20, #63", - "eor x0, x0, x6", + "mov x20, x7", + "mov x21, x4", + "mov x22, x6", + "asr x0, x21, #63", + "eor x0, x0, x22", "cbz x0, #+0x28", - "mov x0, x6", - "mov x1, x20", - "mov x2, x7", + "mov x0, x22", + "mov x1, x21", + "mov x2, x20", "ldr x3, [x28, #2440]", "str x30, [sp, #-16]!", "blr x3", "ldr x30, [sp], #16", - "mov x4, x0", + "mov x23, x0", "b #+0x8", - "sdiv x4, x20, x7", - "asr x0, x20, #63", - "eor x0, x0, x6", + "sdiv x23, x21, x20", + "asr x0, x21, #63", + "eor x0, x0, x22", "cbz x0, #+0x28", - "mov x0, x6", - "mov x1, x20", - "mov x2, x7", + "mov x0, x22", + "mov x1, x21", + "mov x2, x20", "ldr x3, [x28, #2456]", "str x30, [sp, #-16]!", "blr x3", "ldr x30, [sp], #16", - "mov x6, x0", + "mov x24, x0", "b #+0xc", - "sdiv x0, x20, x7", - "msub x6, x0, x7, x20" + "sdiv x0, x21, x20", + "msub x24, x0, x20, x21", + "mov x4, x23", + "mov x6, x24" ] }, "inc al": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 16, "Comment": "GROUP3 0xfe /0", "ExpectedArm64ASM": [ "mov w20, #0x1", - "uxtb w27, w4", - "cset w21, hs", - "lsl w0, w27, #24", + "mov x21, x4", + "uxtb w22, w21", + "cset w23, hs", + "mov x27, x22", + "lsl w0, w22, #24", "cmn w0, w20, lsl #24", - "add w26, w27, #0x1 (1)", - "mrs x20, nzcv", - "bfi w20, w21, #29, #1", - "bfxil x4, x26, #0, #8", - "msr nzcv, x20" + "add w20, w22, #0x1 (1)", + "mov x26, x20", + "mrs x22, nzcv", + "mov w24, w22", + "bfi w24, w23, #29, #1", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22", + "msr nzcv, x24" ] }, "dec al": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 16, "Comment": "GROUP3 0xfe /1", "ExpectedArm64ASM": [ "mov w20, #0x1", - "uxtb w27, w4", - "cset w21, hs", - "lsl w0, w27, #24", + "mov x21, x4", + "uxtb w22, w21", + "cset w23, hs", + "mov x27, x22", + "lsl w0, w22, #24", "cmp w0, w20, lsl #24", - "sub w26, w27, #0x1 (1)", - "mrs x20, nzcv", - "bfi w20, w21, #29, #1", - "bfxil x4, x26, #0, #8", - "msr nzcv, x20" + "sub w20, w22, #0x1 (1)", + "mov x26, x20", + "mrs x22, nzcv", + "mov w24, w22", + "bfi w24, w23, #29, #1", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22", + "msr nzcv, x24" ] }, "inc ax": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 16, "Comment": "GROUP4 0xfe /0", "ExpectedArm64ASM": [ "mov w20, #0x1", - "uxth w27, w4", - "cset w21, hs", - "lsl w0, w27, #16", + "mov x21, x4", + "uxth w22, w21", + "cset w23, hs", + "mov x27, x22", + "lsl w0, w22, #16", "cmn w0, w20, lsl #16", - "add w26, w27, #0x1 (1)", - "mrs x20, nzcv", - "bfi w20, w21, #29, #1", - "bfxil x4, x26, #0, #16", - "msr nzcv, x20" + "add w20, w22, #0x1 (1)", + "mov x26, x20", + "mrs x22, nzcv", + "mov w24, w22", + "bfi w24, w23, #29, #1", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x4, x22", + "msr nzcv, x24" ] }, "inc eax": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 10, "Comment": "GROUP4 0xfe /0", "ExpectedArm64ASM": [ - "mov x27, x4", - "cset w20, hs", - "adds w26, w27, #0x1 (1)", - "mrs x21, nzcv", - "mov w0, w21", - "bfi w0, w20, #29, #1", - "mov w20, w0", - "mov x4, x26", - "msr nzcv, x20" + "mov x20, x4", + "cset w21, hs", + "mov x27, x20", + "adds w22, w20, #0x1 (1)", + "mov x26, x22", + "mrs x20, nzcv", + "mov w23, w20", + "bfi w23, w21, #29, #1", + "mov x4, x22", + "msr nzcv, x23" ] }, "inc rax": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 10, "Comment": "GROUP4 0xfe /0", "ExpectedArm64ASM": [ - "mov x27, x4", - "cset w20, hs", - "adds x26, x27, #0x1 (1)", - "mrs x21, nzcv", - "mov w0, w21", - "bfi w0, w20, #29, #1", - "mov w20, w0", - "mov x4, x26", - "msr nzcv, x20" + "mov x20, x4", + "cset w21, hs", + "mov x27, x20", + "adds x22, x20, #0x1 (1)", + "mov x26, x22", + "mrs x20, nzcv", + "mov w23, w20", + "bfi w23, w21, #29, #1", + "mov x4, x22", + "msr nzcv, x23" ] }, "dec ax": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 16, "Comment": "GROUP4 0xfe /1", "ExpectedArm64ASM": [ "mov w20, #0x1", - "uxth w27, w4", - "cset w21, hs", - "lsl w0, w27, #16", + "mov x21, x4", + "uxth w22, w21", + "cset w23, hs", + "mov x27, x22", + "lsl w0, w22, #16", "cmp w0, w20, lsl #16", - "sub w26, w27, #0x1 (1)", - "mrs x20, nzcv", - "bfi w20, w21, #29, #1", - "bfxil x4, x26, #0, #16", - "msr nzcv, x20" + "sub w20, w22, #0x1 (1)", + "mov x26, x20", + "mrs x22, nzcv", + "mov w24, w22", + "bfi w24, w23, #29, #1", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x4, x22", + "msr nzcv, x24" ] }, "dec eax": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 10, "Comment": "GROUP4 0xfe /1", "ExpectedArm64ASM": [ - "mov x27, x4", - "cset w20, hs", - "subs w26, w27, #0x1 (1)", - "mrs x21, nzcv", - "mov w0, w21", - "bfi w0, w20, #29, #1", - "mov w20, w0", - "mov x4, x26", - "msr nzcv, x20" + "mov x20, x4", + "cset w21, hs", + "mov x27, x20", + "subs w22, w20, #0x1 (1)", + "mov x26, x22", + "mrs x20, nzcv", + "mov w23, w20", + "bfi w23, w21, #29, #1", + "mov x4, x22", + "msr nzcv, x23" ] }, "dec rax": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 10, "Comment": "GROUP4 0xfe /1", "ExpectedArm64ASM": [ - "mov x27, x4", - "cset w20, hs", - "subs x26, x27, #0x1 (1)", - "mrs x21, nzcv", - "mov w0, w21", - "bfi w0, w20, #29, #1", - "mov w20, w0", - "mov x4, x26", - "msr nzcv, x20" + "mov x20, x4", + "cset w21, hs", + "mov x27, x20", + "subs x22, x20, #0x1 (1)", + "mov x26, x22", + "mrs x20, nzcv", + "mov w23, w20", + "bfi w23, w21, #29, #1", + "mov x4, x22", + "msr nzcv, x23" ] }, "push ax": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "GROUP4 0xff /6", "ExpectedArm64ASM": [ - "strh w4, [x8, #-2]!" + "mov x20, x4", + "mov x21, x8", + "mov x22, x21", + "strh w20, [x22, #-2]!", + "mov x8, x22" ] }, "push rax": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "GROUP4 0xff /6", "ExpectedArm64ASM": [ - "str x4, [x8, #-8]!" + "mov x20, x4", + "mov x21, x8", + "mov x22, x21", + "str x20, [x22, #-8]!", + "mov x8, x22" ] }, "mov byte [rax], 0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "GROUP11 0xc6 /0", "ExpectedArm64ASM": [ "mov w20, #0x0", - "strb w20, [x4]" + "mov x21, x4", + "strb w20, [x21]" ] }, "mov word [rax], 0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "GROUP11 0xc7 /0", "ExpectedArm64ASM": [ "mov w20, #0x0", - "strh w20, [x4]" + "mov x21, x4", + "strh w20, [x21]" ] }, "mov dword [rax], 0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "GROUP11 0xc7 /0", "ExpectedArm64ASM": [ "mov w20, #0x0", - "str w20, [x4]" + "mov x21, x4", + "str w20, [x21]" ] }, "mov qword [rax], 0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "GROUP11 0xc7 /0", "ExpectedArm64ASM": [ "mov w20, #0x0", - "str x20, [x4]" + "mov x21, x4", + "str x20, [x21]" ] }, "mov byte [rax], 1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "GROUP11 0xc6 /0", "ExpectedArm64ASM": [ "mov w20, #0x1", - "strb w20, [x4]" + "mov x21, x4", + "strb w20, [x21]" ] }, "mov word [rax], 1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "GROUP11 0xc7 /0", "ExpectedArm64ASM": [ "mov w20, #0x1", - "strh w20, [x4]" + "mov x21, x4", + "strh w20, [x21]" ] }, "mov dword [rax], 1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "GROUP11 0xc7 /0", "ExpectedArm64ASM": [ "mov w20, #0x1", - "str w20, [x4]" + "mov x21, x4", + "str w20, [x21]" ] }, "mov qword [rax], 1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "GROUP11 0xc7 /0", "ExpectedArm64ASM": [ "mov w20, #0x1", - "str x20, [x4]" + "mov x21, x4", + "str x20, [x21]" ] }, "mov byte [rax], -1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "GROUP11 0xc6 /0", "ExpectedArm64ASM": [ "mov w20, #0xff", - "strb w20, [x4]" + "mov x21, x4", + "strb w20, [x21]" ] }, "mov word [rax], -1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "GROUP11 0xc7 /0", "ExpectedArm64ASM": [ "mov w20, #0xffff", - "strh w20, [x4]" + "mov x21, x4", + "strh w20, [x21]" ] }, "mov dword [rax], -1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "GROUP11 0xc7 /0", "ExpectedArm64ASM": [ "mov w20, #0xffffffff", - "str w20, [x4]" + "mov x21, x4", + "str w20, [x21]" ] }, "mov qword [rax], -1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "GROUP11 0xc7 /0", "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", - "str x20, [x4]" + "mov x21, x4", + "str x20, [x21]" ] } } diff --git a/unittests/InstructionCountCI/Primary_32Bit.json b/unittests/InstructionCountCI/Primary_32Bit.json index cf8b6df87f..4de497d130 100644 --- a/unittests/InstructionCountCI/Primary_32Bit.json +++ b/unittests/InstructionCountCI/Primary_32Bit.json @@ -11,390 +11,523 @@ }, "Instructions": { "push es": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x06", "ExpectedArm64ASM": [ - "ldrh w20, [x28, #136]", - "str w20, [x8, #-4]!" + "mov w20, w8", + "ldrh w21, [x28, #136]", + "mov w22, w20", + "str w21, [x22, #-4]!", + "mov w8, w22" ] }, "pop es": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "0x07", "ExpectedArm64ASM": [ - "ldr w20, [x8]", - "add x8, x8, #0x4 (4)", - "strh w20, [x28, #136]", - "ubfx w20, w20, #3, #13", + "mov w20, w8", + "ldr w21, [x20]", + "add x22, x20, #0x4 (4)", + "mov w8, w22", + "strh w21, [x28, #136]", + "ubfx w20, w21, #3, #13", "add x0, x28, x20, lsl #2", - "ldr w20, [x0, #896]", - "str w20, [x28, #152]" + "ldr w21, [x0, #896]", + "str w21, [x28, #152]" ] }, "push cs": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0e", "ExpectedArm64ASM": [ - "ldrh w20, [x28, #138]", - "str w20, [x8, #-4]!" + "mov w20, w8", + "ldrh w21, [x28, #138]", + "mov w22, w20", + "str w21, [x22, #-4]!", + "mov w8, w22" ] }, "push ss": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x16", "ExpectedArm64ASM": [ - "ldrh w20, [x28, #140]", - "str w20, [x8, #-4]!" + "mov w20, w8", + "ldrh w21, [x28, #140]", + "mov w22, w20", + "str w21, [x22, #-4]!", + "mov w8, w22" ] }, "pop ss": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "0x17", "ExpectedArm64ASM": [ - "ldr w20, [x8]", - "add x8, x8, #0x4 (4)", - "strh w20, [x28, #140]", - "ubfx w20, w20, #3, #13", + "mov w20, w8", + "ldr w21, [x20]", + "add x22, x20, #0x4 (4)", + "mov w8, w22", + "strh w21, [x28, #140]", + "ubfx w20, w21, #3, #13", "add x0, x28, x20, lsl #2", - "ldr w20, [x0, #896]", - "str w20, [x28, #160]" + "ldr w21, [x0, #896]", + "str w21, [x28, #160]" ] }, "push ds": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x1e", "ExpectedArm64ASM": [ - "ldrh w20, [x28, #142]", - "str w20, [x8, #-4]!" + "mov w20, w8", + "ldrh w21, [x28, #142]", + "mov w22, w20", + "str w21, [x22, #-4]!", + "mov w8, w22" ] }, "pop ds": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "0x1f", "ExpectedArm64ASM": [ - "ldr w20, [x8]", - "add x8, x8, #0x4 (4)", - "strh w20, [x28, #142]", - "ubfx w20, w20, #3, #13", + "mov w20, w8", + "ldr w21, [x20]", + "add x22, x20, #0x4 (4)", + "mov w8, w22", + "strh w21, [x28, #142]", + "ubfx w20, w21, #3, #13", "add x0, x28, x20, lsl #2", - "ldr w20, [x0, #896]", - "str w20, [x28, #164]" + "ldr w21, [x0, #896]", + "str w21, [x28, #164]" ] }, "daa": { - "ExpectedInstructionCount": 23, + "ExpectedInstructionCount": 30, "Comment": "0x27", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "cset w21, hs", - "and x22, x20, #0xf", - "cmp x22, #0x9 (9)", - "cset x22, hi", - "eor w23, w27, w26", - "ubfx w23, w23, #4, #1", - "orr x22, x23, x22", - "cmp x20, #0x99 (153)", + "mov w20, w4", + "uxtb w21, w20", + "cset w22, hs", + "and x23, x21, #0xf", + "cmp x23, #0x9 (9)", + "cset x24, hi", + "mov w23, w27", + "mov w25, w26", + "eor w12, w23, w25", + "ubfx w23, w12, #4, #1", + "orr x25, x23, x24", + "cmp x21, #0x99 (153)", "cset x23, hi", - "orr x21, x21, x23", - "add x23, x20, #0x6 (6)", - "cmp x22, #0x0 (0)", - "csel x20, x23, x20, ne", - "add x23, x20, #0x60 (96)", - "cmp x21, #0x0 (0)", - "csel x26, x23, x20, ne", - "bfxil w4, w26, #0, #8", - "cmn wzr, w26, lsl #24", + "orr x24, x22, x23", + "add x22, x21, #0x6 (6)", + "cmp x25, #0x0 (0)", + "csel x23, x22, x21, ne", + "add x21, x23, #0x60 (96)", + "cmp x24, #0x0 (0)", + "csel x22, x21, x23, ne", + "mov w21, w20", + "bfxil w21, w22, #0, #8", + "mov w4, w21", + "cmn wzr, w22, lsl #24", "mrs x20, nzcv", - "orr w20, w20, w21, lsl #29", - "eor w27, w26, w22, lsl #4", - "msr nzcv, x20" + "orr w21, w20, w24, lsl #29", + "mov w26, w22", + "eor w20, w22, w25, lsl #4", + "mov w27, w20", + "msr nzcv, x21" ] }, "das": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 33, "Comment": "0x2f", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "cset w21, hs", - "and x22, x20, #0xf", - "cmp x22, #0x9 (9)", - "cset x22, hi", - "eor w23, w27, w26", - "ubfx w23, w23, #4, #1", - "orr x22, x23, x22", - "cmp x20, #0x99 (153)", + "mov w20, w4", + "uxtb w21, w20", + "cset w22, hs", + "and x23, x21, #0xf", + "cmp x23, #0x9 (9)", + "cset x24, hi", + "mov w23, w27", + "mov w25, w26", + "eor w12, w23, w25", + "ubfx w23, w12, #4, #1", + "orr x25, x23, x24", + "cmp x21, #0x99 (153)", "cset x23, hi", - "orr x21, x21, x23", - "cmp x20, #0x6 (6)", - "csel x23, x22, x21, lo", - "orr w23, w21, w23", - "sub x24, x20, #0x6 (6)", - "cmp x22, #0x0 (0)", - "csel x20, x24, x20, ne", - "sub x24, x20, #0x60 (96)", - "cmp x21, #0x0 (0)", - "csel x26, x24, x20, ne", - "bfxil w4, w26, #0, #8", - "cmn wzr, w26, lsl #24", + "orr x24, x22, x23", + "cmp x21, #0x6 (6)", + "csel x22, x25, x24, lo", + "orr w23, w24, w22", + "sub x22, x21, #0x6 (6)", + "cmp x25, #0x0 (0)", + "csel x12, x22, x21, ne", + "sub x21, x12, #0x60 (96)", + "cmp x24, #0x0 (0)", + "csel x22, x21, x12, ne", + "mov w21, w20", + "bfxil w21, w22, #0, #8", + "mov w4, w21", + "cmn wzr, w22, lsl #24", "mrs x20, nzcv", - "orr w20, w20, w23, lsl #29", - "eor w27, w26, w22, lsl #4", - "msr nzcv, x20" + "orr w21, w20, w23, lsl #29", + "mov w26, w22", + "eor w20, w22, w25, lsl #4", + "mov w27, w20", + "msr nzcv, x21" ] }, "aaa": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 20, "Comment": "0x37", "ExpectedArm64ASM": [ - "and x20, x4, #0xf", - "cmp x20, #0x9 (9)", - "cset x20, hi", - "eor w21, w27, w26", - "ubfx w21, w21, #4, #1", - "orr x20, x21, x20", - "lsl x21, x20, #29", - "eor w27, w26, w20, lsl #4", + "mov w20, w4", + "and x21, x20, #0xf", + "cmp x21, #0x9 (9)", + "cset x22, hi", + "mov w21, w27", + "mov w23, w26", + "eor w24, w21, w23", + "ubfx w21, w24, #4, #1", + "orr x24, x21, x22", + "lsl x21, x24, #29", + "eor w22, w23, w24, lsl #4", + "mov w27, w22", "msr nzcv, x21", - "add w20, w4, #0x106 (262)", - "csel w20, w20, w4, hs", + "add w21, w20, #0x106 (262)", + "csel w22, w21, w20, hs", "mov w21, #0xff0f", - "and w20, w20, w21", - "bfxil w4, w20, #0, #16" + "and w23, w22, w21", + "mov w21, w20", + "bfxil w21, w23, #0, #16", + "mov w4, w21" ] }, "aas": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 20, "Comment": "0x3f", "ExpectedArm64ASM": [ - "and x20, x4, #0xf", - "cmp x20, #0x9 (9)", - "cset x20, hi", - "eor w21, w27, w26", - "ubfx w21, w21, #4, #1", - "orr x20, x21, x20", - "lsl x21, x20, #29", - "eor w27, w26, w20, lsl #4", + "mov w20, w4", + "and x21, x20, #0xf", + "cmp x21, #0x9 (9)", + "cset x22, hi", + "mov w21, w27", + "mov w23, w26", + "eor w24, w21, w23", + "ubfx w21, w24, #4, #1", + "orr x24, x21, x22", + "lsl x21, x24, #29", + "eor w22, w23, w24, lsl #4", + "mov w27, w22", "msr nzcv, x21", - "sub w20, w4, #0x106 (262)", - "csel w20, w20, w4, hs", + "sub w21, w20, #0x106 (262)", + "csel w22, w21, w20, hs", "mov w21, #0xff0f", - "and w20, w20, w21", - "bfxil w4, w20, #0, #16" + "and w23, w22, w21", + "mov w21, w20", + "bfxil w21, w23, #0, #16", + "mov w4, w21" ] }, "inc ax": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 16, "Comment": "0x40", "ExpectedArm64ASM": [ "mov w20, #0x1", - "uxth w27, w4", - "cset w21, hs", - "lsl w0, w27, #16", + "mov w21, w4", + "uxth w22, w21", + "cset w23, hs", + "mov w27, w22", + "lsl w0, w22, #16", "cmn w0, w20, lsl #16", - "add w26, w27, #0x1 (1)", - "mrs x20, nzcv", - "bfi w20, w21, #29, #1", - "bfxil w4, w26, #0, #16", - "msr nzcv, x20" + "add w20, w22, #0x1 (1)", + "mov w26, w20", + "mrs x22, nzcv", + "mov w24, w22", + "bfi w24, w23, #29, #1", + "mov w22, w21", + "bfxil w22, w20, #0, #16", + "mov w4, w22", + "msr nzcv, x24" ] }, "inc eax": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 10, "Comment": "0x40", "ExpectedArm64ASM": [ - "mov w27, w4", - "cset w20, hs", - "adds w26, w27, #0x1 (1)", - "mrs x21, nzcv", - "mov w0, w21", - "bfi w0, w20, #29, #1", - "mov w20, w0", - "mov w4, w26", - "msr nzcv, x20" + "mov w20, w4", + "cset w21, hs", + "mov w27, w20", + "adds w22, w20, #0x1 (1)", + "mov w26, w22", + "mrs x20, nzcv", + "mov w23, w20", + "bfi w23, w21, #29, #1", + "mov w4, w22", + "msr nzcv, x23" ] }, "dec ax": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 16, "Comment": "0x48", "ExpectedArm64ASM": [ "mov w20, #0x1", - "uxth w27, w4", - "cset w21, hs", - "lsl w0, w27, #16", + "mov w21, w4", + "uxth w22, w21", + "cset w23, hs", + "mov w27, w22", + "lsl w0, w22, #16", "cmp w0, w20, lsl #16", - "sub w26, w27, #0x1 (1)", - "mrs x20, nzcv", - "bfi w20, w21, #29, #1", - "bfxil w4, w26, #0, #16", - "msr nzcv, x20" + "sub w20, w22, #0x1 (1)", + "mov w26, w20", + "mrs x22, nzcv", + "mov w24, w22", + "bfi w24, w23, #29, #1", + "mov w22, w21", + "bfxil w22, w20, #0, #16", + "mov w4, w22", + "msr nzcv, x24" ] }, "push ax": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0x50", "ExpectedArm64ASM": [ - "strh w4, [x8, #-2]!" + "mov w20, w4", + "mov w21, w8", + "mov w22, w21", + "strh w20, [x22, #-2]!", + "mov w8, w22" ] }, "push eax": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0x50", "ExpectedArm64ASM": [ - "str w4, [x8, #-4]!" + "mov w20, w4", + "mov w21, w8", + "mov w22, w21", + "str w20, [x22, #-4]!", + "mov w8, w22" ] }, "dec eax": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 10, "Comment": "0x48", "ExpectedArm64ASM": [ - "mov w27, w4", - "cset w20, hs", - "subs w26, w27, #0x1 (1)", - "mrs x21, nzcv", - "mov w0, w21", - "bfi w0, w20, #29, #1", - "mov w20, w0", - "mov w4, w26", - "msr nzcv, x20" + "mov w20, w4", + "cset w21, hs", + "mov w27, w20", + "subs w22, w20, #0x1 (1)", + "mov w26, w22", + "mrs x20, nzcv", + "mov w23, w20", + "bfi w23, w21, #29, #1", + "mov w4, w22", + "msr nzcv, x23" ] }, "pusha": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 25, "Comment": "0x60", "ExpectedArm64ASM": [ "mov w20, w8", - "str w4, [x20, #-4]!", - "str w5, [x20, #-4]!", - "str w6, [x20, #-4]!", - "str w7, [x20, #-4]!", - "str w8, [x20, #-4]!", - "str w9, [x20, #-4]!", - "str w10, [x20, #-4]!", - "mov w8, w20", - "str w11, [x8, #-4]!" + "mov w21, w4", + "mov w22, w20", + "str w21, [x22, #-4]!", + "mov w21, w5", + "mov w23, w22", + "str w21, [x23, #-4]!", + "mov w21, w6", + "mov w22, w23", + "str w21, [x22, #-4]!", + "mov w21, w7", + "mov w23, w22", + "str w21, [x23, #-4]!", + "mov w21, w23", + "str w20, [x21, #-4]!", + "mov w20, w9", + "mov w22, w21", + "str w20, [x22, #-4]!", + "mov w20, w10", + "mov w21, w22", + "str w20, [x21, #-4]!", + "mov w20, w11", + "mov w22, w21", + "str w20, [x22, #-4]!", + "mov w8, w22" ] }, "pushad": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 25, "Comment": "0x60", "ExpectedArm64ASM": [ "mov w20, w8", - "str w4, [x20, #-4]!", - "str w5, [x20, #-4]!", - "str w6, [x20, #-4]!", - "str w7, [x20, #-4]!", - "str w8, [x20, #-4]!", - "str w9, [x20, #-4]!", - "str w10, [x20, #-4]!", - "mov w8, w20", - "str w11, [x8, #-4]!" + "mov w21, w4", + "mov w22, w20", + "str w21, [x22, #-4]!", + "mov w21, w5", + "mov w23, w22", + "str w21, [x23, #-4]!", + "mov w21, w6", + "mov w22, w23", + "str w21, [x22, #-4]!", + "mov w21, w7", + "mov w23, w22", + "str w21, [x23, #-4]!", + "mov w21, w23", + "str w20, [x21, #-4]!", + "mov w20, w9", + "mov w22, w21", + "str w20, [x22, #-4]!", + "mov w20, w10", + "mov w21, w22", + "str w20, [x21, #-4]!", + "mov w20, w11", + "mov w22, w21", + "str w20, [x22, #-4]!", + "mov w8, w22" ] }, "popa": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 23, "Comment": "0x61", "ExpectedArm64ASM": [ - "ldr w11, [x8]", - "add x20, x8, #0x4 (4)", - "ldr w10, [x20]", - "add x20, x20, #0x4 (4)", - "ldr w9, [x20]", - "add x20, x20, #0x8 (8)", - "ldr w7, [x20]", - "add x20, x20, #0x4 (4)", - "ldr w6, [x20]", - "add x20, x20, #0x4 (4)", - "ldr w5, [x20]", - "add x20, x20, #0x4 (4)", - "ldr w4, [x20]", - "add x8, x20, #0x4 (4)" + "mov w20, w8", + "ldr w21, [x20]", + "mov w11, w21", + "add x21, x20, #0x4 (4)", + "ldr w20, [x21]", + "mov w10, w20", + "add x20, x21, #0x4 (4)", + "ldr w21, [x20]", + "mov w9, w21", + "add x21, x20, #0x8 (8)", + "ldr w20, [x21]", + "mov w7, w20", + "add x20, x21, #0x4 (4)", + "ldr w21, [x20]", + "mov w6, w21", + "add x21, x20, #0x4 (4)", + "ldr w20, [x21]", + "mov w5, w20", + "add x20, x21, #0x4 (4)", + "ldr w21, [x20]", + "mov w4, w21", + "add x21, x20, #0x4 (4)", + "mov w8, w21" ] }, "popad": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 23, "Comment": "0x61", "ExpectedArm64ASM": [ - "ldr w11, [x8]", - "add x20, x8, #0x4 (4)", - "ldr w10, [x20]", - "add x20, x20, #0x4 (4)", - "ldr w9, [x20]", - "add x20, x20, #0x8 (8)", - "ldr w7, [x20]", - "add x20, x20, #0x4 (4)", - "ldr w6, [x20]", - "add x20, x20, #0x4 (4)", - "ldr w5, [x20]", - "add x20, x20, #0x4 (4)", - "ldr w4, [x20]", - "add x8, x20, #0x4 (4)" + "mov w20, w8", + "ldr w21, [x20]", + "mov w11, w21", + "add x21, x20, #0x4 (4)", + "ldr w20, [x21]", + "mov w10, w20", + "add x20, x21, #0x4 (4)", + "ldr w21, [x20]", + "mov w9, w21", + "add x21, x20, #0x8 (8)", + "ldr w20, [x21]", + "mov w7, w20", + "add x20, x21, #0x4 (4)", + "ldr w21, [x20]", + "mov w6, w21", + "add x21, x20, #0x4 (4)", + "ldr w20, [x21]", + "mov w5, w20", + "add x20, x21, #0x4 (4)", + "ldr w21, [x20]", + "mov w4, w21", + "add x21, x20, #0x4 (4)", + "mov w8, w21" ] }, "aam": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 12, "Comment": "0xd4", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "mov w21, #0xa", - "udiv x22, x20, x21", - "udiv x2, x20, x21", - "msub x20, x2, x21, x20", - "add x26, x20, x22, lsl #8", - "bfxil w4, w26, #0, #16", - "cmn wzr, w26, lsl #24" + "mov w20, w4", + "uxtb w21, w20", + "mov w22, #0xa", + "udiv x23, x21, x22", + "udiv x2, x21, x22", + "msub x24, x2, x22, x21", + "add x21, x24, x23, lsl #8", + "mov w22, w20", + "bfxil w22, w21, #0, #16", + "mov w4, w22", + "cmn wzr, w21, lsl #24", + "mov w26, w21" ] }, "aad": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "0xd5", "ExpectedArm64ASM": [ - "lsr w20, w4, #8", - "mov w21, #0xa", - "mul x20, x20, x21", - "add x20, x4, x20", - "and x26, x20, #0xff", - "bfxil w4, w26, #0, #16", - "cmn wzr, w26, lsl #24" + "mov w20, w4", + "lsr w21, w20, #8", + "mov w22, #0xa", + "mul x23, x21, x22", + "add x21, x20, x23", + "and x22, x21, #0xff", + "mov w21, w20", + "bfxil w21, w22, #0, #16", + "mov w4, w21", + "cmn wzr, w22, lsl #24", + "mov w26, w22" ] }, "db 0xd4, 0x40": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 12, "Comment": [ "aam with a different immediate byte base", "0xd4" ], "ExpectedArm64ASM": [ - "uxtb w20, w4", - "mov w21, #0x40", - "udiv x22, x20, x21", - "udiv x2, x20, x21", - "msub x20, x2, x21, x20", - "add x26, x20, x22, lsl #8", - "bfxil w4, w26, #0, #16", - "cmn wzr, w26, lsl #24" + "mov w20, w4", + "uxtb w21, w20", + "mov w22, #0x40", + "udiv x23, x21, x22", + "udiv x2, x21, x22", + "msub x24, x2, x22, x21", + "add x21, x24, x23, lsl #8", + "mov w22, w20", + "bfxil w22, w21, #0, #16", + "mov w4, w22", + "cmn wzr, w21, lsl #24", + "mov w26, w21" ] }, "db 0xd5, 0x40": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": [ "aad with a different immediate byte base", "0xd5" ], "ExpectedArm64ASM": [ - "lsr w20, w4, #8", - "lsl x20, x20, #6", - "add x20, x4, x20", - "and x26, x20, #0xff", - "bfxil w4, w26, #0, #16", - "cmn wzr, w26, lsl #24" + "mov w20, w4", + "lsr w21, w20, #8", + "lsl x22, x21, #6", + "add x21, x20, x22", + "and x22, x21, #0xff", + "mov w21, w20", + "bfxil w21, w22, #0, #16", + "mov w4, w21", + "cmn wzr, w22, lsl #24", + "mov w26, w22" ] }, "salc": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0xd6", "ExpectedArm64ASM": [ "csetm w20, hs", - "bfxil w4, w20, #0, #8" + "mov w21, w4", + "mov w22, w21", + "bfxil w22, w20, #0, #8", + "mov w4, w22" ] } } diff --git a/unittests/InstructionCountCI/RPRES/DDD.json b/unittests/InstructionCountCI/RPRES/DDD.json index 8208bfd80d..d67b5c7437 100644 --- a/unittests/InstructionCountCI/RPRES/DDD.json +++ b/unittests/InstructionCountCI/RPRES/DDD.json @@ -18,8 +18,8 @@ ], "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", - "frecpe v2.2s, v2.2s", - "str d2, [x28, #768]" + "frecpe v3.2s, v2.2s", + "str d3, [x28, #768]" ] }, "pfrsqrtv mm0, mm1": { @@ -29,8 +29,8 @@ ], "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", - "frsqrte v2.2s, v2.2s", - "str d2, [x28, #768]" + "frsqrte v3.2s, v2.2s", + "str d3, [x28, #768]" ] }, "pfrcp mm0, mm1": { @@ -40,8 +40,8 @@ ], "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", - "frecpe s2, s2", - "dup v2.2s, v2.s[0]", + "frecpe s3, s2", + "dup v2.2s, v3.s[0]", "str d2, [x28, #768]" ] }, @@ -52,8 +52,8 @@ ], "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", - "frsqrte s2, s2", - "dup v2.2s, v2.s[0]", + "frsqrte s3, s2", + "dup v2.2s, v3.s[0]", "str d2, [x28, #768]" ] } diff --git a/unittests/InstructionCountCI/RPRES/Secondary.json b/unittests/InstructionCountCI/RPRES/Secondary.json index b5cd83ccde..71ee845371 100644 --- a/unittests/InstructionCountCI/RPRES/Secondary.json +++ b/unittests/InstructionCountCI/RPRES/Secondary.json @@ -12,21 +12,25 @@ }, "Instructions": { "rsqrtps xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "0x0f 0x52" ], "ExpectedArm64ASM": [ - "frsqrte v16.4s, v17.4s" + "mov v2.16b, v17.16b", + "frsqrte v3.4s, v2.4s", + "mov v16.16b, v3.16b" ] }, "rcpps xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "0x0f 0x53" ], "ExpectedArm64ASM": [ - "frecpe v16.4s, v17.4s" + "mov v2.16b, v17.16b", + "frecpe v3.4s, v2.4s", + "mov v16.16b, v3.16b" ] } } diff --git a/unittests/InstructionCountCI/RPRES/Secondary_REP_AFP.json b/unittests/InstructionCountCI/RPRES/Secondary_REP_AFP.json index ecff22b523..417a1bb541 100644 --- a/unittests/InstructionCountCI/RPRES/Secondary_REP_AFP.json +++ b/unittests/InstructionCountCI/RPRES/Secondary_REP_AFP.json @@ -12,21 +12,29 @@ }, "Instructions": { "rsqrtss xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf3 0x0f 0x52" ], "ExpectedArm64ASM": [ - "frsqrte s16, s17" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "frsqrte s4, s3", + "mov v16.16b, v4.16b" ] }, "rcpss xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "0xf3 0x0f 0x53" ], "ExpectedArm64ASM": [ - "frecpe s16, s17" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "frecpe s4, s3", + "mov v16.16b, v4.16b" ] } } diff --git a/unittests/InstructionCountCI/RPRES/VEX_map1_AFP.json b/unittests/InstructionCountCI/RPRES/VEX_map1_AFP.json index 40eeb98d42..ffb63080e6 100644 --- a/unittests/InstructionCountCI/RPRES/VEX_map1_AFP.json +++ b/unittests/InstructionCountCI/RPRES/VEX_map1_AFP.json @@ -11,60 +11,74 @@ }, "Instructions": { "vrsqrtps xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b00 0x52 128-bit" ], "ExpectedArm64ASM": [ - "frsqrte v16.4s, v17.4s" + "mov z2.d, p7/m, z17.d", + "frsqrte v3.4s, v2.4s", + "mov z16.d, p7/m, z3.d" ] }, "vrsqrtps ymm0, ymm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b00 0x52 256-bit" ], "ExpectedArm64ASM": [ - "frsqrte z16.s, z17.s" + "mov z2.d, p7/m, z17.d", + "frsqrte z3.s, z2.s", + "mov z16.d, p7/m, z3.d" ] }, "vrsqrtss xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "AFP can make this more optimal", "Map 1 0b10 0x52 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "frsqrte s16, s18" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "frsqrte s4, s3", + "mov z16.d, p7/m, z4.d" ] }, "vrcpps xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b00 0x53 128-bit" ], "ExpectedArm64ASM": [ - "frecpe v16.4s, v17.4s" + "mov z2.d, p7/m, z17.d", + "frecpe v3.4s, v2.4s", + "mov z16.d, p7/m, z3.d" ] }, "vrcpps ymm0, ymm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b00 0x53 256-bit" ], "ExpectedArm64ASM": [ - "frecpe z16.s, z17.s" + "mov z2.d, p7/m, z17.d", + "frecpe z3.s, z2.s", + "mov z16.d, p7/m, z3.d" ] }, "vrcpss xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b10 0x53 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "frecpe s16, s18" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "frecpe s4, s3", + "mov z16.d, p7/m, z4.d" ] } } diff --git a/unittests/InstructionCountCI/Secondary.json b/unittests/InstructionCountCI/Secondary.json index 8978954d99..4773329c58 100644 --- a/unittests/InstructionCountCI/Secondary.json +++ b/unittests/InstructionCountCI/Secondary.json @@ -32,80 +32,111 @@ "ExpectedArm64ASM": [] }, "movups xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "0x0f 0x10", "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b" + "mov v2.16b, v17.16b", + "mov v16.16b, v2.16b" ] }, "movups xmm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0x10", "ExpectedArm64ASM": [ - "ldr q16, [x4]" + "mov x20, x4", + "ldr q2, [x20]", + "mov v16.16b, v2.16b" ] }, "movups [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0x11", "ExpectedArm64ASM": [ - "str q16, [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "str q2, [x20]" ] }, "movlps xmm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x12", "ExpectedArm64ASM": [ - "ld1 {v16.d}[0], [x4]" + "mov x20, x4", + "mov v2.16b, v16.16b", + "mov v3.16b, v2.16b", + "ld1 {v3.d}[0], [x20]", + "mov v16.16b, v3.16b" ] }, "movlps [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0x13", "ExpectedArm64ASM": [ - "str d16, [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "str d2, [x20]" ] }, "movhlps xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x12", "ExpectedArm64ASM": [ - "mov v16.d[0], v17.d[1]" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "mov v4.16b, v3.16b", + "mov v4.d[0], v2.d[1]", + "mov v16.16b, v4.16b" ] }, "unpcklps xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x14", "ExpectedArm64ASM": [ - "zip1 v16.4s, v16.4s, v17.4s" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "zip1 v4.4s, v2.4s, v3.4s", + "mov v16.16b, v4.16b" ] }, "unpckhps xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x15", "ExpectedArm64ASM": [ - "zip2 v16.4s, v16.4s, v17.4s" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "zip2 v4.4s, v2.4s, v3.4s", + "mov v16.16b, v4.16b" ] }, "movhps xmm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x16", "ExpectedArm64ASM": [ - "ld1 {v16.d}[1], [x4]" + "mov x20, x4", + "mov v2.16b, v16.16b", + "mov v3.16b, v2.16b", + "ld1 {v3.d}[1], [x20]", + "mov v16.16b, v3.16b" ] }, "movlhps xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x16", "ExpectedArm64ASM": [ - "mov v16.d[1], v17.d[0]" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "mov v4.16b, v3.16b", + "mov v4.d[1], v2.d[0]", + "mov v16.16b, v4.16b" ] }, "movhps [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0x17", "ExpectedArm64ASM": [ - "st1 {v16.d}[1], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "st1 {v2.d}[1], [x20]" ] }, "nop": { @@ -119,670 +150,912 @@ "ExpectedArm64ASM": [] }, "movaps xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "0x0f 0x28", "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b" + "mov v2.16b, v17.16b", + "mov v16.16b, v2.16b" ] }, "movaps xmm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0x28", "ExpectedArm64ASM": [ - "ldr q16, [x4]" + "mov x20, x4", + "ldr q2, [x20]", + "mov v16.16b, v2.16b" ] }, "movaps [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0x29", "ExpectedArm64ASM": [ - "str q16, [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "str q2, [x20]" ] }, "cvtpi2ps xmm0, [rax]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0x0f 0x2a" ], "ExpectedArm64ASM": [ - "ldr d2, [x4]", - "scvtf v0.2s, v2.2s", - "mov v16.d[0], v0.d[0]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "ldr d3, [x20]", + "mov v4.16b, v2.16b", + "scvtf v0.2s, v3.2s", + "mov v4.d[0], v0.d[0]", + "mov v16.16b, v4.16b" ] }, "cvtpi2ps xmm0, mm0": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "0x0f 0x2a" ], "ExpectedArm64ASM": [ - "ldr d2, [x28, #768]", - "scvtf v0.2s, v2.2s", - "mov v16.d[0], v0.d[0]" + "mov v2.16b, v16.16b", + "ldr d3, [x28, #768]", + "mov v4.16b, v2.16b", + "scvtf v0.2s, v3.2s", + "mov v4.d[0], v0.d[0]", + "mov v16.16b, v4.16b" ] }, "movntps [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0x2b", "ExpectedArm64ASM": [ - "str q16, [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "str q2, [x20]" ] }, "cvttps2pi mm0, [rax]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x2c", "ExpectedArm64ASM": [ - "ldr d2, [x4]", - "fcvtzs v2.2s, v2.2s", - "str d2, [x28, #768]" + "mov x20, x4", + "ldr d2, [x20]", + "fcvtzs v3.2s, v2.2s", + "str d3, [x28, #768]" ] }, "cvttps2pi mm0, xmm0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0x2c", "ExpectedArm64ASM": [ - "fcvtzs v2.2s, v16.2s", - "str d2, [x28, #768]" + "mov v2.16b, v16.16b", + "fcvtzs v3.2s, v2.2s", + "str d3, [x28, #768]" ] }, "cvtps2pi mm0, [rax]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x2d", "ExpectedArm64ASM": [ - "ldr d2, [x4]", - "frinti v2.2s, v2.2s", - "fcvtzs v2.2s, v2.2s", - "str d2, [x28, #768]" + "mov x20, x4", + "ldr d2, [x20]", + "frinti v3.2s, v2.2s", + "fcvtzs v3.2s, v3.2s", + "str d3, [x28, #768]" ] }, "cvtps2pi mm0, xmm0": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x2d", "ExpectedArm64ASM": [ - "frinti v2.2s, v16.2s", - "fcvtzs v2.2s, v2.2s", - "str d2, [x28, #768]" + "mov v2.16b, v16.16b", + "frinti v3.2s, v2.2s", + "fcvtzs v3.2s, v3.2s", + "str d3, [x28, #768]" ] }, "ucomiss xmm0, xmm1": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 15, "Comment": "0x0f 0x2e", "ExpectedArm64ASM": [ - "fcmp s16, s17", - "mov w27, #0x0", - "cset w20, eq", - "cset w21, lo", - "cset w22, vs", - "orr w21, w21, w22", - "lsl x21, x21, #29", - "orr w20, w20, w22", - "orr w20, w21, w20, lsl #30", - "eor w26, w22, #0x1", - "msr nzcv, x20" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "fcmp s2, s3", + "mov w20, #0x0", + "cset w21, eq", + "cset w22, lo", + "cset w23, vs", + "orr w24, w22, w23", + "lsl x22, x24, #29", + "orr w24, w21, w23", + "orr w21, w22, w24, lsl #30", + "eor w22, w23, #0x1", + "mov x26, x22", + "mov x27, x20", + "msr nzcv, x21" ] }, "comiss xmm0, xmm1": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 15, "Comment": "0x0f 0x2f", "ExpectedArm64ASM": [ - "fcmp s16, s17", - "mov w27, #0x0", - "cset w20, eq", - "cset w21, lo", - "cset w22, vs", - "orr w21, w21, w22", - "lsl x21, x21, #29", - "orr w20, w20, w22", - "orr w20, w21, w20, lsl #30", - "eor w26, w22, #0x1", - "msr nzcv, x20" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "fcmp s2, s3", + "mov w20, #0x0", + "cset w21, eq", + "cset w22, lo", + "cset w23, vs", + "orr w24, w22, w23", + "lsl x22, x24, #29", + "orr w24, w21, w23", + "orr w21, w22, w24, lsl #30", + "eor w22, w23, #0x1", + "mov x26, x22", + "mov x27, x20", + "msr nzcv, x21" ] }, "rdtsc": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x31", "ExpectedArm64ASM": [ "mrs x20, S3_3_c14_c0_2", - "lsl w4, w20, #7", - "lsr x6, x20, #25" + "lsl w21, w20, #7", + "lsr x22, x20, #25", + "mov x4, x21", + "mov x6, x22" ] }, "cmovo ax, bx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x40", "ExpectedArm64ASM": [ - "csel w20, w7, w4, vs", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, vs", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21" ] }, "cmovo eax, ebx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x40", "ExpectedArm64ASM": [ - "csel w4, w7, w4, vs" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, vs", + "mov x4, x22" ] }, "cmovo rax, rbx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x40", "ExpectedArm64ASM": [ - "csel x4, x7, x4, vs" + "mov x20, x4", + "mov x21, x7", + "csel x22, x21, x20, vs", + "mov x4, x22" ] }, "cmovno ax, bx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x41", "ExpectedArm64ASM": [ - "csel w20, w7, w4, vc", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, vc", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21" ] }, "cmovno eax, ebx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x41", "ExpectedArm64ASM": [ - "csel w4, w7, w4, vc" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, vc", + "mov x4, x22" ] }, "cmovno rax, rbx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x41", "ExpectedArm64ASM": [ - "csel x4, x7, x4, vc" + "mov x20, x4", + "mov x21, x7", + "csel x22, x21, x20, vc", + "mov x4, x22" ] }, "cmovb ax, bx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x42", "ExpectedArm64ASM": [ - "csel w20, w7, w4, hs", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, hs", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21" ] }, "cmovb eax, ebx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x42", "ExpectedArm64ASM": [ - "csel w4, w7, w4, hs" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, hs", + "mov x4, x22" ] }, "cmovb rax, rbx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x42", "ExpectedArm64ASM": [ - "csel x4, x7, x4, hs" + "mov x20, x4", + "mov x21, x7", + "csel x22, x21, x20, hs", + "mov x4, x22" ] }, "cmovnb ax, bx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x43", "ExpectedArm64ASM": [ - "csel w20, w7, w4, lo", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, lo", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21" ] }, "cmovnb eax, ebx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x43", "ExpectedArm64ASM": [ - "csel w4, w7, w4, lo" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, lo", + "mov x4, x22" ] }, "cmovnb rax, rbx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x43", "ExpectedArm64ASM": [ - "csel x4, x7, x4, lo" + "mov x20, x4", + "mov x21, x7", + "csel x22, x21, x20, lo", + "mov x4, x22" ] }, "cmovz ax, bx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x44", "ExpectedArm64ASM": [ - "csel w20, w7, w4, eq", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, eq", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21" ] }, "cmovz eax, ebx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x44", "ExpectedArm64ASM": [ - "csel w4, w7, w4, eq" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, eq", + "mov x4, x22" ] }, "cmovz rax, rbx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x44", "ExpectedArm64ASM": [ - "csel x4, x7, x4, eq" + "mov x20, x4", + "mov x21, x7", + "csel x22, x21, x20, eq", + "mov x4, x22" ] }, "cmovnz ax, bx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x45", "ExpectedArm64ASM": [ - "csel w20, w7, w4, ne", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, ne", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21" ] }, "cmovnz eax, ebx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x45", "ExpectedArm64ASM": [ - "csel w4, w7, w4, ne" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, ne", + "mov x4, x22" ] }, "cmovnz rax, rbx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x45", "ExpectedArm64ASM": [ - "csel x4, x7, x4, ne" + "mov x20, x4", + "mov x21, x7", + "csel x22, x21, x20, ne", + "mov x4, x22" ] }, "cmovbe ax, bx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x0f 0x46", "ExpectedArm64ASM": [ - "csel w20, w7, w4, hs", - "csel w20, w7, w20, eq", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, hs", + "csel w23, w21, w22, eq", + "mov x21, x20", + "bfxil x21, x23, #0, #16", + "mov x4, x21" ] }, "cmovbe eax, ebx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x46", "ExpectedArm64ASM": [ - "csel w20, w7, w4, hs", - "csel w4, w7, w20, eq" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, hs", + "csel w20, w21, w22, eq", + "mov x4, x20" ] }, "cmovbe rax, rbx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x46", "ExpectedArm64ASM": [ - "csel x20, x7, x4, hs", - "csel x4, x7, x20, eq" + "mov x20, x4", + "mov x21, x7", + "csel x22, x21, x20, hs", + "csel x20, x21, x22, eq", + "mov x4, x20" ] }, "cmovnbe ax, bx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x0f 0x47", "ExpectedArm64ASM": [ - "csel w20, w7, w4, lo", - "csel w20, w20, w4, ne", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, lo", + "csel w21, w22, w20, ne", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22" ] }, "cmovnbe eax, ebx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x47", "ExpectedArm64ASM": [ - "csel w20, w7, w4, lo", - "csel w4, w20, w4, ne" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, lo", + "csel w21, w22, w20, ne", + "mov x4, x21" ] }, "cmovnbe rax, rbx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x47", "ExpectedArm64ASM": [ - "csel x20, x7, x4, lo", - "csel x4, x20, x4, ne" + "mov x20, x4", + "mov x21, x7", + "csel x22, x21, x20, lo", + "csel x21, x22, x20, ne", + "mov x4, x21" ] }, "cmovs ax, bx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x48", "ExpectedArm64ASM": [ - "csel w20, w7, w4, mi", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, mi", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21" ] }, "cmovs eax, ebx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x48", "ExpectedArm64ASM": [ - "csel w4, w7, w4, mi" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, mi", + "mov x4, x22" ] }, "cmovs rax, rbx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x48", "ExpectedArm64ASM": [ - "csel x4, x7, x4, mi" + "mov x20, x4", + "mov x21, x7", + "csel x22, x21, x20, mi", + "mov x4, x22" ] }, "cmovns ax, bx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x49", "ExpectedArm64ASM": [ - "csel w20, w7, w4, pl", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, pl", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21" ] }, "cmovns eax, ebx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x49", "ExpectedArm64ASM": [ - "csel w4, w7, w4, pl" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, pl", + "mov x4, x22" ] }, "cmovns rax, rbx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x49", "ExpectedArm64ASM": [ - "csel x4, x7, x4, pl" + "mov x20, x4", + "mov x21, x7", + "csel x22, x21, x20, pl", + "mov x4, x22" ] }, "cmovpe ax, bx": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 13, "Comment": "0x0f 0x4a", "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csel w20, w7, w4, ne", - "bfxil x4, x20, #0, #16", - "msr nzcv, x21" + "mov x20, x4", + "mov x21, x7", + "mov x22, x26", + "eor w23, w22, w22, lsr #4", + "eor w22, w23, w23, lsr #2", + "eon w23, w22, w22, lsr #1", + "mrs x22, nzcv", + "tst w23, #0x1", + "csel w23, w21, w20, ne", + "mov x21, x20", + "bfxil x21, x23, #0, #16", + "mov x4, x21", + "msr nzcv, x22" ] }, "cmovpe eax, ebx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "0x0f 0x4a", "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csel w4, w7, w4, ne", - "msr nzcv, x21" + "mov x20, x4", + "mov x21, x7", + "mov x22, x26", + "eor w23, w22, w22, lsr #4", + "eor w22, w23, w23, lsr #2", + "eon w23, w22, w22, lsr #1", + "mrs x22, nzcv", + "tst w23, #0x1", + "csel w23, w21, w20, ne", + "mov x4, x23", + "msr nzcv, x22" ] }, "cmovpe rax, rbx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "0x0f 0x4a", "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csel x4, x7, x4, ne", - "msr nzcv, x21" + "mov x20, x4", + "mov x21, x7", + "mov x22, x26", + "eor w23, w22, w22, lsr #4", + "eor w22, w23, w23, lsr #2", + "eon w23, w22, w22, lsr #1", + "mrs x22, nzcv", + "tst w23, #0x1", + "csel x23, x21, x20, ne", + "mov x4, x23", + "msr nzcv, x22" ] }, "cmovnp ax, bx": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 13, "Comment": "0x0f 0x4b", "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csel w20, w7, w4, ne", - "bfxil x4, x20, #0, #16", - "msr nzcv, x21" + "mov x20, x4", + "mov x21, x7", + "mov x22, x26", + "eor w23, w22, w22, lsr #4", + "eor w22, w23, w23, lsr #2", + "eor w23, w22, w22, lsr #1", + "mrs x22, nzcv", + "tst w23, #0x1", + "csel w23, w21, w20, ne", + "mov x21, x20", + "bfxil x21, x23, #0, #16", + "mov x4, x21", + "msr nzcv, x22" ] }, "cmovnp eax, ebx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "0x0f 0x4b", "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csel w4, w7, w4, ne", - "msr nzcv, x21" + "mov x20, x4", + "mov x21, x7", + "mov x22, x26", + "eor w23, w22, w22, lsr #4", + "eor w22, w23, w23, lsr #2", + "eor w23, w22, w22, lsr #1", + "mrs x22, nzcv", + "tst w23, #0x1", + "csel w23, w21, w20, ne", + "mov x4, x23", + "msr nzcv, x22" ] }, "cmovnp rax, rbx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "0x0f 0x4b", "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csel x4, x7, x4, ne", - "msr nzcv, x21" + "mov x20, x4", + "mov x21, x7", + "mov x22, x26", + "eor w23, w22, w22, lsr #4", + "eor w22, w23, w23, lsr #2", + "eor w23, w22, w22, lsr #1", + "mrs x22, nzcv", + "tst w23, #0x1", + "csel x23, x21, x20, ne", + "mov x4, x23", + "msr nzcv, x22" ] }, "cmovl ax, bx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x4c", "ExpectedArm64ASM": [ - "csel w20, w7, w4, lt", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, lt", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21" ] }, "cmovl eax, ebx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x4c", "ExpectedArm64ASM": [ - "csel w4, w7, w4, lt" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, lt", + "mov x4, x22" ] }, "cmovl rax, rbx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x4c", "ExpectedArm64ASM": [ - "csel x4, x7, x4, lt" + "mov x20, x4", + "mov x21, x7", + "csel x22, x21, x20, lt", + "mov x4, x22" ] }, "cmovnl ax, bx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x4d", "ExpectedArm64ASM": [ - "csel w20, w7, w4, ge", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, ge", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21" ] }, "cmovnl eax, ebx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x4d", "ExpectedArm64ASM": [ - "csel w4, w7, w4, ge" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, ge", + "mov x4, x22" ] }, "cmovnl rax, rbx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x4d", "ExpectedArm64ASM": [ - "csel x4, x7, x4, ge" + "mov x20, x4", + "mov x21, x7", + "csel x22, x21, x20, ge", + "mov x4, x22" ] }, "cmovle ax, bx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x4e", "ExpectedArm64ASM": [ - "csel w20, w7, w4, le", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, le", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21" ] }, "cmovle eax, ebx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x4e", "ExpectedArm64ASM": [ - "csel w4, w7, w4, le" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, le", + "mov x4, x22" ] }, "cmovle rax, rbx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x4e", "ExpectedArm64ASM": [ - "csel x4, x7, x4, le" + "mov x20, x4", + "mov x21, x7", + "csel x22, x21, x20, le", + "mov x4, x22" ] }, "cmovnle ax, bx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x4f", "ExpectedArm64ASM": [ - "csel w20, w7, w4, gt", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, gt", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21" ] }, "cmovnle eax, ebx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x4f", "ExpectedArm64ASM": [ - "csel w4, w7, w4, gt" + "mov x20, x4", + "mov x21, x7", + "csel w22, w21, w20, gt", + "mov x4, x22" ] }, "cmovnle rax, rbx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x4f", "ExpectedArm64ASM": [ - "csel x4, x7, x4, gt" + "mov x20, x4", + "mov x21, x7", + "csel x22, x21, x20, gt", + "mov x4, x22" ] }, "movmskps eax, xmm0": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "0x0f 0x50", "ExpectedArm64ASM": [ - "ushr v2.4s, v16.4s, #31", - "ldr q3, [x28, #2144]", - "ushl v2.4s, v2.4s, v3.4s", - "addv s2, v2.4s", - "mov w4, v2.s[0]" + "mov v2.16b, v16.16b", + "ushr v3.4s, v2.4s, #31", + "ldr q2, [x28, #2144]", + "ushl v4.4s, v3.4s, v2.4s", + "addv s2, v4.4s", + "mov w20, v2.s[0]", + "mov x4, x20" ] }, "movmskps rax, xmm0": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "0x0f 0x50", "ExpectedArm64ASM": [ - "ushr v2.4s, v16.4s, #31", - "ldr q3, [x28, #2144]", - "ushl v2.4s, v2.4s, v3.4s", - "addv s2, v2.4s", - "mov w4, v2.s[0]" + "mov v2.16b, v16.16b", + "ushr v3.4s, v2.4s, #31", + "ldr q2, [x28, #2144]", + "ushl v4.4s, v3.4s, v2.4s", + "addv s2, v4.4s", + "mov w20, v2.s[0]", + "mov x4, x20" ] }, "sqrtps xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0x51", "ExpectedArm64ASM": [ - "fsqrt v16.4s, v17.4s" + "mov v2.16b, v17.16b", + "fsqrt v3.4s, v2.4s", + "mov v16.16b, v3.16b" ] }, "rsqrtps xmm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "0x0f 0x52" ], "ExpectedArm64ASM": [ + "mov v2.16b, v17.16b", "fmov v0.4s, #0x70 (1.0000)", - "fsqrt v1.4s, v17.4s", - "fdiv v16.4s, v0.4s, v1.4s" + "fsqrt v1.4s, v2.4s", + "fdiv v3.4s, v0.4s, v1.4s", + "mov v16.16b, v3.16b" ] }, "rcpps xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "0x0f 0x53" ], "ExpectedArm64ASM": [ + "mov v2.16b, v17.16b", "fmov v0.4s, #0x70 (1.0000)", - "fdiv v16.4s, v0.4s, v17.4s" + "fdiv v3.4s, v0.4s, v2.4s", + "mov v16.16b, v3.16b" ] }, "andps xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x54", "ExpectedArm64ASM": [ - "and v16.16b, v16.16b, v17.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "and v4.16b, v3.16b, v2.16b", + "mov v16.16b, v4.16b" ] }, "andnps xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x55", "ExpectedArm64ASM": [ - "bic v16.16b, v17.16b, v16.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "bic v4.16b, v2.16b, v3.16b", + "mov v16.16b, v4.16b" ] }, "orps xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x56", "ExpectedArm64ASM": [ - "orr v16.16b, v16.16b, v17.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "orr v4.16b, v3.16b, v2.16b", + "mov v16.16b, v4.16b" ] }, "xorps xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x57", "ExpectedArm64ASM": [ - "eor v16.16b, v16.16b, v17.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "eor v4.16b, v3.16b, v2.16b", + "mov v16.16b, v4.16b" ] }, "addps xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x58", "ExpectedArm64ASM": [ - "fadd v16.4s, v16.4s, v17.4s" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "fadd v4.4s, v3.4s, v2.4s", + "mov v16.16b, v4.16b" ] }, "mulps xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x59", "ExpectedArm64ASM": [ - "fmul v16.4s, v16.4s, v17.4s" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "fmul v4.4s, v3.4s, v2.4s", + "mov v16.16b, v4.16b" ] }, "cvtps2pd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0x5a", "ExpectedArm64ASM": [ - "fcvtl v16.2d, v17.2s" + "mov v2.16b, v17.16b", + "fcvtl v3.2d, v2.2s", + "mov v16.16b, v3.16b" ] }, "cvtps2pd xmm0, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x5a", "ExpectedArm64ASM": [ - "ldr d2, [x4]", - "fcvtl v16.2d, v2.2s" + "mov x20, x4", + "ldr d2, [x20]", + "fcvtl v3.2d, v2.2s", + "mov v16.16b, v3.16b" ] }, "cvtdq2ps xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0x5b", "ExpectedArm64ASM": [ - "scvtf v16.4s, v17.4s" + "mov v2.16b, v17.16b", + "scvtf v3.4s, v2.4s", + "mov v16.16b, v3.16b" ] }, "subps xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x5c", "ExpectedArm64ASM": [ - "fsub v16.4s, v16.4s, v17.4s" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "fsub v4.4s, v3.4s, v2.4s", + "mov v16.16b, v4.16b" ] }, "minps xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x5d", "ExpectedArm64ASM": [ - "fcmgt v0.4s, v17.4s, v16.4s", - "bif v16.16b, v17.16b, v0.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "fcmgt v0.4s, v2.4s, v3.4s", + "mov v4.16b, v3.16b", + "bif v4.16b, v2.16b, v0.16b", + "mov v16.16b, v4.16b" ] }, "divps xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x5e", "ExpectedArm64ASM": [ - "fdiv v16.4s, v16.4s, v17.4s" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "fdiv v4.4s, v3.4s, v2.4s", + "mov v16.16b, v4.16b" ] }, "maxps xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x5f", "ExpectedArm64ASM": [ - "fcmgt v0.4s, v17.4s, v16.4s", - "bit v16.16b, v17.16b, v0.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "fcmgt v0.4s, v2.4s, v3.4s", + "mov v4.16b, v3.16b", + "bit v4.16b, v2.16b, v0.16b", + "mov v16.16b, v4.16b" ] }, "punpcklbw mm0, mm1": { @@ -791,18 +1064,19 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", "ldr d3, [x28, #784]", - "zip1 v2.8b, v2.8b, v3.8b", - "str d2, [x28, #768]" + "zip1 v4.8b, v2.8b, v3.8b", + "str d4, [x28, #768]" ] }, "punpcklbw mm0, [rax]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x60", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "ldr d3, [x4]", - "zip1 v2.8b, v2.8b, v3.8b", - "str d2, [x28, #768]" + "mov x20, x4", + "ldr d3, [x20]", + "zip1 v4.8b, v2.8b, v3.8b", + "str d4, [x28, #768]" ] }, "punpcklwd mm0, mm1": { @@ -811,18 +1085,19 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", "ldr d3, [x28, #784]", - "zip1 v2.4h, v2.4h, v3.4h", - "str d2, [x28, #768]" + "zip1 v4.4h, v2.4h, v3.4h", + "str d4, [x28, #768]" ] }, "punpcklwd mm0, [rax]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x61", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "ldr d3, [x4]", - "zip1 v2.4h, v2.4h, v3.4h", - "str d2, [x28, #768]" + "mov x20, x4", + "ldr d3, [x20]", + "zip1 v4.4h, v2.4h, v3.4h", + "str d4, [x28, #768]" ] }, "punpckldq mm0, mm1": { @@ -831,18 +1106,19 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", "ldr d3, [x28, #784]", - "zip1 v2.2s, v2.2s, v3.2s", - "str d2, [x28, #768]" + "zip1 v4.2s, v2.2s, v3.2s", + "str d4, [x28, #768]" ] }, "punpckldq mm0, [rax]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x62", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "ldr d3, [x4]", - "zip1 v2.2s, v2.2s, v3.2s", - "str d2, [x28, #768]" + "mov x20, x4", + "ldr d3, [x20]", + "zip1 v4.2s, v2.2s, v3.2s", + "str d4, [x28, #768]" ] }, "packsswb mm0, mm1": { @@ -851,20 +1127,21 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", "ldr d3, [x28, #784]", - "zip1 v2.2d, v2.2d, v3.2d", - "sqxtn v2.8b, v2.8h", - "str d2, [x28, #768]" + "zip1 v4.2d, v2.2d, v3.2d", + "sqxtn v4.8b, v4.8h", + "str d4, [x28, #768]" ] }, "packsswb mm0, [rax]": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x63", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "ldr d3, [x4]", - "zip1 v2.2d, v2.2d, v3.2d", - "sqxtn v2.8b, v2.8h", - "str d2, [x28, #768]" + "mov x20, x4", + "ldr d3, [x20]", + "zip1 v4.2d, v2.2d, v3.2d", + "sqxtn v4.8b, v4.8h", + "str d4, [x28, #768]" ] }, "packsswb mm0, mm0": { @@ -872,9 +1149,9 @@ "Comment": "0x0f 0x63", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "zip1 v2.2d, v2.2d, v2.2d", - "sqxtn v2.8b, v2.8h", - "str d2, [x28, #768]" + "zip1 v3.2d, v2.2d, v2.2d", + "sqxtn v3.8b, v3.8h", + "str d3, [x28, #768]" ] }, "pcmpgtb mm0, mm1": { @@ -883,8 +1160,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "cmgt v2.16b, v3.16b, v2.16b", - "str d2, [x28, #768]" + "cmgt v4.16b, v3.16b, v2.16b", + "str d4, [x28, #768]" ] }, "pcmpgtw mm0, mm1": { @@ -893,8 +1170,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "cmgt v2.8h, v3.8h, v2.8h", - "str d2, [x28, #768]" + "cmgt v4.8h, v3.8h, v2.8h", + "str d4, [x28, #768]" ] }, "pcmpgtd mm0, mm1": { @@ -903,8 +1180,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "cmgt v2.4s, v3.4s, v2.4s", - "str d2, [x28, #768]" + "cmgt v4.4s, v3.4s, v2.4s", + "str d4, [x28, #768]" ] }, "punpckhbw mm0, mm1": { @@ -913,18 +1190,19 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", "ldr d3, [x28, #784]", - "zip2 v2.8b, v2.8b, v3.8b", - "str d2, [x28, #768]" + "zip2 v4.8b, v2.8b, v3.8b", + "str d4, [x28, #768]" ] }, "punpckhbw mm0, [rax]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x68", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "ldr d3, [x4]", - "zip2 v2.8b, v2.8b, v3.8b", - "str d2, [x28, #768]" + "mov x20, x4", + "ldr d3, [x20]", + "zip2 v4.8b, v2.8b, v3.8b", + "str d4, [x28, #768]" ] }, "punpckhwd mm0, mm1": { @@ -933,18 +1211,19 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", "ldr d3, [x28, #784]", - "zip2 v2.4h, v2.4h, v3.4h", - "str d2, [x28, #768]" + "zip2 v4.4h, v2.4h, v3.4h", + "str d4, [x28, #768]" ] }, "punpckhwd mm0, [rax]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x69", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "ldr d3, [x4]", - "zip2 v2.4h, v2.4h, v3.4h", - "str d2, [x28, #768]" + "mov x20, x4", + "ldr d3, [x20]", + "zip2 v4.4h, v2.4h, v3.4h", + "str d4, [x28, #768]" ] }, "punpckhdq mm0, mm1": { @@ -953,18 +1232,19 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", "ldr d3, [x28, #784]", - "zip2 v2.2s, v2.2s, v3.2s", - "str d2, [x28, #768]" + "zip2 v4.2s, v2.2s, v3.2s", + "str d4, [x28, #768]" ] }, "punpckhdq mm0, [rax]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x6a", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "ldr d3, [x4]", - "zip2 v2.2s, v2.2s, v3.2s", - "str d2, [x28, #768]" + "mov x20, x4", + "ldr d3, [x20]", + "zip2 v4.2s, v2.2s, v3.2s", + "str d4, [x28, #768]" ] }, "packssdw mm0, mm1": { @@ -973,24 +1253,26 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", "ldr d3, [x28, #784]", - "zip1 v2.2d, v2.2d, v3.2d", - "sqxtn v2.4h, v2.4s", - "str d2, [x28, #768]" + "zip1 v4.2d, v2.2d, v3.2d", + "sqxtn v4.4h, v4.4s", + "str d4, [x28, #768]" ] }, "movd mm0, eax": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0x6e", "ExpectedArm64ASM": [ - "fmov s2, w4", + "mov x20, x4", + "fmov s2, w20", "str d2, [x28, #768]" ] }, "movd mm0, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0x6e", "ExpectedArm64ASM": [ - "ldr s2, [x4]", + "mov x20, x4", + "ldr s2, [x20]", "str d2, [x28, #768]" ] }, @@ -1011,10 +1293,11 @@ ] }, "movq mm0, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0x6f", "ExpectedArm64ASM": [ - "ldr d2, [x4]", + "mov x20, x4", + "ldr d2, [x20]", "str d2, [x28, #768]" ] }, @@ -1023,17 +1306,18 @@ "Comment": "0x0f 0x70", "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", - "dup v2.4h, v2.h[0]", - "str d2, [x28, #768]" + "dup v3.4h, v2.h[0]", + "str d3, [x28, #768]" ] }, "pshufw mm0, [rax], 0": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x70", "ExpectedArm64ASM": [ - "ldr d2, [x4]", - "dup v2.4h, v2.h[0]", - "str d2, [x28, #768]" + "mov x20, x4", + "ldr d2, [x20]", + "dup v3.4h, v2.h[0]", + "str d3, [x28, #768]" ] }, "pshufw mm0, mm1, 1": { @@ -1043,19 +1327,20 @@ "ldr d2, [x28, #784]", "ldr x0, [x28, #1744]", "ldr d3, [x0, #16]", - "tbl v2.8b, {v2.16b}, v3.8b", - "str d2, [x28, #768]" + "tbl v4.8b, {v2.16b}, v3.8b", + "str d4, [x28, #768]" ] }, "pshufw mm0, [rax], 1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x70", "ExpectedArm64ASM": [ - "ldr d2, [x4]", + "mov x20, x4", + "ldr d2, [x20]", "ldr x0, [x28, #1744]", "ldr d3, [x0, #16]", - "tbl v2.8b, {v2.16b}, v3.8b", - "str d2, [x28, #768]" + "tbl v4.8b, {v2.16b}, v3.8b", + "str d4, [x28, #768]" ] }, "pshufw mm0, mm1, 0xff": { @@ -1063,17 +1348,18 @@ "Comment": "0x0f 0x70", "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", - "dup v2.4h, v2.h[3]", - "str d2, [x28, #768]" + "dup v3.4h, v2.h[3]", + "str d3, [x28, #768]" ] }, "pshufw mm0, [rax], 0xff": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0x70", "ExpectedArm64ASM": [ - "ldr d2, [x4]", - "dup v2.4h, v2.h[3]", - "str d2, [x28, #768]" + "mov x20, x4", + "ldr d2, [x20]", + "dup v3.4h, v2.h[3]", + "str d3, [x28, #768]" ] }, "pcmpeqb mm0, mm1": { @@ -1082,8 +1368,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "cmeq v2.16b, v3.16b, v2.16b", - "str d2, [x28, #768]" + "cmeq v4.16b, v3.16b, v2.16b", + "str d4, [x28, #768]" ] }, "pcmpeqw mm0, mm1": { @@ -1092,8 +1378,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "cmeq v2.8h, v3.8h, v2.8h", - "str d2, [x28, #768]" + "cmeq v4.8h, v3.8h, v2.8h", + "str d4, [x28, #768]" ] }, "pcmpeqd mm0, mm1": { @@ -1102,8 +1388,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "cmeq v2.4s, v3.4s, v2.4s", - "str d2, [x28, #768]" + "cmeq v4.4s, v3.4s, v2.4s", + "str d4, [x28, #768]" ] }, "emms": { @@ -1115,19 +1401,21 @@ ] }, "movd eax, mm0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0x7e", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "mov w4, v2.s[0]" + "mov w20, v2.s[0]", + "mov x4, x20" ] }, "movd [rax], mm0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0x7e", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "str s2, [x4]" + "mov x20, x4", + "str s2, [x20]" ] }, "db 0x0f, 0x7f, 0xc1": { @@ -1143,239 +1431,307 @@ ] }, "movq [rax], mm0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0x7f", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "str d2, [x4]" + "mov x20, x4", + "str d2, [x20]" ] }, "seto al": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x90", "ExpectedArm64ASM": [ "cset x20, vs", - "bfxil x4, x20, #0, #8" + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "setno al": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x91", "ExpectedArm64ASM": [ "cset x20, vc", - "bfxil x4, x20, #0, #8" + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "setb al": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x92", "ExpectedArm64ASM": [ "cset x20, hs", - "bfxil x4, x20, #0, #8" + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "setnb al": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x93", "ExpectedArm64ASM": [ "cset x20, lo", - "bfxil x4, x20, #0, #8" + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "setz al": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x94", "ExpectedArm64ASM": [ "cset x20, eq", - "bfxil x4, x20, #0, #8" + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "setnz al": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x95", "ExpectedArm64ASM": [ "cset x20, ne", - "bfxil x4, x20, #0, #8" + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "setbe al": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "0x0f 0x96", "ExpectedArm64ASM": [ "mov w20, #0x1", "cset x21, hs", - "csel x20, x20, x21, eq", - "bfxil x4, x20, #0, #8" + "csel x22, x20, x21, eq", + "mov x20, x4", + "mov x21, x20", + "bfxil x21, x22, #0, #8", + "mov x4, x21" ] }, "setnbe al": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0x97", "ExpectedArm64ASM": [ "cset x20, lo", - "csel x20, x20, xzr, ne", - "bfxil x4, x20, #0, #8" + "csel x21, x20, xzr, ne", + "mov x20, x4", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22" ] }, "sets al": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x98", "ExpectedArm64ASM": [ "cset x20, mi", - "bfxil x4, x20, #0, #8" + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "setns al": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x99", "ExpectedArm64ASM": [ "cset x20, pl", - "bfxil x4, x20, #0, #8" + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "setpe al": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0x9a", "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "and x20, x20, #0x1", - "bfxil x4, x20, #0, #8" + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "and x20, x21, #0x1", + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "setnp al": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0x9b", "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "and x20, x20, #0x1", - "bfxil x4, x20, #0, #8" + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "and x20, x21, #0x1", + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "setl al": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x9c", "ExpectedArm64ASM": [ "cset x20, lt", - "bfxil x4, x20, #0, #8" + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "setnl al": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x9d", "ExpectedArm64ASM": [ "cset x20, ge", - "bfxil x4, x20, #0, #8" + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "setle al": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x9e", "ExpectedArm64ASM": [ "cset x20, le", - "bfxil x4, x20, #0, #8" + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "setnle al": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0x9f", "ExpectedArm64ASM": [ "cset x20, gt", - "bfxil x4, x20, #0, #8" + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #8", + "mov x4, x22" ] }, "push fs": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xa0", "ExpectedArm64ASM": [ - "ldr x20, [x28, #176]", - "str x20, [x8, #-8]!" + "mov x20, x8", + "ldr x21, [x28, #176]", + "mov x22, x20", + "str x21, [x22, #-8]!", + "mov x8, x22" ] }, "pop fs": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0xa1", "ExpectedArm64ASM": [ - "ldr x20, [x8]", - "add x8, x8, #0x8 (8)", - "strh w20, [x28, #146]", - "ubfx w20, w20, #3, #13", + "mov x20, x8", + "ldr x21, [x20]", + "add x22, x20, #0x8 (8)", + "mov x8, x22", + "strh w21, [x28, #146]", + "ubfx w20, w21, #3, #13", "add x0, x28, x20, lsl #2", - "ldr w20, [x0, #896]", - "str w20, [x28, #176]" + "ldr w21, [x0, #896]", + "str w21, [x28, #176]" ] }, "bt ax, bx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "0x0f 0xa3", "ExpectedArm64ASM": [ - "and x20, x7, #0xf", - "lsr w20, w4, w20", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", + "mov x20, x7", + "mov x21, x4", + "and x22, x20, #0xf", + "lsr w20, w21, w22", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "bt [rax], bx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0xa3", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "sbfx x21, x7, #3, #13", - "ldrb w21, [x4, x21, sxtx]", - "lsr w20, w21, w20", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "sbfx x23, x20, #3, #13", + "ldrb w20, [x21, x23, sxtx]", + "lsr w21, w20, w22", + "ubfx x20, x21, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "bt eax, ebx": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xa3", "ExpectedArm64ASM": [ - "lsr w20, w4, w7", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x7", + "mov x21, x4", + "lsr w22, w21, w20", + "ubfx x20, x22, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "bt [rax], ebx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0xa3", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "sbfx x21, x7, #3, #29", - "ldrb w21, [x4, x21, sxtx]", - "lsr w20, w21, w20", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "sbfx x23, x20, #3, #29", + "ldrb w20, [x21, x23, sxtx]", + "lsr w21, w20, w22", + "ubfx x20, x21, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "bt rax, rbx": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xa3", "ExpectedArm64ASM": [ - "lsr x20, x4, x7", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x7", + "mov x21, x4", + "lsr x22, x21, x20", + "ubfx x20, x22, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "bt [rax], rbx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0xa3", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "asr x21, x7, #3", - "ldrb w21, [x4, x21, sxtx]", - "lsr w20, w21, w20", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "asr x23, x20, #3", + "ldrb w20, [x21, x23, sxtx]", + "lsr w21, w20, w22", + "ubfx x20, x21, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "shld ax, bx, 0": { @@ -1384,143 +1740,177 @@ "ExpectedArm64ASM": [] }, "shld ax, bx, 1": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 19, "Comment": "0x0f 0xac", "ExpectedArm64ASM": [ - "uxth w20, w7", - "uxth w21, w4", - "lsl x22, x21, #1", - "lsr w20, w20, #15", - "orr x26, x22, x20", - "bfxil x4, x26, #0, #16", - "cmn wzr, w26, lsl #16", - "ubfx x20, x21, #15, #1", - "mrs x22, nzcv", - "orr w20, w22, w20, lsl #29", - "eor w21, w26, w21", - "ubfx x21, x21, #15, #1", - "orr w20, w20, w21, lsl #28", + "mov x20, x7", + "uxth w21, w20", + "mov x20, x4", + "uxth w22, w20", + "lsl x23, x22, #1", + "lsr w24, w21, #15", + "orr x21, x23, x24", + "mov x23, x20", + "bfxil x23, x21, #0, #16", + "mov x4, x23", + "cmn wzr, w21, lsl #16", + "ubfx x20, x22, #15, #1", + "mrs x23, nzcv", + "orr w24, w23, w20, lsl #29", + "mov x26, x21", + "eor w20, w21, w22", + "ubfx x21, x20, #15, #1", + "orr w20, w24, w21, lsl #28", "msr nzcv, x20" ] }, "shld ax, bx, 15": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 16, "Comment": "0x0f 0xac", "ExpectedArm64ASM": [ - "uxth w20, w7", - "uxth w21, w4", - "lsl x22, x21, #15", - "lsr w20, w20, #1", - "orr x26, x22, x20", - "bfxil x4, x26, #0, #16", - "cmn wzr, w26, lsl #16", - "ubfx x20, x21, #1, #1", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "msr nzcv, x20" + "mov x20, x7", + "uxth w21, w20", + "mov x20, x4", + "uxth w22, w20", + "lsl x23, x22, #15", + "lsr w24, w21, #1", + "orr x21, x23, x24", + "mov x23, x20", + "bfxil x23, x21, #0, #16", + "mov x4, x23", + "cmn wzr, w21, lsl #16", + "ubfx x20, x22, #1, #1", + "mrs x22, nzcv", + "orr w23, w22, w20, lsl #29", + "mov x26, x21", + "msr nzcv, x23" ] }, "shld ax, bx, 16": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 15, "Comment": "0x0f 0xac", "ExpectedArm64ASM": [ - "uxth w20, w7", - "uxth w21, w4", - "lsl x22, x21, #16", - "orr x26, x22, x20", - "bfxil x4, x26, #0, #16", - "cmn wzr, w26, lsl #16", - "ubfx x20, x21, #0, #1", + "mov x20, x7", + "uxth w21, w20", + "mov x20, x4", + "uxth w22, w20", + "lsl x23, x22, #16", + "orr x24, x23, x21", + "mov x21, x20", + "bfxil x21, x24, #0, #16", + "mov x4, x21", + "cmn wzr, w24, lsl #16", + "ubfx x20, x22, #0, #1", "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "msr nzcv, x20" + "orr w22, w21, w20, lsl #29", + "mov x26, x24", + "msr nzcv, x22" ] }, "shld ax, bx, 31": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 16, "Comment": "0x0f 0xac", "ExpectedArm64ASM": [ - "uxth w20, w7", - "uxth w21, w4", - "lsl x22, x21, #31", - "lsr w20, w20, #17", - "orr x26, x22, x20", - "bfxil x4, x26, #0, #16", - "cmn wzr, w26, lsl #16", - "ubfx x20, x21, #1, #1", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "msr nzcv, x20" + "mov x20, x7", + "uxth w21, w20", + "mov x20, x4", + "uxth w22, w20", + "lsl x23, x22, #31", + "lsr w24, w21, #17", + "orr x21, x23, x24", + "mov x23, x20", + "bfxil x23, x21, #0, #16", + "mov x4, x23", + "cmn wzr, w21, lsl #16", + "ubfx x20, x22, #1, #1", + "mrs x22, nzcv", + "orr w23, w22, w20, lsl #29", + "mov x26, x21", + "msr nzcv, x23" ] }, "shld eax, ebx, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0xac", "ExpectedArm64ASM": [ - "mov w4, w4" + "mov x20, x4", + "mov w21, w20", + "mov x4, x21" ] }, "shld eax, ebx, 1": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 15, "Comment": "0x0f 0xac", "ExpectedArm64ASM": [ - "mov w20, w7", - "mov w21, w4", - "extr w4, w21, w20, #31", - "tst w4, w4", + "mov x20, x7", + "mov w21, w20", + "mov x20, x4", + "mov w22, w20", + "extr w20, w22, w21, #31", + "mov x4, x20", + "tst w20, w20", + "ubfx x21, x22, #31, #1", + "mrs x23, nzcv", + "orr w24, w23, w21, lsl #29", + "mov x26, x20", + "eor w21, w20, w22", "ubfx x20, x21, #31, #1", - "mrs x22, nzcv", - "orr w20, w22, w20, lsl #29", - "mov x26, x4", - "eor w21, w4, w21", - "ubfx x21, x21, #31, #1", - "orr w20, w20, w21, lsl #28", - "msr nzcv, x20" + "orr w21, w24, w20, lsl #28", + "msr nzcv, x21" ] }, "shld eax, ebx, 15": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 12, "Comment": "0x0f 0xac", "ExpectedArm64ASM": [ - "mov w20, w7", - "mov w21, w4", - "extr w4, w21, w20, #17", - "tst w4, w4", - "ubfx x20, x21, #17, #1", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "mov x26, x4", - "msr nzcv, x20" + "mov x20, x7", + "mov w21, w20", + "mov x20, x4", + "mov w22, w20", + "extr w20, w22, w21, #17", + "mov x4, x20", + "tst w20, w20", + "ubfx x21, x22, #17, #1", + "mrs x22, nzcv", + "orr w23, w22, w21, lsl #29", + "mov x26, x20", + "msr nzcv, x23" ] }, "shld eax, ebx, 16": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 12, "Comment": "0x0f 0xac", "ExpectedArm64ASM": [ - "mov w20, w7", - "mov w21, w4", - "extr w4, w21, w20, #16", - "tst w4, w4", - "ubfx x20, x21, #16, #1", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "mov x26, x4", - "msr nzcv, x20" + "mov x20, x7", + "mov w21, w20", + "mov x20, x4", + "mov w22, w20", + "extr w20, w22, w21, #16", + "mov x4, x20", + "tst w20, w20", + "ubfx x21, x22, #16, #1", + "mrs x22, nzcv", + "orr w23, w22, w21, lsl #29", + "mov x26, x20", + "msr nzcv, x23" ] }, "shld eax, ebx, 31": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 12, "Comment": "0x0f 0xac", "ExpectedArm64ASM": [ - "mov w20, w7", - "mov w21, w4", - "extr w4, w21, w20, #1", - "tst w4, w4", - "ubfx x20, x21, #1, #1", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "mov x26, x4", - "msr nzcv, x20" + "mov x20, x7", + "mov w21, w20", + "mov x20, x4", + "mov w22, w20", + "extr w20, w22, w21, #1", + "mov x4, x20", + "tst w20, w20", + "ubfx x21, x22, #1, #1", + "mrs x22, nzcv", + "orr w23, w22, w21, lsl #29", + "mov x26, x20", + "msr nzcv, x23" ] }, "shld rax, rbx, 0": { @@ -1529,1707 +1919,2126 @@ "ExpectedArm64ASM": [] }, "shld rax, rbx, 1": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 13, "Comment": "0x0f 0xac", "ExpectedArm64ASM": [ - "mov x20, x4", - "extr x4, x20, x7, #63", - "tst x4, x4", + "mov x20, x7", + "mov x21, x4", + "extr x22, x21, x20, #63", + "mov x4, x22", + "tst x22, x22", + "lsr x20, x21, #63", + "mrs x23, nzcv", + "orr w24, w23, w20, lsl #29", + "mov x26, x22", + "eor x20, x22, x21", "lsr x21, x20, #63", - "mrs x22, nzcv", - "orr w21, w22, w21, lsl #29", - "mov x26, x4", - "eor x20, x4, x20", - "lsr x20, x20, #63", - "orr w20, w21, w20, lsl #28", + "orr w20, w24, w21, lsl #28", "msr nzcv, x20" ] }, "shld rax, rbx, 15": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 10, "Comment": "0x0f 0xac", "ExpectedArm64ASM": [ - "mov x20, x4", - "extr x4, x20, x7, #49", - "tst x4, x4", - "ubfx x20, x20, #49, #1", + "mov x20, x7", + "mov x21, x4", + "extr x22, x21, x20, #49", + "mov x4, x22", + "tst x22, x22", + "ubfx x20, x21, #49, #1", "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "mov x26, x4", - "msr nzcv, x20" + "orr w23, w21, w20, lsl #29", + "mov x26, x22", + "msr nzcv, x23" ] }, "shld rax, rbx, 32": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 10, "Comment": "0x0f 0xac", "ExpectedArm64ASM": [ - "mov x20, x4", - "extr x4, x20, x7, #32", - "tst x4, x4", - "ubfx x20, x20, #32, #1", + "mov x20, x7", + "mov x21, x4", + "extr x22, x21, x20, #32", + "mov x4, x22", + "tst x22, x22", + "ubfx x20, x21, #32, #1", "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "mov x26, x4", - "msr nzcv, x20" + "orr w23, w21, w20, lsl #29", + "mov x26, x22", + "msr nzcv, x23" ] }, "shld rax, rbx, 63": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 10, "Comment": "0x0f 0xac", "ExpectedArm64ASM": [ - "mov x20, x4", - "extr x4, x20, x7, #1", - "tst x4, x4", - "ubfx x20, x20, #1, #1", + "mov x20, x7", + "mov x21, x4", + "extr x22, x21, x20, #1", + "mov x4, x22", + "tst x22, x22", + "ubfx x20, x21, #1, #1", "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "mov x26, x4", - "msr nzcv, x20" + "orr w23, w21, w20, lsl #29", + "mov x26, x22", + "msr nzcv, x23" ] }, "shld ax, bx, cl": { - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 33, "Comment": "0x0f 0xad", "ExpectedArm64ASM": [ - "uxth w20, w7", - "uxth w21, w4", - "and x22, x5, #0x1f", + "mov x20, x7", + "uxth w21, w20", + "mov x20, x4", + "uxth w22, w20", + "mov x23, x5", + "and x24, x23, #0x1f", "mov w23, #0x10", - "sub x23, x23, x22", - "lsl x24, x21, x22", - "lsr w20, w20, w23", - "orr x20, x24, x20", + "sub x25, x23, x24", + "lsl x23, x22, x24", + "lsr w30, w21, w25", + "orr x21, x23, x30", "mrs x23, nzcv", - "cmp x22, #0x0 (0)", - "csel x20, x21, x20, eq", - "bfxil x4, x20, #0, #16", + "cmp x24, #0x0 (0)", + "csel x25, x22, x21, eq", + "mov x21, x20", + "bfxil x21, x25, #0, #16", + "mov x4, x21", + "mov x20, x26", "msr nzcv, x23", - "cbz w22, #+0x30", - "cmn wzr, w20, lsl #16", - "mov x26, x20", + "mov x21, x20", + "cbz w24, #+0x30", + "cmn wzr, w25, lsl #16", + "mov x21, x25", "mov w0, #0x10", - "sub w0, w0, w22", - "lsr w0, w21, w0", - "eor w2, w21, w20", + "sub w0, w0, w24", + "lsr w0, w22, w0", + "eor w2, w22, w25", "mrs x1, nzcv", "bfi w1, w0, #29, #1", "lsr w2, w2, #15", "bfi w1, w2, #28, #1", - "msr nzcv, x1" + "msr nzcv, x1", + "mov x26, x21" ] }, "shld eax, ebx, cl": { - "ExpectedInstructionCount": 22, + "ExpectedInstructionCount": 29, "Comment": "0x0f 0xad", "ExpectedArm64ASM": [ - "mov w20, w7", - "mov w21, w4", - "and x22, x5, #0x1f", - "neg x23, x22", - "lsl x24, x21, x22", - "lsr w20, w20, w23", - "orr x20, x24, x20", - "mrs x23, nzcv", - "cmp x22, #0x0 (0)", - "csel x20, x21, x20, eq", - "mov w4, w20", - "msr nzcv, x23", - "cbz w22, #+0x28", - "ands w26, w20, w20", - "neg w0, w22", - "lsr w0, w21, w0", - "eor w2, w21, w20", + "mov x20, x7", + "mov w21, w20", + "mov x20, x4", + "mov w22, w20", + "mov x20, x5", + "and x23, x20, #0x1f", + "neg x20, x23", + "lsl x24, x22, x23", + "lsr w25, w21, w20", + "orr x20, x24, x25", + "mrs x21, nzcv", + "cmp x23, #0x0 (0)", + "csel x24, x22, x20, eq", + "mov w20, w24", + "mov x4, x20", + "mov x20, x26", + "msr nzcv, x21", + "mov x21, x20", + "cbz w23, #+0x28", + "ands w21, w24, w24", + "neg w0, w23", + "lsr w0, w22, w0", + "eor w2, w22, w24", "mrs x1, nzcv", "bfi w1, w0, #29, #1", "lsr w2, w2, #31", "bfi w1, w2, #28, #1", - "msr nzcv, x1" + "msr nzcv, x1", + "mov x26, x21" ] }, "shld rax, rbx, cl": { - "ExpectedInstructionCount": 20, + "ExpectedInstructionCount": 26, "Comment": "0x0f 0xad", "ExpectedArm64ASM": [ - "mov x20, x4", - "and x21, x5, #0x3f", - "neg x22, x21", - "lsl x23, x20, x21", - "lsr x22, x7, x22", - "orr x22, x23, x22", - "mrs x23, nzcv", - "cmp x21, #0x0 (0)", - "csel x4, x20, x22, eq", - "msr nzcv, x23", - "cbz x21, #+0x28", - "ands x26, x4, x4", - "neg x0, x21", - "lsr x0, x20, x0", - "eor x2, x20, x4", + "mov x20, x7", + "mov x21, x4", + "mov x22, x5", + "and x23, x22, #0x3f", + "neg x22, x23", + "lsl x24, x21, x23", + "lsr x25, x20, x22", + "orr x20, x24, x25", + "mrs x22, nzcv", + "cmp x23, #0x0 (0)", + "csel x24, x21, x20, eq", + "mov x4, x24", + "mov x20, x26", + "msr nzcv, x22", + "mov x22, x20", + "cbz x23, #+0x28", + "ands x22, x24, x24", + "neg x0, x23", + "lsr x0, x21, x0", + "eor x2, x21, x24", "mrs x1, nzcv", "bfi w1, w0, #29, #1", "lsr x2, x2, #63", "bfi w1, w2, #28, #1", - "msr nzcv, x1" + "msr nzcv, x1", + "mov x26, x22" ] }, "push gs": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xa8", "ExpectedArm64ASM": [ - "ldr x20, [x28, #168]", - "str x20, [x8, #-8]!" + "mov x20, x8", + "ldr x21, [x28, #168]", + "mov x22, x20", + "str x21, [x22, #-8]!", + "mov x8, x22" ] }, "pop gs": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0xa9", "ExpectedArm64ASM": [ - "ldr x20, [x8]", - "add x8, x8, #0x8 (8)", - "strh w20, [x28, #144]", - "ubfx w20, w20, #3, #13", + "mov x20, x8", + "ldr x21, [x20]", + "add x22, x20, #0x8 (8)", + "mov x8, x22", + "strh w21, [x28, #144]", + "ubfx w20, w21, #3, #13", "add x0, x28, x20, lsl #2", - "ldr w20, [x0, #896]", - "str w20, [x28, #168]" + "ldr w21, [x0, #896]", + "str w21, [x28, #168]" ] }, "bts ax, bx": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 13, "Comment": "0x0f 0xab", "ExpectedArm64ASM": [ - "and x20, x7, #0xf", - "lsr w21, w4, w20", - "ubfx x21, x21, #0, #1", - "lsl x21, x21, #29", - "mov w22, #0x1", - "lsl w20, w22, w20", - "orr w20, w4, w20", - "bfxil x4, x20, #0, #16", - "msr nzcv, x21" + "mov x20, x7", + "mov x21, x4", + "and x22, x20, #0xf", + "lsr w20, w21, w22", + "ubfx x23, x20, #0, #1", + "lsl x20, x23, #29", + "mov w23, #0x1", + "lsl w24, w23, w22", + "orr w22, w21, w24", + "mov x23, x21", + "bfxil x23, x22, #0, #16", + "mov x4, x23", + "msr nzcv, x20" ] }, "bts [rax], bx": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 13, "Comment": "0x0f 0xab", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "sbfx x21, x7, #3, #13", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldrb w23, [x4, x21, sxtx]", - "orr x22, x23, x22", - "strb w22, [x4, x21, sxtx]", - "lsr w20, w23, w20", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "sbfx x23, x20, #3, #13", + "mov w20, #0x1", + "lsl x24, x20, x22", + "ldrb w20, [x21, x23, sxtx]", + "orr x25, x20, x24", + "strb w25, [x21, x23, sxtx]", + "lsr w21, w20, w22", + "ubfx x20, x21, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "bts eax, ebx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "0x0f 0xab", "ExpectedArm64ASM": [ - "lsr w20, w4, w7", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "mov w21, #0x1", - "lsl w21, w21, w7", - "orr w4, w4, w21", - "msr nzcv, x20" + "mov x20, x7", + "mov x21, x4", + "lsr w22, w21, w20", + "ubfx x23, x22, #0, #1", + "lsl x22, x23, #29", + "mov w23, #0x1", + "lsl w24, w23, w20", + "orr w20, w21, w24", + "mov x4, x20", + "msr nzcv, x22" ] }, "bts [rax], ebx": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 13, "Comment": "0x0f 0xab", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "sbfx x21, x7, #3, #29", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldrb w23, [x4, x21, sxtx]", - "orr x22, x23, x22", - "strb w22, [x4, x21, sxtx]", - "lsr w20, w23, w20", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "sbfx x23, x20, #3, #29", + "mov w20, #0x1", + "lsl x24, x20, x22", + "ldrb w20, [x21, x23, sxtx]", + "orr x25, x20, x24", + "strb w25, [x21, x23, sxtx]", + "lsr w21, w20, w22", + "ubfx x20, x21, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "bts rax, rbx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "0x0f 0xab", "ExpectedArm64ASM": [ - "lsr x20, x4, x7", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "mov w21, #0x1", - "lsl x21, x21, x7", - "orr x4, x4, x21", - "msr nzcv, x20" + "mov x20, x7", + "mov x21, x4", + "lsr x22, x21, x20", + "ubfx x23, x22, #0, #1", + "lsl x22, x23, #29", + "mov w23, #0x1", + "lsl x24, x23, x20", + "orr x20, x21, x24", + "mov x4, x20", + "msr nzcv, x22" ] }, "bts [rax], rbx": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 13, "Comment": "0x0f 0xab", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "asr x21, x7, #3", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldrb w23, [x4, x21, sxtx]", - "orr x22, x23, x22", - "strb w22, [x4, x21, sxtx]", - "lsr w20, w23, w20", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "asr x23, x20, #3", + "mov w20, #0x1", + "lsl x24, x20, x22", + "ldrb w20, [x21, x23, sxtx]", + "orr x25, x20, x24", + "strb w25, [x21, x23, sxtx]", + "lsr w21, w20, w22", + "ubfx x20, x21, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "lock bts [rax], bx": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 12, "Comment": "0x0f 0xb3", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "sbfx x21, x7, #3, #13", - "add x21, x4, x21", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldsetalb w22, w21, [x21]", - "lsr w20, w21, w20", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "sbfx x23, x20, #3, #13", + "add x20, x21, x23", + "mov w21, #0x1", + "lsl x23, x21, x22", + "ldsetalb w23, w21, [x20]", + "lsr w20, w21, w22", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "lock bts [rax], ebx": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 12, "Comment": "0x0f 0xb3", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "sbfx x21, x7, #3, #29", - "add x21, x4, x21", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldsetalb w22, w21, [x21]", - "lsr w20, w21, w20", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "sbfx x23, x20, #3, #29", + "add x20, x21, x23", + "mov w21, #0x1", + "lsl x23, x21, x22", + "ldsetalb w23, w21, [x20]", + "lsr w20, w21, w22", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "lock bts [rax], rbx": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 12, "Comment": "0x0f 0xb3", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "asr x21, x7, #3", - "add x21, x4, x21", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldsetalb w22, w21, [x21]", - "lsr w20, w21, w20", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "asr x23, x20, #3", + "add x20, x21, x23", + "mov w21, #0x1", + "lsl x23, x21, x22", + "ldsetalb w23, w21, [x20]", + "lsr w20, w21, w22", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "imul ax, bx": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 12, "Comment": "0x0f 0xaf", "ExpectedArm64ASM": [ - "sxth x20, w4", - "sxth x21, w7", - "mul x20, x20, x21", - "sbfx x21, x20, #16, #16", - "bfxil x4, x20, #0, #16", - "sbfx x20, x20, #15, #1", - "cmp x21, x20", + "mov x20, x4", + "mov x21, x7", + "sxth x22, w20", + "sxth x23, w21", + "mul x21, x22, x23", + "sbfx x22, x21, #16, #16", + "mov x23, x20", + "bfxil x23, x21, #0, #16", + "mov x4, x23", + "sbfx x20, x21, #15, #1", + "cmp x22, x20", "ccmn xzr, #0, #nzCV, eq" ] }, "imul eax, ebx": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0xaf", "ExpectedArm64ASM": [ - "smull x20, w4, w7", - "asr x20, x20, #32", - "mul w4, w4, w7", - "sbfx x21, x4, #31, #1", - "cmp x20, x21", + "mov x20, x4", + "mov x21, x7", + "smull x22, w20, w21", + "asr x23, x22, #32", + "mul w22, w20, w21", + "mov x4, x22", + "sbfx x20, x22, #31, #1", + "cmp x23, x20", "ccmn xzr, #0, #nzCV, eq" ] }, "imul rax, rbx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "0x0f 0xaf", "ExpectedArm64ASM": [ - "smulh x20, x4, x7", - "mul x4, x4, x7", - "asr x21, x4, #63", - "cmp x20, x21", + "mov x20, x4", + "mov x21, x7", + "smulh x22, x20, x21", + "mul x23, x20, x21", + "mov x4, x23", + "asr x20, x23, #63", + "cmp x22, x20", "ccmn xzr, #0, #nzCV, eq" ] }, "cmpxchg al, bl": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 17, "Comment": "0x0f 0xb0", "ExpectedArm64ASM": [ - "uxtb w20, w7", - "uxtb w21, w4", - "uxtb x22, w4", - "eor w27, w22, w21", - "lsl w0, w22, #24", - "cmp w0, w21, lsl #24", - "sub w26, w22, w21", - "mrs x21, nzcv", - "eor w21, w21, #0x20000000", - "msr nzcv, x21", - "bfxil x4, x20, #0, #8" + "mov x20, x7", + "uxtb w21, w20", + "mov x20, x4", + "uxtb w22, w20", + "uxtb x23, w20", + "eor w24, w23, w22", + "mov x27, x24", + "lsl w0, w23, #24", + "cmp w0, w22, lsl #24", + "sub w24, w23, w22", + "mov x26, x24", + "mrs x22, nzcv", + "eor w23, w22, #0x20000000", + "msr nzcv, x23", + "mov x22, x20", + "bfxil x22, x21, #0, #8", + "mov x4, x22" ] }, "cmpxchg [rax], bl": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 19, "Comment": "0x0f 0xb0", "ExpectedArm64ASM": [ - "uxtb w20, w7", - "uxtb w21, w4", - "mov w1, w21", - "casalb w1, w20, [x4]", - "mov w20, w1", - "bfxil x4, x20, #0, #8", - "eor w27, w21, w20", - "lsl w0, w21, #24", - "cmp w0, w20, lsl #24", - "sub w26, w21, w20", + "mov x20, x7", + "uxtb w21, w20", + "mov x20, x4", + "uxtb w22, w20", + "mov w1, w22", + "casalb w1, w21, [x20]", + "mov w23, w1", + "mov x21, x20", + "bfxil x21, x23, #0, #8", + "mov x4, x21", + "eor w20, w22, w23", + "mov x27, x20", + "lsl w0, w22, #24", + "cmp w0, w23, lsl #24", + "sub w20, w22, w23", + "mov x26, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "cmpxchg ax, bx": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 17, "Comment": "0x0f 0xb1", "ExpectedArm64ASM": [ - "uxth w20, w7", - "uxth w21, w4", - "uxth x22, w4", - "eor w27, w22, w21", - "lsl w0, w22, #16", - "cmp w0, w21, lsl #16", - "sub w26, w22, w21", - "mrs x21, nzcv", - "eor w21, w21, #0x20000000", - "msr nzcv, x21", - "bfxil x4, x20, #0, #16" + "mov x20, x7", + "uxth w21, w20", + "mov x20, x4", + "uxth w22, w20", + "uxth x23, w20", + "eor w24, w23, w22", + "mov x27, x24", + "lsl w0, w23, #16", + "cmp w0, w22, lsl #16", + "sub w24, w23, w22", + "mov x26, x24", + "mrs x22, nzcv", + "eor w23, w22, #0x20000000", + "msr nzcv, x23", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22" ] }, "cmpxchg [rax], bx": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 19, "Comment": "0x0f 0xb1", "ExpectedArm64ASM": [ - "uxth w20, w7", - "uxth w21, w4", - "mov w1, w21", - "casalh w1, w20, [x4]", - "mov w20, w1", - "bfxil x4, x20, #0, #16", - "eor w27, w21, w20", - "lsl w0, w21, #16", - "cmp w0, w20, lsl #16", - "sub w26, w21, w20", + "mov x20, x7", + "uxth w21, w20", + "mov x20, x4", + "uxth w22, w20", + "mov w1, w22", + "casalh w1, w21, [x20]", + "mov w23, w1", + "mov x21, x20", + "bfxil x21, x23, #0, #16", + "mov x4, x21", + "eor w20, w22, w23", + "mov x27, x20", + "lsl w0, w22, #16", + "cmp w0, w23, lsl #16", + "sub w20, w22, w23", + "mov x26, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "cmpxchg eax, ebx": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 13, "Comment": "0x0f 0xb1", "ExpectedArm64ASM": [ - "mov w20, w7", - "mov w21, w4", - "mov w22, w4", - "eor w27, w22, w21", - "subs w26, w22, w21", - "mrs x21, nzcv", - "eor w21, w21, #0x20000000", - "msr nzcv, x21", - "mov x4, x20" + "mov x20, x7", + "mov w21, w20", + "mov x20, x4", + "mov w22, w20", + "mov w23, w20", + "eor w20, w23, w22", + "mov x27, x20", + "subs w20, w23, w22", + "mov x26, x20", + "mrs x20, nzcv", + "eor w22, w20, #0x20000000", + "msr nzcv, x22", + "mov x4, x21" ] }, "cmpxchg [rax], ebx": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 17, "Comment": "0x0f 0xb1", "ExpectedArm64ASM": [ - "mov w20, w7", - "mov w21, w4", - "mov w1, w21", - "casal w1, w20, [x4]", - "mov w20, w1", - "cmp w20, w21", - "csel x4, x4, x20, eq", - "eor w27, w21, w20", - "subs w26, w21, w20", + "mov x20, x7", + "mov w21, w20", + "mov x20, x4", + "mov w22, w20", + "mov w1, w22", + "casal w1, w21, [x20]", + "mov w23, w1", + "cmp w23, w22", + "csel x21, x20, x23, eq", + "mov x4, x21", + "eor w20, w22, w23", + "mov x27, x20", + "subs w20, w22, w23", + "mov x26, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "cmpxchg rax, rbx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "0x0f 0xb1", "ExpectedArm64ASM": [ "mov x20, x7", - "mov w27, #0x0", - "subs x26, x4, x4", + "mov x21, x4", + "mov w22, #0x0", + "mov x27, x22", + "subs x22, x21, x21", + "mov x26, x22", "mrs x21, nzcv", - "eor w21, w21, #0x20000000", - "msr nzcv, x21", + "eor w22, w21, #0x20000000", + "msr nzcv, x22", "mov x4, x20" ] }, "cmpxchg [rax], rbx": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 13, "Comment": "0x0f 0xb1", "ExpectedArm64ASM": [ - "mov x20, x4", - "mov x1, x20", - "casal x1, x7, [x20]", - "mov x4, x1", - "eor w27, w20, w4", - "subs x26, x20, x4", + "mov x20, x7", + "mov x21, x4", + "mov x1, x21", + "casal x1, x20, [x21]", + "mov x22, x1", + "mov x4, x22", + "eor w20, w21, w22", + "mov x27, x20", + "subs x20, x21, x22", + "mov x26, x20", "mrs x20, nzcv", - "eor w20, w20, #0x20000000", - "msr nzcv, x20" + "eor w21, w20, #0x20000000", + "msr nzcv, x21" ] }, "btr ax, bx": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 13, "Comment": "0x0f 0xb3", "ExpectedArm64ASM": [ - "and x20, x7, #0xf", - "lsr w21, w4, w20", - "ubfx x21, x21, #0, #1", - "lsl x21, x21, #29", - "mov w22, #0x1", - "lsl w20, w22, w20", - "bic w20, w4, w20", - "bfxil x4, x20, #0, #16", - "msr nzcv, x21" + "mov x20, x7", + "mov x21, x4", + "and x22, x20, #0xf", + "lsr w20, w21, w22", + "ubfx x23, x20, #0, #1", + "lsl x20, x23, #29", + "mov w23, #0x1", + "lsl w24, w23, w22", + "bic w22, w21, w24", + "mov x23, x21", + "bfxil x23, x22, #0, #16", + "mov x4, x23", + "msr nzcv, x20" ] }, "btr [rax], bx": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 13, "Comment": "0x0f 0xb3", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "sbfx x21, x7, #3, #13", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldrb w23, [x4, x21, sxtx]", - "bic x22, x23, x22", - "strb w22, [x4, x21, sxtx]", - "lsr w20, w23, w20", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "sbfx x23, x20, #3, #13", + "mov w20, #0x1", + "lsl x24, x20, x22", + "ldrb w20, [x21, x23, sxtx]", + "bic x25, x20, x24", + "strb w25, [x21, x23, sxtx]", + "lsr w21, w20, w22", + "ubfx x20, x21, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "btr eax, ebx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "0x0f 0xb3", "ExpectedArm64ASM": [ - "lsr w20, w4, w7", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "mov w21, #0x1", - "lsl w21, w21, w7", - "bic w4, w4, w21", - "msr nzcv, x20" + "mov x20, x7", + "mov x21, x4", + "lsr w22, w21, w20", + "ubfx x23, x22, #0, #1", + "lsl x22, x23, #29", + "mov w23, #0x1", + "lsl w24, w23, w20", + "bic w20, w21, w24", + "mov x4, x20", + "msr nzcv, x22" ] }, "btr [rax], ebx": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 13, "Comment": "0x0f 0xb3", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "sbfx x21, x7, #3, #29", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldrb w23, [x4, x21, sxtx]", - "bic x22, x23, x22", - "strb w22, [x4, x21, sxtx]", - "lsr w20, w23, w20", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "sbfx x23, x20, #3, #29", + "mov w20, #0x1", + "lsl x24, x20, x22", + "ldrb w20, [x21, x23, sxtx]", + "bic x25, x20, x24", + "strb w25, [x21, x23, sxtx]", + "lsr w21, w20, w22", + "ubfx x20, x21, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "btr rax, rbx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "0x0f 0xb3", "ExpectedArm64ASM": [ - "lsr x20, x4, x7", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "mov w21, #0x1", - "lsl x21, x21, x7", - "bic x4, x4, x21", - "msr nzcv, x20" + "mov x20, x7", + "mov x21, x4", + "lsr x22, x21, x20", + "ubfx x23, x22, #0, #1", + "lsl x22, x23, #29", + "mov w23, #0x1", + "lsl x24, x23, x20", + "bic x20, x21, x24", + "mov x4, x20", + "msr nzcv, x22" ] }, "btr [rax], rbx": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 13, "Comment": "0x0f 0xb3", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "asr x21, x7, #3", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldrb w23, [x4, x21, sxtx]", - "bic x22, x23, x22", - "strb w22, [x4, x21, sxtx]", - "lsr w20, w23, w20", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "asr x23, x20, #3", + "mov w20, #0x1", + "lsl x24, x20, x22", + "ldrb w20, [x21, x23, sxtx]", + "bic x25, x20, x24", + "strb w25, [x21, x23, sxtx]", + "lsr w21, w20, w22", + "ubfx x20, x21, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "movzx ax, bl": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xb6", "ExpectedArm64ASM": [ - "uxtb w20, w7", - "bfxil x4, x20, #0, #16" + "mov x20, x7", + "uxtb w21, w20", + "mov x20, x4", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22" ] }, "lock btr [rax], bx": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 12, "Comment": "0x0f 0xb3", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "sbfx x21, x7, #3, #13", - "add x21, x4, x21", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldclralb w22, w21, [x21]", - "lsr w20, w21, w20", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "sbfx x23, x20, #3, #13", + "add x20, x21, x23", + "mov w21, #0x1", + "lsl x23, x21, x22", + "ldclralb w23, w21, [x20]", + "lsr w20, w21, w22", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "lock btr [rax], ebx": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 12, "Comment": "0x0f 0xb3", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "sbfx x21, x7, #3, #29", - "add x21, x4, x21", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldclralb w22, w21, [x21]", - "lsr w20, w21, w20", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "sbfx x23, x20, #3, #29", + "add x20, x21, x23", + "mov w21, #0x1", + "lsl x23, x21, x22", + "ldclralb w23, w21, [x20]", + "lsr w20, w21, w22", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "lock btr [rax], rbx": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 12, "Comment": "0x0f 0xb3", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "asr x21, x7, #3", - "add x21, x4, x21", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldclralb w22, w21, [x21]", - "lsr w20, w21, w20", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "asr x23, x20, #3", + "add x20, x21, x23", + "mov w21, #0x1", + "lsl x23, x21, x22", + "ldclralb w23, w21, [x20]", + "lsr w20, w21, w22", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "movzx ax, byte [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xb6", "ExpectedArm64ASM": [ - "ldrb w20, [x4]", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "ldrb w21, [x20]", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22" ] }, "movzx eax, bl": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0xb6", "ExpectedArm64ASM": [ - "uxtb w4, w7" + "mov x20, x7", + "uxtb w21, w20", + "mov x4, x21" ] }, "movzx eax, byte [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0xb6", "ExpectedArm64ASM": [ - "ldrb w4, [x4]" + "mov x20, x4", + "ldrb w21, [x20]", + "mov x4, x21" ] }, "movzx rax, bl": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0xb6", "ExpectedArm64ASM": [ - "uxtb w4, w7" + "mov x20, x7", + "uxtb w21, w20", + "mov x4, x21" ] }, "movzx rax, byte [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0xb6", "ExpectedArm64ASM": [ - "ldrb w4, [x4]" + "mov x20, x4", + "ldrb w21, [x20]", + "mov x4, x21" ] }, "movzx eax, bx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0xb7", "ExpectedArm64ASM": [ - "uxth w4, w7" + "mov x20, x7", + "uxth w21, w20", + "mov x4, x21" ] }, "movzx eax, word [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0xb7", "ExpectedArm64ASM": [ - "ldrh w4, [x4]" + "mov x20, x4", + "ldrh w21, [x20]", + "mov x4, x21" ] }, "movzx rax, bx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0xb7", "ExpectedArm64ASM": [ - "uxth w4, w7" + "mov x20, x7", + "uxth w21, w20", + "mov x4, x21" ] }, "movzx rax, word [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0xb7", "ExpectedArm64ASM": [ - "ldrh w4, [x4]" + "mov x20, x4", + "ldrh w21, [x20]", + "mov x4, x21" ] }, "btc ax, bx": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 13, "Comment": "0x0f 0xbb", "ExpectedArm64ASM": [ - "and x20, x7, #0xf", - "lsr w21, w4, w20", - "ubfx x21, x21, #0, #1", - "lsl x21, x21, #29", - "mov w22, #0x1", - "lsl w20, w22, w20", - "eor w20, w4, w20", - "bfxil x4, x20, #0, #16", - "msr nzcv, x21" + "mov x20, x7", + "mov x21, x4", + "and x22, x20, #0xf", + "lsr w20, w21, w22", + "ubfx x23, x20, #0, #1", + "lsl x20, x23, #29", + "mov w23, #0x1", + "lsl w24, w23, w22", + "eor w22, w21, w24", + "mov x23, x21", + "bfxil x23, x22, #0, #16", + "mov x4, x23", + "msr nzcv, x20" ] }, "btc [rax], bx": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 13, "Comment": "0x0f 0xbb", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "sbfx x21, x7, #3, #13", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldrb w23, [x4, x21, sxtx]", - "eor x22, x23, x22", - "strb w22, [x4, x21, sxtx]", - "lsr w20, w23, w20", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "sbfx x23, x20, #3, #13", + "mov w20, #0x1", + "lsl x24, x20, x22", + "ldrb w20, [x21, x23, sxtx]", + "eor x25, x20, x24", + "strb w25, [x21, x23, sxtx]", + "lsr w21, w20, w22", + "ubfx x20, x21, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "btc eax, ebx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "0x0f 0xbb", "ExpectedArm64ASM": [ - "lsr w20, w4, w7", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "mov w21, #0x1", - "lsl w21, w21, w7", - "eor w4, w4, w21", - "msr nzcv, x20" + "mov x20, x7", + "mov x21, x4", + "lsr w22, w21, w20", + "ubfx x23, x22, #0, #1", + "lsl x22, x23, #29", + "mov w23, #0x1", + "lsl w24, w23, w20", + "eor w20, w21, w24", + "mov x4, x20", + "msr nzcv, x22" ] }, "btc [rax], ebx": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 13, "Comment": "0x0f 0xbb", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "sbfx x21, x7, #3, #29", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldrb w23, [x4, x21, sxtx]", - "eor x22, x23, x22", - "strb w22, [x4, x21, sxtx]", - "lsr w20, w23, w20", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "sbfx x23, x20, #3, #29", + "mov w20, #0x1", + "lsl x24, x20, x22", + "ldrb w20, [x21, x23, sxtx]", + "eor x25, x20, x24", + "strb w25, [x21, x23, sxtx]", + "lsr w21, w20, w22", + "ubfx x20, x21, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "btc rax, rbx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": "0x0f 0xbb", "ExpectedArm64ASM": [ - "lsr x20, x4, x7", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "mov w21, #0x1", - "lsl x21, x21, x7", - "eor x4, x4, x21", - "msr nzcv, x20" + "mov x20, x7", + "mov x21, x4", + "lsr x22, x21, x20", + "ubfx x23, x22, #0, #1", + "lsl x22, x23, #29", + "mov w23, #0x1", + "lsl x24, x23, x20", + "eor x20, x21, x24", + "mov x4, x20", + "msr nzcv, x22" ] }, "btc [rax], rbx": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 13, "Comment": "0x0f 0xbb", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "asr x21, x7, #3", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldrb w23, [x4, x21, sxtx]", - "eor x22, x23, x22", - "strb w22, [x4, x21, sxtx]", - "lsr w20, w23, w20", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "asr x23, x20, #3", + "mov w20, #0x1", + "lsl x24, x20, x22", + "ldrb w20, [x21, x23, sxtx]", + "eor x25, x20, x24", + "strb w25, [x21, x23, sxtx]", + "lsr w21, w20, w22", + "ubfx x20, x21, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "lock btc [rax], bx": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 12, "Comment": "0x0f 0xb3", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "sbfx x21, x7, #3, #13", - "add x21, x4, x21", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldeoralb w22, w21, [x21]", - "lsr w20, w21, w20", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "sbfx x23, x20, #3, #13", + "add x20, x21, x23", + "mov w21, #0x1", + "lsl x23, x21, x22", + "ldeoralb w23, w21, [x20]", + "lsr w20, w21, w22", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "lock btc [rax], ebx": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 12, "Comment": "0x0f 0xb3", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "sbfx x21, x7, #3, #29", - "add x21, x4, x21", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldeoralb w22, w21, [x21]", - "lsr w20, w21, w20", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "sbfx x23, x20, #3, #29", + "add x20, x21, x23", + "mov w21, #0x1", + "lsl x23, x21, x22", + "ldeoralb w23, w21, [x20]", + "lsr w20, w21, w22", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "lock btc [rax], rbx": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 12, "Comment": "0x0f 0xb3", "ExpectedArm64ASM": [ - "ubfx x20, x7, #0, #3", - "asr x21, x7, #3", - "add x21, x4, x21", - "mov w22, #0x1", - "lsl x22, x22, x20", - "ldeoralb w22, w21, [x21]", - "lsr w20, w21, w20", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", + "mov x20, x7", + "mov x21, x4", + "ubfx x22, x20, #0, #3", + "asr x23, x20, #3", + "add x20, x21, x23", + "mov w21, #0x1", + "lsl x23, x21, x22", + "ldeoralb w23, w21, [x20]", + "lsr w20, w21, w22", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "bsf ax, bx": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 14, "Comment": "0x0f 0xbc", "ExpectedArm64ASM": [ - "uxth w20, w4", - "uxth w21, w7", - "uxth w0, w21", + "mov x20, x4", + "uxth w21, w20", + "mov x22, x7", + "uxth w23, w22", + "uxth w0, w23", "cmp w0, #0x0 (0)", "rbit w0, w0", "clz w22, w0", "csinv w22, w22, wzr, ne", - "cmn wzr, w21, lsl #16", - "csel x20, x20, x22, eq", - "bfxil x4, x20, #0, #16" + "cmn wzr, w23, lsl #16", + "csel x23, x21, x22, eq", + "mov x21, x20", + "bfxil x21, x23, #0, #16", + "mov x4, x21" ] }, "bsf eax, ebx": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 11, "Comment": "0x0f 0xbc", "ExpectedArm64ASM": [ - "mov w20, w7", - "lsr w0, w20, #0", + "mov x20, x4", + "mov x21, x7", + "mov w22, w21", + "lsr w0, w22, #0", "cmp w0, #0x0 (0)", "rbit w0, w0", "clz w21, w0", "csinv w21, w21, wzr, ne", - "tst w20, w20", - "csel x4, x4, x21, eq" + "tst w22, w22", + "csel x22, x20, x21, eq", + "mov x4, x22" ] }, "bsf rax, rbx": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0xbc", "ExpectedArm64ASM": [ - "rbit x0, x7", - "cmp x7, #0x0 (0)", - "clz x20, x0", - "csinv x20, x20, xzr, ne", - "tst x7, x7", - "csel x4, x4, x20, eq" + "mov x20, x4", + "mov x21, x7", + "rbit x0, x21", + "cmp x21, #0x0 (0)", + "clz x22, x0", + "csinv x22, x22, xzr, ne", + "tst x21, x21", + "csel x21, x20, x22, eq", + "mov x4, x21" ] }, "bsr ax, bx": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 14, "Comment": "0x0f 0xbd", "ExpectedArm64ASM": [ - "uxth w20, w4", - "uxth w21, w7", + "mov x20, x4", + "uxth w21, w20", + "mov x22, x7", + "uxth w23, w22", "mov x0, #0xf", - "lsl w22, w21, #16", + "lsl w22, w23, #16", "orr w22, w22, #0x8000", "clz w22, w22", "sub x22, x0, x22", - "cmn wzr, w21, lsl #16", - "csel x20, x20, x22, eq", - "bfxil x4, x20, #0, #16" + "cmn wzr, w23, lsl #16", + "csel x23, x21, x22, eq", + "mov x21, x20", + "bfxil x21, x23, #0, #16", + "mov x4, x21" ] }, "bsr eax, ebx": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0xbd", "ExpectedArm64ASM": [ - "mov w20, w7", + "mov x20, x4", + "mov x21, x7", + "mov w22, w21", "mov x0, #0x1f", - "clz w21, w20", + "clz w21, w22", "sub x21, x0, x21", - "tst w20, w20", - "csel x4, x4, x21, eq" + "tst w22, w22", + "csel x22, x20, x21, eq", + "mov x4, x22" ] }, "bsr rax, rbx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "0x0f 0xbd", "ExpectedArm64ASM": [ + "mov x20, x4", + "mov x21, x7", "mov x0, #0x3f", - "clz x20, x7", - "sub x20, x0, x20", - "tst x7, x7", - "csel x4, x4, x20, eq" + "clz x22, x21", + "sub x22, x0, x22", + "tst x21, x21", + "csel x21, x20, x22, eq", + "mov x4, x21" ] }, "movsx ax, bl": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0x0f 0xbe", "ExpectedArm64ASM": [ - "uxtb w20, w7", - "sxtb x20, w20", - "bfxil x4, x20, #0, #16" + "mov x20, x7", + "uxtb w21, w20", + "sxtb x20, w21", + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x4, x22" ] }, "movsx ax, byte [rax]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xbe", "ExpectedArm64ASM": [ - "ldrb w20, [x4]", - "sxtb x20, w20", - "bfxil x4, x20, #0, #16" + "mov x20, x4", + "ldrb w21, [x20]", + "sxtb x22, w21", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21" ] }, "movsx eax, bl": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xbe", "ExpectedArm64ASM": [ - "uxtb w20, w7", - "sxtb x20, w20", - "mov w4, w20" + "mov x20, x7", + "uxtb w21, w20", + "sxtb x20, w21", + "mov w21, w20", + "mov x4, x21" ] }, "movsx eax, byte [rax]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xbe", "ExpectedArm64ASM": [ - "ldrb w20, [x4]", - "sxtb x20, w20", - "mov w4, w20" + "mov x20, x4", + "ldrb w21, [x20]", + "sxtb x20, w21", + "mov w21, w20", + "mov x4, x21" ] }, "movsx rax, bl": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0xbe", "ExpectedArm64ASM": [ - "uxtb w20, w7", - "sxtb x4, w20" + "mov x20, x7", + "uxtb w21, w20", + "sxtb x20, w21", + "mov x4, x20" ] }, "movsx rax, byte [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0xbe", "ExpectedArm64ASM": [ - "ldrb w20, [x4]", - "sxtb x4, w20" + "mov x20, x4", + "ldrb w21, [x20]", + "sxtb x20, w21", + "mov x4, x20" ] }, "movsx eax, bx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xbf", "ExpectedArm64ASM": [ - "uxth w20, w7", - "sxth x20, w20", - "mov w4, w20" + "mov x20, x7", + "uxth w21, w20", + "sxth x20, w21", + "mov w21, w20", + "mov x4, x21" ] }, "movsx eax, word [rax]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xbf", "ExpectedArm64ASM": [ - "ldrh w20, [x4]", - "sxth x20, w20", - "mov w4, w20" + "mov x20, x4", + "ldrh w21, [x20]", + "sxth x20, w21", + "mov w21, w20", + "mov x4, x21" ] }, "movsx rax, bx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0xbf", "ExpectedArm64ASM": [ - "uxth w20, w7", - "sxth x4, w20" + "mov x20, x7", + "uxth w21, w20", + "sxth x20, w21", + "mov x4, x20" ] }, "movsx rax, word [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0xbf", "ExpectedArm64ASM": [ - "ldrh w20, [x4]", - "sxth x4, w20" + "mov x20, x4", + "ldrh w21, [x20]", + "sxth x20, w21", + "mov x4, x20" ] }, "xadd al, bl": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 16, "Comment": "0x0f 0xc0", "ExpectedArm64ASM": [ - "uxtb w20, w4", - "uxtb w21, w7", - "eor w27, w20, w21", - "lsl w0, w20, #24", - "cmn w0, w21, lsl #24", - "add w26, w20, w21", - "bfxil x7, x20, #0, #8", - "bfxil x4, x26, #0, #8" + "mov x20, x4", + "uxtb w21, w20", + "mov x22, x7", + "uxtb w23, w22", + "eor w24, w21, w23", + "mov x27, x24", + "lsl w0, w21, #24", + "cmn w0, w23, lsl #24", + "add w24, w21, w23", + "mov x26, x24", + "mov x23, x22", + "bfxil x23, x21, #0, #8", + "mov x7, x23", + "mov x21, x20", + "bfxil x21, x24, #0, #8", + "mov x4, x21" ] }, "xadd [rax], bl": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 13, "Comment": "0x0f 0xc0", "ExpectedArm64ASM": [ - "uxtb w20, w7", - "ldaddalb w20, w21, [x4]", - "bfxil x7, x21, #0, #8", - "eor w27, w21, w20", - "lsl w0, w21, #24", - "cmn w0, w20, lsl #24", - "add w26, w21, w20" + "mov x20, x4", + "mov x21, x7", + "uxtb w22, w21", + "ldaddalb w22, w23, [x20]", + "mov x20, x21", + "bfxil x20, x23, #0, #8", + "mov x7, x20", + "eor w20, w23, w22", + "mov x27, x20", + "lsl w0, w23, #24", + "cmn w0, w22, lsl #24", + "add w20, w23, w22", + "mov x26, x20" ] }, "xadd ax, bx": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 16, "Comment": "0x0f 0xc1", "ExpectedArm64ASM": [ - "uxth w20, w4", - "uxth w21, w7", - "eor w27, w20, w21", - "lsl w0, w20, #16", - "cmn w0, w21, lsl #16", - "add w26, w20, w21", - "bfxil x7, x20, #0, #16", - "bfxil x4, x26, #0, #16" + "mov x20, x4", + "uxth w21, w20", + "mov x22, x7", + "uxth w23, w22", + "eor w24, w21, w23", + "mov x27, x24", + "lsl w0, w21, #16", + "cmn w0, w23, lsl #16", + "add w24, w21, w23", + "mov x26, x24", + "mov x23, x22", + "bfxil x23, x21, #0, #16", + "mov x7, x23", + "mov x21, x20", + "bfxil x21, x24, #0, #16", + "mov x4, x21" ] }, "xadd [rax], bx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 13, "Comment": "0x0f 0xc1", "ExpectedArm64ASM": [ - "uxth w20, w7", - "ldaddalh w20, w21, [x4]", - "bfxil x7, x21, #0, #16", - "eor w27, w21, w20", - "lsl w0, w21, #16", - "cmn w0, w20, lsl #16", - "add w26, w21, w20" + "mov x20, x4", + "mov x21, x7", + "uxth w22, w21", + "ldaddalh w22, w23, [x20]", + "mov x20, x21", + "bfxil x20, x23, #0, #16", + "mov x7, x20", + "eor w20, w23, w22", + "mov x27, x20", + "lsl w0, w23, #16", + "cmn w0, w22, lsl #16", + "add w20, w23, w22", + "mov x26, x20" ] }, "xadd eax, ebx": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": "0x0f 0xc1", "ExpectedArm64ASM": [ - "mov w20, w4", - "mov w21, w7", - "eor w27, w20, w21", - "adds w26, w20, w21", - "mov x7, x20", - "mov x4, x26" + "mov x20, x4", + "mov w21, w20", + "mov x20, x7", + "mov w22, w20", + "eor w20, w21, w22", + "mov x27, x20", + "adds w20, w21, w22", + "mov x26, x20", + "mov x7, x21", + "mov x4, x20" ] }, "xadd [rax], ebx": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0xc1", "ExpectedArm64ASM": [ - "mov w20, w7", - "ldaddal w20, w7, [x4]", - "eor w27, w7, w20", - "adds w26, w7, w20" + "mov x20, x4", + "mov x21, x7", + "mov w22, w21", + "ldaddal w22, w21, [x20]", + "mov x7, x21", + "eor w20, w21, w22", + "mov x27, x20", + "adds w20, w21, w22", + "mov x26, x20" ] }, "xadd rax, rbx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "0x0f 0xc1", "ExpectedArm64ASM": [ "mov x20, x4", - "eor w27, w20, w7", - "adds x26, x20, x7", + "mov x21, x7", + "eor w22, w20, w21", + "mov x27, x22", + "adds x22, x20, x21", + "mov x26, x22", "mov x7, x20", - "mov x4, x26" + "mov x4, x22" ] }, "xadd [rax], rbx": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": "0x0f 0xc1", "ExpectedArm64ASM": [ - "mov x20, x7", - "ldaddal x20, x7, [x4]", - "eor w27, w7, w20", - "adds x26, x7, x20" + "mov x20, x4", + "mov x21, x7", + "ldaddal x21, x22, [x20]", + "mov x7, x22", + "eor w20, w22, w21", + "mov x27, x20", + "adds x20, x22, x21", + "mov x26, x20" ] }, "cmpps xmm0, xmm1, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0xc2", "ExpectedArm64ASM": [ - "fcmeq v16.4s, v16.4s, v17.4s" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "fcmeq v4.4s, v3.4s, v2.4s", + "mov v16.16b, v4.16b" ] }, "cmpps xmm0, xmm1, 1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0xc2", "ExpectedArm64ASM": [ - "fcmgt v16.4s, v17.4s, v16.4s" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "fcmgt v4.4s, v2.4s, v3.4s", + "mov v16.16b, v4.16b" ] }, "cmpps xmm0, xmm1, 2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0xc2", "ExpectedArm64ASM": [ - "fcmge v16.4s, v17.4s, v16.4s" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "fcmge v4.4s, v2.4s, v3.4s", + "mov v16.16b, v4.16b" ] }, "cmpps xmm0, xmm1, 3": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "0x0f 0xc2", "ExpectedArm64ASM": [ - "fcmge v0.4s, v16.4s, v17.4s", - "fcmgt v1.4s, v17.4s, v16.4s", - "orr v16.16b, v0.16b, v1.16b", - "mvn v16.16b, v16.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "fcmge v0.4s, v3.4s, v2.4s", + "fcmgt v1.4s, v2.4s, v3.4s", + "orr v4.16b, v0.16b, v1.16b", + "mvn v4.16b, v4.16b", + "mov v16.16b, v4.16b" ] }, "cmpps xmm0, xmm1, 4": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xc2", "ExpectedArm64ASM": [ - "fcmeq v16.4s, v16.4s, v17.4s", - "mvn v16.16b, v16.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "fcmeq v4.4s, v3.4s, v2.4s", + "mvn v4.16b, v4.16b", + "mov v16.16b, v4.16b" ] }, "cmpps xmm0, xmm1, 5": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xc2", "ExpectedArm64ASM": [ - "fcmgt v2.4s, v17.4s, v16.4s", - "mvn v16.16b, v2.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "fcmgt v4.4s, v2.4s, v3.4s", + "mvn v2.16b, v4.16b", + "mov v16.16b, v2.16b" ] }, "cmpps xmm0, xmm1, 6": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xc2", "ExpectedArm64ASM": [ - "fcmge v2.4s, v17.4s, v16.4s", - "mvn v16.16b, v2.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "fcmge v4.4s, v2.4s, v3.4s", + "mvn v2.16b, v4.16b", + "mov v16.16b, v2.16b" ] }, "cmpps xmm0, xmm1, 7": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xc2", "ExpectedArm64ASM": [ - "fcmge v0.4s, v16.4s, v17.4s", - "fcmgt v1.4s, v17.4s, v16.4s", - "orr v16.16b, v0.16b, v1.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "fcmge v0.4s, v3.4s, v2.4s", + "fcmgt v1.4s, v2.4s, v3.4s", + "orr v4.16b, v0.16b, v1.16b", + "mov v16.16b, v4.16b" ] }, "movnti [rax], ebx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0x0f 0xc3", "ExpectedArm64ASM": [ - "mov w20, w7", - "str w20, [x4]" + "mov x20, x7", + "mov w21, w20", + "mov x20, x4", + "str w21, [x20]" ] }, "movnti [rax], rbx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0xc3", "ExpectedArm64ASM": [ - "str x7, [x4]" + "mov x20, x7", + "mov x21, x4", + "str x20, [x21]" ] }, "pinsrw mm0, eax, 0": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xc4", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "mov v2.h[0], w4", - "str d2, [x28, #768]" + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.h[0], w20", + "str d3, [x28, #768]" ] }, "pinsrw mm0, eax, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xc4", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "mov v2.h[1], w4", - "str d2, [x28, #768]" + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.h[1], w20", + "str d3, [x28, #768]" ] }, "pinsrw mm0, eax, 2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xc4", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "mov v2.h[2], w4", - "str d2, [x28, #768]" + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.h[2], w20", + "str d3, [x28, #768]" ] }, "pinsrw mm0, eax, 3": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xc4", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "mov v2.h[3], w4", - "str d2, [x28, #768]" + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.h[3], w20", + "str d3, [x28, #768]" ] }, "pinsrw mm0, eax, 4": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xc4", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "mov v2.h[0], w4", - "str d2, [x28, #768]" + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.h[0], w20", + "str d3, [x28, #768]" ] }, "pinsrw mm0, [rax], 0": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xc4", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "ld1 {v2.h}[0], [x4]", - "str d2, [x28, #768]" + "mov x20, x4", + "mov v3.16b, v2.16b", + "ld1 {v3.h}[0], [x20]", + "str d3, [x28, #768]" ] }, "pinsrw mm0, [rax], 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xc4", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "ld1 {v2.h}[1], [x4]", - "str d2, [x28, #768]" + "mov x20, x4", + "mov v3.16b, v2.16b", + "ld1 {v3.h}[1], [x20]", + "str d3, [x28, #768]" ] }, "pinsrw mm0, [rax], 2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xc4", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "ld1 {v2.h}[2], [x4]", - "str d2, [x28, #768]" + "mov x20, x4", + "mov v3.16b, v2.16b", + "ld1 {v3.h}[2], [x20]", + "str d3, [x28, #768]" ] }, "pinsrw mm0, [rax], 3": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xc4", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "ld1 {v2.h}[3], [x4]", - "str d2, [x28, #768]" + "mov x20, x4", + "mov v3.16b, v2.16b", + "ld1 {v3.h}[3], [x20]", + "str d3, [x28, #768]" ] }, "pinsrw mm0, [rax], 4": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xc4", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "ld1 {v2.h}[0], [x4]", - "str d2, [x28, #768]" + "mov x20, x4", + "mov v3.16b, v2.16b", + "ld1 {v3.h}[0], [x20]", + "str d3, [x28, #768]" ] }, "pextrw eax, mm0, 0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0xc5", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "umov w4, v2.h[0]" + "umov w20, v2.h[0]", + "mov x4, x20" ] }, "pextrw eax, mm0, 1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0xc5", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "umov w4, v2.h[1]" + "umov w20, v2.h[1]", + "mov x4, x20" ] }, "pextrw eax, mm0, 2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0xc5", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "umov w4, v2.h[2]" + "umov w20, v2.h[2]", + "mov x4, x20" ] }, "pextrw eax, mm0, 3": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0xc5", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "umov w4, v2.h[3]" + "umov w20, v2.h[3]", + "mov x4, x20" ] }, "pextrw eax, mm0, 4": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0xc5", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "umov w4, v2.h[0]" + "umov w20, v2.h[0]", + "mov x4, x20" ] }, "shufps xmm0, xmm1, 01000100b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Dst[63:0] = Src1[63:0]", "Dest[127:64] = Src2[63:0]", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "zip1 v16.2d, v16.2d, v17.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "zip1 v4.2d, v2.2d, v3.2d", + "mov v16.16b, v4.16b" ] }, "shufps xmm0, xmm1, 11101110b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Dst[63:0] = Src1[127:64]", "Dest[127:64] = Src2[127:64]", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "zip2 v16.2d, v16.2d, v17.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "zip2 v4.2d, v2.2d, v3.2d", + "mov v16.16b, v4.16b" ] }, "shufps xmm0, xmm1, 11100100b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "Dst[63:0] = Src1[63:0]", "Dest[127:64] = Src2[127:64]", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "mov v16.d[1], v17.d[1]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "mov v4.d[1], v3.d[1]", + "mov v16.16b, v4.16b" ] }, "shufps xmm0, xmm1, 01001110b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Dst[63:0] = Src1[63:0]", "Dest[127:64] = Src2[127:64]", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "ext v16.16b, v16.16b, v17.16b, #8" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "ext v4.16b, v2.16b, v3.16b, #8", + "mov v16.16b, v4.16b" ] }, "shufps xmm0, xmm1, 00000000b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Duplicate selected element between each 64-bit segment", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "dup v2.4s, v16.s[0]", - "dup v3.4s, v17.s[0]", - "zip1 v16.2d, v2.2d, v3.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v2.s[0]", + "dup v2.4s, v3.s[0]", + "zip1 v3.2d, v4.2d, v2.2d", + "mov v16.16b, v3.16b" ] }, "shufps xmm0, xmm1, 00000101b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Duplicate selected element between each 64-bit segment", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "dup v2.4s, v16.s[1]", - "dup v3.4s, v17.s[0]", - "zip1 v16.2d, v2.2d, v3.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v2.s[1]", + "dup v2.4s, v3.s[0]", + "zip1 v3.2d, v4.2d, v2.2d", + "mov v16.16b, v3.16b" ] }, "shufps xmm0, xmm1, 00001010b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Duplicate selected element between each 64-bit segment", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "dup v2.4s, v16.s[2]", - "dup v3.4s, v17.s[0]", - "zip1 v16.2d, v2.2d, v3.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v2.s[2]", + "dup v2.4s, v3.s[0]", + "zip1 v3.2d, v4.2d, v2.2d", + "mov v16.16b, v3.16b" ] }, "shufps xmm0, xmm1, 00001111b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Duplicate selected element between each 64-bit segment", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "dup v2.4s, v16.s[3]", - "dup v3.4s, v17.s[0]", - "zip1 v16.2d, v2.2d, v3.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v2.s[3]", + "dup v2.4s, v3.s[0]", + "zip1 v3.2d, v4.2d, v2.2d", + "mov v16.16b, v3.16b" ] }, "shufps xmm0, xmm1, 01010000b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Duplicate selected element between each 64-bit segment", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "dup v2.4s, v16.s[0]", - "dup v3.4s, v17.s[1]", - "zip1 v16.2d, v2.2d, v3.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v2.s[0]", + "dup v2.4s, v3.s[1]", + "zip1 v3.2d, v4.2d, v2.2d", + "mov v16.16b, v3.16b" ] }, "shufps xmm0, xmm1, 01010101b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Duplicate selected element between each 64-bit segment", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "dup v2.4s, v16.s[1]", - "dup v3.4s, v17.s[1]", - "zip1 v16.2d, v2.2d, v3.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v2.s[1]", + "dup v2.4s, v3.s[1]", + "zip1 v3.2d, v4.2d, v2.2d", + "mov v16.16b, v3.16b" ] }, "shufps xmm0, xmm1, 01011010b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Duplicate selected element between each 64-bit segment", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "dup v2.4s, v16.s[2]", - "dup v3.4s, v17.s[1]", - "zip1 v16.2d, v2.2d, v3.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v2.s[2]", + "dup v2.4s, v3.s[1]", + "zip1 v3.2d, v4.2d, v2.2d", + "mov v16.16b, v3.16b" ] }, "shufps xmm0, xmm1, 01011111b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Duplicate selected element between each 64-bit segment", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "dup v2.4s, v16.s[3]", - "dup v3.4s, v17.s[1]", - "zip1 v16.2d, v2.2d, v3.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v2.s[3]", + "dup v2.4s, v3.s[1]", + "zip1 v3.2d, v4.2d, v2.2d", + "mov v16.16b, v3.16b" ] }, "shufps xmm0, xmm1, 10100000b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Duplicate selected element between each 64-bit segment", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "dup v2.4s, v16.s[0]", - "dup v3.4s, v17.s[2]", - "zip1 v16.2d, v2.2d, v3.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v2.s[0]", + "dup v2.4s, v3.s[2]", + "zip1 v3.2d, v4.2d, v2.2d", + "mov v16.16b, v3.16b" ] }, "shufps xmm0, xmm1, 10100101b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Duplicate selected element between each 64-bit segment", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "dup v2.4s, v16.s[1]", - "dup v3.4s, v17.s[2]", - "zip1 v16.2d, v2.2d, v3.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v2.s[1]", + "dup v2.4s, v3.s[2]", + "zip1 v3.2d, v4.2d, v2.2d", + "mov v16.16b, v3.16b" ] }, "shufps xmm0, xmm1, 10101010b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Duplicate selected element between each 64-bit segment", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "dup v2.4s, v16.s[2]", - "dup v3.4s, v17.s[2]", - "zip1 v16.2d, v2.2d, v3.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v2.s[2]", + "dup v2.4s, v3.s[2]", + "zip1 v3.2d, v4.2d, v2.2d", + "mov v16.16b, v3.16b" ] }, "shufps xmm0, xmm1, 10101111b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Duplicate selected element between each 64-bit segment", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "dup v2.4s, v16.s[3]", - "dup v3.4s, v17.s[2]", - "zip1 v16.2d, v2.2d, v3.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v2.s[3]", + "dup v2.4s, v3.s[2]", + "zip1 v3.2d, v4.2d, v2.2d", + "mov v16.16b, v3.16b" ] }, "shufps xmm0, xmm1, 11110000b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Duplicate selected element between each 64-bit segment", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "dup v2.4s, v16.s[0]", - "dup v3.4s, v17.s[3]", - "zip1 v16.2d, v2.2d, v3.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v2.s[0]", + "dup v2.4s, v3.s[3]", + "zip1 v3.2d, v4.2d, v2.2d", + "mov v16.16b, v3.16b" ] }, "shufps xmm0, xmm1, 11110101b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Duplicate selected element between each 64-bit segment", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "dup v2.4s, v16.s[1]", - "dup v3.4s, v17.s[3]", - "zip1 v16.2d, v2.2d, v3.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v2.s[1]", + "dup v2.4s, v3.s[3]", + "zip1 v3.2d, v4.2d, v2.2d", + "mov v16.16b, v3.16b" ] }, "shufps xmm0, xmm1, 11111010b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Duplicate selected element between each 64-bit segment", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "dup v2.4s, v16.s[2]", - "dup v3.4s, v17.s[3]", - "zip1 v16.2d, v2.2d, v3.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v2.s[2]", + "dup v2.4s, v3.s[3]", + "zip1 v3.2d, v4.2d, v2.2d", + "mov v16.16b, v3.16b" ] }, "shufps xmm0, xmm1, 11100000b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Bottom elements duplicated, Top 64-bits inserted", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "dup v2.4s, v16.s[0]", - "zip2 v16.2d, v2.2d, v17.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v2.s[0]", + "zip2 v2.2d, v4.2d, v3.2d", + "mov v16.16b, v2.16b" ] }, "shufps xmm0, xmm1, 11100101b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Bottom elements duplicated, Top 64-bits inserted", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "dup v2.4s, v16.s[1]", - "zip2 v16.2d, v2.2d, v17.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v2.s[1]", + "zip2 v2.2d, v4.2d, v3.2d", + "mov v16.16b, v2.16b" ] }, "shufps xmm0, xmm1, 11101010b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Bottom elements duplicated, Top 64-bits inserted", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "dup v2.4s, v16.s[2]", - "zip2 v16.2d, v2.2d, v17.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v2.s[2]", + "zip2 v2.2d, v4.2d, v3.2d", + "mov v16.16b, v2.16b" ] }, "shufps xmm0, xmm1, 11101111b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Bottom elements duplicated, Top 64-bits inserted", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "dup v2.4s, v16.s[3]", - "zip2 v16.2d, v2.2d, v17.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v2.s[3]", + "zip2 v2.2d, v4.2d, v3.2d", + "mov v16.16b, v2.16b" ] }, "shufps xmm0, xmm1, 01000000b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Bottom elements duplicated, Top 64-bits inserted", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "dup v2.4s, v16.s[0]", - "zip1 v16.2d, v2.2d, v17.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v2.s[0]", + "zip1 v2.2d, v4.2d, v3.2d", + "mov v16.16b, v2.16b" ] }, "shufps xmm0, xmm1, 01000101b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Bottom elements duplicated, Top 64-bits inserted", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "dup v2.4s, v16.s[1]", - "zip1 v16.2d, v2.2d, v17.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v2.s[1]", + "zip1 v2.2d, v4.2d, v3.2d", + "mov v16.16b, v2.16b" ] }, "shufps xmm0, xmm1, 01001010b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Bottom elements duplicated, Top 64-bits inserted", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "dup v2.4s, v16.s[2]", - "zip1 v16.2d, v2.2d, v17.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v2.s[2]", + "zip1 v2.2d, v4.2d, v3.2d", + "mov v16.16b, v2.16b" ] }, "shufps xmm0, xmm1, 01001111b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Bottom elements duplicated, Top 64-bits inserted", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "dup v2.4s, v16.s[3]", - "zip1 v16.2d, v2.2d, v17.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v2.s[3]", + "zip1 v2.2d, v4.2d, v3.2d", + "mov v16.16b, v2.16b" ] }, "shufps xmm0, xmm1, 00000100b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Top elements duplicated, Bottom 64-bits inserted", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "dup v2.4s, v17.s[0]", - "zip1 v16.2d, v16.2d, v2.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v3.s[0]", + "zip1 v3.2d, v2.2d, v4.2d", + "mov v16.16b, v3.16b" ] }, "shufps xmm0, xmm1, 01010100b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Top elements duplicated, Bottom 64-bits inserted", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "dup v2.4s, v17.s[1]", - "zip1 v16.2d, v16.2d, v2.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v3.s[1]", + "zip1 v3.2d, v2.2d, v4.2d", + "mov v16.16b, v3.16b" ] }, "shufps xmm0, xmm1, 10100100b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Top elements duplicated, Bottom 64-bits inserted", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "dup v2.4s, v17.s[2]", - "zip1 v16.2d, v16.2d, v2.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v3.s[2]", + "zip1 v3.2d, v2.2d, v4.2d", + "mov v16.16b, v3.16b" ] }, "shufps xmm0, xmm1, 11110100b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Top elements duplicated, Bottom 64-bits inserted", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "dup v2.4s, v17.s[3]", - "zip1 v16.2d, v16.2d, v2.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v3.s[3]", + "zip1 v3.2d, v2.2d, v4.2d", + "mov v16.16b, v3.16b" ] }, "shufps xmm0, xmm1, 00001110b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Top elements duplicated, Top 64-bits inserted", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "dup v2.4s, v17.s[0]", - "zip2 v16.2d, v16.2d, v2.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v3.s[0]", + "zip2 v3.2d, v2.2d, v4.2d", + "mov v16.16b, v3.16b" ] }, "shufps xmm0, xmm1, 01011110b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Top elements duplicated, Top 64-bits inserted", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "dup v2.4s, v17.s[1]", - "zip2 v16.2d, v16.2d, v2.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v3.s[1]", + "zip2 v3.2d, v2.2d, v4.2d", + "mov v16.16b, v3.16b" ] }, "shufps xmm0, xmm1, 10101110b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Top elements duplicated, Top 64-bits inserted", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "dup v2.4s, v17.s[2]", - "zip2 v16.2d, v16.2d, v2.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v3.s[2]", + "zip2 v3.2d, v2.2d, v4.2d", + "mov v16.16b, v3.16b" ] }, "shufps xmm0, xmm1, 11111110b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Top elements duplicated, Top 64-bits inserted", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "dup v2.4s, v17.s[3]", - "zip2 v16.2d, v16.2d, v2.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v3.s[3]", + "zip2 v3.2d, v2.2d, v4.2d", + "mov v16.16b, v3.16b" ] }, "shufps xmm0, xmm1, 01000111b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "odd elements inverted, Low 64-bits inserted", "SRA quirks with RA fail to understand that v16 is dead", @@ -3238,12 +4047,15 @@ ], "ExpectedArm64ASM": [ "mov v2.16b, v16.16b", - "mov v2.s[0], v16.s[3]", - "zip1 v16.2d, v2.2d, v17.2d" + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "mov v4.s[0], v2.s[3]", + "zip1 v2.2d, v4.2d, v3.2d", + "mov v16.16b, v2.16b" ] }, "shufps xmm0, xmm1, 11100111b": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "odd elements inverted, Top 64-bits inserted", "SRA quirks with RA fail to understand that v16 is dead", @@ -3252,110 +4064,137 @@ ], "ExpectedArm64ASM": [ "mov v2.16b, v16.16b", - "mov v2.s[0], v16.s[3]", - "mov v16.16b, v2.16b", - "mov v16.d[1], v17.d[1]" + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "mov v4.s[0], v2.s[3]", + "mov v2.16b, v4.16b", + "mov v2.d[1], v3.d[1]", + "mov v16.16b, v2.16b" ] }, "shufps xmm0, xmm1, 11100001b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Lower 32-bit elements inverted, Top 64-bits inserted", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "rev64 v2.4s, v16.4s", - "mov v16.16b, v2.16b", - "mov v16.d[1], v17.d[1]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "rev64 v4.4s, v2.4s", + "mov v2.16b, v4.16b", + "mov v2.d[1], v3.d[1]", + "mov v16.16b, v2.16b" ] }, "shufps xmm0, xmm1, 01000001b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Lower 32-bit elements inverted, Low 64-bits inserted", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "rev64 v2.4s, v16.4s", - "zip1 v16.2d, v2.2d, v17.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "rev64 v4.4s, v2.4s", + "zip1 v2.2d, v4.2d, v3.2d", + "mov v16.16b, v2.16b" ] }, "shufps xmm0, xmm1, 11111111b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Duplicate selected element between each 64-bit segment", "0x0f 0xc6" ], "ExpectedArm64ASM": [ - "dup v2.4s, v16.s[3]", - "dup v3.4s, v17.s[3]", - "zip1 v16.2d, v2.2d, v3.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "dup v4.4s, v2.s[3]", + "dup v2.4s, v3.s[3]", + "zip1 v3.2d, v4.2d, v2.2d", + "mov v16.16b, v3.16b" ] }, "shufps xmm0, [rax], 0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "0x0f 0xc6", "ExpectedArm64ASM": [ - "ldr q2, [x4]", - "dup v3.4s, v16.s[0]", - "dup v2.4s, v2.s[0]", - "zip1 v16.2d, v3.2d, v2.2d" + "mov v2.16b, v16.16b", + "mov x20, x4", + "ldr q3, [x20]", + "dup v4.4s, v2.s[0]", + "dup v2.4s, v3.s[0]", + "zip1 v3.2d, v4.2d, v2.2d", + "mov v16.16b, v3.16b" ] }, "shufps xmm0, xmm1, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xc6", "ExpectedArm64ASM": [ + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", "ldr x0, [x28, #1768]", - "ldr q2, [x0, #16]", - "tbl v16.16b, {v16.16b, v17.16b}, v2.16b" + "ldr q4, [x0, #16]", + "tbl v5.16b, {v2.16b, v3.16b}, v4.16b", + "mov v16.16b, v5.16b" ] }, "shufps xmm1, xmm0, 1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": "0x0f 0xc6", "ExpectedArm64ASM": [ + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", "ldr x0, [x28, #1768]", - "ldr q2, [x0, #16]", - "mov v0.16b, v17.16b", - "mov v1.16b, v16.16b", - "tbl v17.16b, {v0.16b, v1.16b}, v2.16b" + "ldr q4, [x0, #16]", + "tbl v5.16b, {v2.16b, v3.16b}, v4.16b", + "mov v17.16b, v5.16b" ] }, "shufps xmm0, [rax], 1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 7, "Comment": "0x0f 0xc6", "ExpectedArm64ASM": [ - "ldr q2, [x4]", + "mov v2.16b, v16.16b", + "mov x20, x4", + "ldr q3, [x20]", "ldr x0, [x28, #1768]", - "ldr q3, [x0, #16]", - "mov v0.16b, v16.16b", - "mov v1.16b, v2.16b", - "tbl v16.16b, {v0.16b, v1.16b}, v3.16b" + "ldr q4, [x0, #16]", + "tbl v5.16b, {v2.16b, v3.16b}, v4.16b", + "mov v16.16b, v5.16b" ] }, "shufps xmm0, [rax], 0xFF": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "0x0f 0xc6", "ExpectedArm64ASM": [ - "ldr q2, [x4]", - "dup v3.4s, v16.s[3]", - "dup v2.4s, v2.s[3]", - "zip1 v16.2d, v3.2d, v2.2d" + "mov v2.16b, v16.16b", + "mov x20, x4", + "ldr q3, [x20]", + "dup v4.4s, v2.s[3]", + "dup v2.4s, v3.s[3]", + "zip1 v3.2d, v4.2d, v2.2d", + "mov v16.16b, v3.16b" ] }, "bswap eax": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0xc8", "ExpectedArm64ASM": [ - "rev w4, w4" + "mov x20, x4", + "rev w21, w20", + "mov x4, x21" ] }, "bswap rax": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0xc8", "ExpectedArm64ASM": [ - "rev x4, x4" + "mov x20, x4", + "rev x21, x20", + "mov x4, x21" ] }, "psrlw mm0, mm1": { @@ -3368,8 +4207,8 @@ "ushr d0, d0, #57", "dup v0.8h, v0.h[0]", "neg v0.8h, v0.8h", - "ushl v2.8h, v2.8h, v0.8h", - "str d2, [x28, #768]" + "ushl v4.8h, v2.8h, v0.8h", + "str d4, [x28, #768]" ] }, "psrld mm0, mm1": { @@ -3382,8 +4221,8 @@ "ushr d0, d0, #57", "dup v0.4s, v0.s[0]", "neg v0.4s, v0.4s", - "ushl v2.4s, v2.4s, v0.4s", - "str d2, [x28, #768]" + "ushl v4.4s, v2.4s, v0.4s", + "str d4, [x28, #768]" ] }, "psrlq mm0, mm1": { @@ -3396,8 +4235,8 @@ "ushr d0, d0, #57", "dup v0.2d, v0.d[0]", "neg v0.2d, v0.2d", - "ushl v2.2d, v2.2d, v0.2d", - "str d2, [x28, #768]" + "ushl v4.2d, v2.2d, v0.2d", + "str d4, [x28, #768]" ] }, "paddq mm0, mm1": { @@ -3406,8 +4245,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "add v2.2d, v3.2d, v2.2d", - "str d2, [x28, #768]" + "add v4.2d, v3.2d, v2.2d", + "str d4, [x28, #768]" ] }, "pmullw mm0, mm1": { @@ -3416,22 +4255,23 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "mul v2.8h, v3.8h, v2.8h", - "str d2, [x28, #768]" + "mul v4.8h, v3.8h, v2.8h", + "str d4, [x28, #768]" ] }, "pmovmskb eax, mm0": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0xd7", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", "ldr d3, [x28, #2272]", - "cmlt v2.16b, v2.16b, #0", - "and v2.16b, v2.16b, v3.16b", - "addp v2.16b, v2.16b, v2.16b", - "addp v2.8b, v2.8b, v2.8b", - "addp v2.8b, v2.8b, v2.8b", - "umov w4, v2.h[0]" + "cmlt v4.16b, v2.16b, #0", + "and v2.16b, v4.16b, v3.16b", + "addp v3.16b, v2.16b, v2.16b", + "addp v2.8b, v3.8b, v3.8b", + "addp v3.8b, v2.8b, v2.8b", + "umov w20, v3.h[0]", + "mov x4, x20" ] }, "psubusb mm0, mm1": { @@ -3440,8 +4280,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "uqsub v2.16b, v3.16b, v2.16b", - "str d2, [x28, #768]" + "uqsub v4.16b, v3.16b, v2.16b", + "str d4, [x28, #768]" ] }, "psubusw mm0, mm1": { @@ -3450,8 +4290,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "uqsub v2.8h, v3.8h, v2.8h", - "str d2, [x28, #768]" + "uqsub v4.8h, v3.8h, v2.8h", + "str d4, [x28, #768]" ] }, "pminub mm0, mm1": { @@ -3460,8 +4300,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "umin v2.16b, v3.16b, v2.16b", - "str d2, [x28, #768]" + "umin v4.16b, v3.16b, v2.16b", + "str d4, [x28, #768]" ] }, "pand mm0, mm1": { @@ -3470,8 +4310,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "and v2.16b, v3.16b, v2.16b", - "str d2, [x28, #768]" + "and v4.16b, v3.16b, v2.16b", + "str d4, [x28, #768]" ] }, "paddusb mm0, mm1": { @@ -3480,8 +4320,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "uqadd v2.16b, v3.16b, v2.16b", - "str d2, [x28, #768]" + "uqadd v4.16b, v3.16b, v2.16b", + "str d4, [x28, #768]" ] }, "paddusw mm0, mm1": { @@ -3490,8 +4330,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "uqadd v2.8h, v3.8h, v2.8h", - "str d2, [x28, #768]" + "uqadd v4.8h, v3.8h, v2.8h", + "str d4, [x28, #768]" ] }, "pmaxub mm0, mm1": { @@ -3500,8 +4340,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "umax v2.16b, v3.16b, v2.16b", - "str d2, [x28, #768]" + "umax v4.16b, v3.16b, v2.16b", + "str d4, [x28, #768]" ] }, "pandn mm0, mm1": { @@ -3510,8 +4350,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "bic v2.16b, v2.16b, v3.16b", - "str d2, [x28, #768]" + "bic v4.16b, v2.16b, v3.16b", + "str d4, [x28, #768]" ] }, "pavgb mm0, mm1": { @@ -3520,8 +4360,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "urhadd v2.16b, v3.16b, v2.16b", - "str d2, [x28, #768]" + "urhadd v4.16b, v3.16b, v2.16b", + "str d4, [x28, #768]" ] }, "psraw mm0, mm1": { @@ -3534,8 +4374,8 @@ "ushr d0, d0, #57", "dup v0.8h, v0.h[0]", "neg v0.8h, v0.8h", - "sshl v2.8h, v2.8h, v0.8h", - "str d2, [x28, #768]" + "sshl v4.8h, v2.8h, v0.8h", + "str d4, [x28, #768]" ] }, "psrad mm0, mm1": { @@ -3548,8 +4388,8 @@ "ushr d0, d0, #57", "dup v0.4s, v0.s[0]", "neg v0.4s, v0.4s", - "sshl v2.4s, v2.4s, v0.4s", - "str d2, [x28, #768]" + "sshl v4.4s, v2.4s, v0.4s", + "str d4, [x28, #768]" ] }, "pavgw mm0, mm1": { @@ -3558,8 +4398,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "urhadd v2.8h, v3.8h, v2.8h", - "str d2, [x28, #768]" + "urhadd v4.8h, v3.8h, v2.8h", + "str d4, [x28, #768]" ] }, "pmulhuw mm0, mm1": { @@ -3568,9 +4408,9 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", "ldr d3, [x28, #784]", - "umull v2.4s, v2.4h, v3.4h", - "shrn v2.4h, v2.4s, #16", - "str d2, [x28, #768]" + "umull v4.4s, v2.4h, v3.4h", + "shrn v4.4h, v4.4s, #16", + "str d4, [x28, #768]" ] }, "pmulhw mm0, mm1": { @@ -3579,17 +4419,18 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", "ldr d3, [x28, #784]", - "smull v2.4s, v2.4h, v3.4h", - "shrn v2.4h, v2.4s, #16", - "str d2, [x28, #768]" + "smull v4.4s, v2.4h, v3.4h", + "shrn v4.4h, v4.4s, #16", + "str d4, [x28, #768]" ] }, "movntq [rax], mm0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "0x0f 0xe7", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "str d2, [x4]" + "mov x20, x4", + "str d2, [x20]" ] }, "psubsb mm0, mm1": { @@ -3598,8 +4439,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "sqsub v2.16b, v3.16b, v2.16b", - "str d2, [x28, #768]" + "sqsub v4.16b, v3.16b, v2.16b", + "str d4, [x28, #768]" ] }, "psubsw mm0, mm1": { @@ -3608,8 +4449,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "sqsub v2.8h, v3.8h, v2.8h", - "str d2, [x28, #768]" + "sqsub v4.8h, v3.8h, v2.8h", + "str d4, [x28, #768]" ] }, "pminsw mm0, mm1": { @@ -3618,8 +4459,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "smin v2.8h, v3.8h, v2.8h", - "str d2, [x28, #768]" + "smin v4.8h, v3.8h, v2.8h", + "str d4, [x28, #768]" ] }, "por mm0, mm1": { @@ -3628,8 +4469,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "orr v2.16b, v3.16b, v2.16b", - "str d2, [x28, #768]" + "orr v4.16b, v3.16b, v2.16b", + "str d4, [x28, #768]" ] }, "paddsb mm0, mm1": { @@ -3638,8 +4479,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "sqadd v2.16b, v3.16b, v2.16b", - "str d2, [x28, #768]" + "sqadd v4.16b, v3.16b, v2.16b", + "str d4, [x28, #768]" ] }, "paddsw mm0, mm1": { @@ -3648,8 +4489,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "sqadd v2.8h, v3.8h, v2.8h", - "str d2, [x28, #768]" + "sqadd v4.8h, v3.8h, v2.8h", + "str d4, [x28, #768]" ] }, "pmaxsw mm0, mm1": { @@ -3658,8 +4499,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "smax v2.8h, v3.8h, v2.8h", - "str d2, [x28, #768]" + "smax v4.8h, v3.8h, v2.8h", + "str d4, [x28, #768]" ] }, "pxor mm0, mm1": { @@ -3668,8 +4509,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "eor v2.16b, v3.16b, v2.16b", - "str d2, [x28, #768]" + "eor v4.16b, v3.16b, v2.16b", + "str d4, [x28, #768]" ] }, "psllw mm0, mm1": { @@ -3681,8 +4522,8 @@ "uqshl d0, d3, #57", "ushr d0, d0, #57", "dup v0.8h, v0.h[0]", - "ushl v2.8h, v2.8h, v0.8h", - "str d2, [x28, #768]" + "ushl v4.8h, v2.8h, v0.8h", + "str d4, [x28, #768]" ] }, "pslld mm0, mm1": { @@ -3694,8 +4535,8 @@ "uqshl d0, d3, #57", "ushr d0, d0, #57", "dup v0.4s, v0.s[0]", - "ushl v2.4s, v2.4s, v0.4s", - "str d2, [x28, #768]" + "ushl v4.4s, v2.4s, v0.4s", + "str d4, [x28, #768]" ] }, "psllq mm0, mm1": { @@ -3707,8 +4548,8 @@ "uqshl d0, d3, #57", "ushr d0, d0, #57", "dup v0.2d, v0.d[0]", - "ushl v2.2d, v2.2d, v0.2d", - "str d2, [x28, #768]" + "ushl v4.2d, v2.2d, v0.2d", + "str d4, [x28, #768]" ] }, "pmuludq mm0, mm1": { @@ -3717,8 +4558,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", "ldr d3, [x28, #784]", - "umull v2.2d, v2.2s, v3.2s", - "str d2, [x28, #768]" + "umull v4.2d, v2.2s, v3.2s", + "str d4, [x28, #768]" ] }, "pmaddwd mm0, mm1": { @@ -3727,8 +4568,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", "ldr d3, [x28, #784]", - "smull v2.4s, v2.4h, v3.4h", - "addp v2.4s, v2.4s, v2.4s", + "smull v4.4s, v2.4h, v3.4h", + "addp v2.4s, v4.4s, v4.4s", "str d2, [x28, #768]" ] }, @@ -3738,21 +4579,23 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", "ldr d3, [x28, #784]", - "uabdl v2.8h, v2.8b, v3.8b", - "addv h2, v2.8h", + "uabdl v4.8h, v2.8b, v3.8b", + "addv h2, v4.8h", "str d2, [x28, #768]" ] }, "maskmovq mm0, mm1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "0x0f 0xf7", "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", - "cmlt v2.16b, v2.16b, #0", - "ldr d3, [x28, #768]", - "ldr d4, [x11]", - "bsl v2.8b, v3.8b, v4.8b", - "str d2, [x11]" + "cmlt v3.16b, v2.16b, #0", + "ldr d2, [x28, #768]", + "mov x20, x11", + "ldr d4, [x20]", + "mov v5.8b, v3.8b", + "bsl v5.8b, v2.8b, v4.8b", + "str d5, [x20]" ] }, "psubb mm0, mm1": { @@ -3761,8 +4604,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "sub v2.16b, v3.16b, v2.16b", - "str d2, [x28, #768]" + "sub v4.16b, v3.16b, v2.16b", + "str d4, [x28, #768]" ] }, "psubw mm0, mm1": { @@ -3771,8 +4614,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "sub v2.8h, v3.8h, v2.8h", - "str d2, [x28, #768]" + "sub v4.8h, v3.8h, v2.8h", + "str d4, [x28, #768]" ] }, "psubd mm0, mm1": { @@ -3781,8 +4624,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "sub v2.4s, v3.4s, v2.4s", - "str d2, [x28, #768]" + "sub v4.4s, v3.4s, v2.4s", + "str d4, [x28, #768]" ] }, "psubq mm0, mm1": { @@ -3791,8 +4634,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "sub v2.2d, v3.2d, v2.2d", - "str d2, [x28, #768]" + "sub v4.2d, v3.2d, v2.2d", + "str d4, [x28, #768]" ] }, "paddb mm0, mm1": { @@ -3801,8 +4644,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "add v2.16b, v3.16b, v2.16b", - "str d2, [x28, #768]" + "add v4.16b, v3.16b, v2.16b", + "str d4, [x28, #768]" ] }, "paddw mm0, mm1": { @@ -3811,8 +4654,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "add v2.8h, v3.8h, v2.8h", - "str d2, [x28, #768]" + "add v4.8h, v3.8h, v2.8h", + "str d4, [x28, #768]" ] }, "paddd mm0, mm1": { @@ -3821,8 +4664,8 @@ "ExpectedArm64ASM": [ "ldr d2, [x28, #784]", "ldr d3, [x28, #768]", - "add v2.4s, v3.4s, v2.4s", - "str d2, [x28, #768]" + "add v4.4s, v3.4s, v2.4s", + "str d4, [x28, #768]" ] } } diff --git a/unittests/InstructionCountCI/SecondaryGroup.json b/unittests/InstructionCountCI/SecondaryGroup.json index c0a7059233..b913e06fcd 100644 --- a/unittests/InstructionCountCI/SecondaryGroup.json +++ b/unittests/InstructionCountCI/SecondaryGroup.json @@ -13,914 +13,1057 @@ }, "Instructions": { "sgdt [rax]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "GROUP7 0x0F 0x1 /0", "ExpectedArm64ASM": [ - "mov w20, #0x0", - "strh w20, [x4]", - "mov x20, #0xfffffffffffe0000", - "stur x20, [x4, #2]" + "mov x20, x4", + "mov w21, #0x0", + "strh w21, [x20]", + "mov x21, #0xfffffffffffe0000", + "stur x21, [x20, #2]" ] }, "bt ax, 0": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 4, "Comment": "GROUP8 0x0F 0xBA /4", "ExpectedArm64ASM": [ - "ubfx x20, x4, #0, #1", - "lsl x20, x20, #29", + "mov x20, x4", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "bt eax, 0": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 4, "Comment": "GROUP8 0x0F 0xBA /4", "ExpectedArm64ASM": [ - "ubfx x20, x4, #0, #1", - "lsl x20, x20, #29", + "mov x20, x4", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "bt rax, 0": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 4, "Comment": "GROUP8 0x0F 0xBA /4", "ExpectedArm64ASM": [ - "ubfx x20, x4, #0, #1", - "lsl x20, x20, #29", + "mov x20, x4", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "bt ax, 15": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 4, "Comment": "GROUP8 0x0F 0xBA /4", "ExpectedArm64ASM": [ - "ubfx x20, x4, #15, #1", - "lsl x20, x20, #29", + "mov x20, x4", + "ubfx x21, x20, #15, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "bt eax, 31": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 4, "Comment": "GROUP8 0x0F 0xBA /4", "ExpectedArm64ASM": [ - "ubfx x20, x4, #31, #1", - "lsl x20, x20, #29", + "mov x20, x4", + "ubfx x21, x20, #31, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "bt rax, 63": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 4, "Comment": "GROUP8 0x0F 0xBA /4", "ExpectedArm64ASM": [ - "lsr x20, x4, #63", - "lsl x20, x20, #29", + "mov x20, x4", + "lsr x21, x20, #63", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "bt word [rax], 0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4]", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x4", + "ldrb w21, [x20]", + "ubfx x20, x21, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "bt dword [rax], 0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4]", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x4", + "ldrb w21, [x20]", + "ubfx x20, x21, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "bt qword [rax], 0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4]", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x4", + "ldrb w21, [x20]", + "ubfx x20, x21, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "bt word [rax], 15": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4, #1]", - "lsr w20, w20, #7", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", + "mov x20, x4", + "ldrb w21, [x20, #1]", + "lsr w20, w21, #7", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "bt dword [rax], 31": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4, #3]", - "lsr w20, w20, #7", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", + "mov x20, x4", + "ldrb w21, [x20, #3]", + "lsr w20, w21, #7", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "bt qword [rax], 63": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4, #7]", - "lsr w20, w20, #7", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", + "mov x20, x4", + "ldrb w21, [x20, #7]", + "lsr w20, w21, #7", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "bts ax, 0": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "GROUP8 0x0F 0xBA /5", "ExpectedArm64ASM": [ - "ubfx x20, x4, #0, #1", - "lsl x20, x20, #29", - "orr w21, w4, #0x1", - "bfxil x4, x21, #0, #16", - "msr nzcv, x20" + "mov x20, x4", + "ubfx x21, x20, #0, #1", + "lsl x22, x21, #29", + "orr w21, w20, #0x1", + "mov x23, x20", + "bfxil x23, x21, #0, #16", + "mov x4, x23", + "msr nzcv, x22" ] }, "bts eax, 0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /5", "ExpectedArm64ASM": [ - "ubfx x20, x4, #0, #1", - "lsl x20, x20, #29", - "orr w4, w4, #0x1", - "msr nzcv, x20" + "mov x20, x4", + "ubfx x21, x20, #0, #1", + "lsl x22, x21, #29", + "orr w21, w20, #0x1", + "mov x4, x21", + "msr nzcv, x22" ] }, "bts rax, 0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /5", "ExpectedArm64ASM": [ - "ubfx x20, x4, #0, #1", - "lsl x20, x20, #29", - "orr x4, x4, #0x1", - "msr nzcv, x20" + "mov x20, x4", + "ubfx x21, x20, #0, #1", + "lsl x22, x21, #29", + "orr x21, x20, #0x1", + "mov x4, x21", + "msr nzcv, x22" ] }, "bts ax, 15": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "GROUP8 0x0F 0xBA /5", "ExpectedArm64ASM": [ - "ubfx x20, x4, #15, #1", - "lsl x20, x20, #29", - "orr w21, w4, #0x8000", - "bfxil x4, x21, #0, #16", - "msr nzcv, x20" + "mov x20, x4", + "ubfx x21, x20, #15, #1", + "lsl x22, x21, #29", + "orr w21, w20, #0x8000", + "mov x23, x20", + "bfxil x23, x21, #0, #16", + "mov x4, x23", + "msr nzcv, x22" ] }, "bts eax, 31": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /5", "ExpectedArm64ASM": [ - "ubfx x20, x4, #31, #1", - "lsl x20, x20, #29", - "orr w4, w4, #0x80000000", - "msr nzcv, x20" + "mov x20, x4", + "ubfx x21, x20, #31, #1", + "lsl x22, x21, #29", + "orr w21, w20, #0x80000000", + "mov x4, x21", + "msr nzcv, x22" ] }, "bts rax, 63": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /5", "ExpectedArm64ASM": [ - "lsr x20, x4, #63", - "lsl x20, x20, #29", - "orr x4, x4, #0x8000000000000000", - "msr nzcv, x20" + "mov x20, x4", + "lsr x21, x20, #63", + "lsl x22, x21, #29", + "orr x21, x20, #0x8000000000000000", + "mov x4, x21", + "msr nzcv, x22" ] }, "bts word [rax], 0": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 7, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4]", - "orr x21, x20, #0x1", - "strb w21, [x4]", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x4", + "ldrb w21, [x20]", + "orr x22, x21, #0x1", + "strb w22, [x20]", + "ubfx x20, x21, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "bts dword [rax], 0": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 7, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4]", - "orr x21, x20, #0x1", - "strb w21, [x4]", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x4", + "ldrb w21, [x20]", + "orr x22, x21, #0x1", + "strb w22, [x20]", + "ubfx x20, x21, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "bts qword [rax], 0": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 7, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4]", - "orr x21, x20, #0x1", - "strb w21, [x4]", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x4", + "ldrb w21, [x20]", + "orr x22, x21, #0x1", + "strb w22, [x20]", + "ubfx x20, x21, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "bts word [rax], 15": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 8, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4, #1]", - "orr x21, x20, #0x80", - "strb w21, [x4, #1]", - "lsr w20, w20, #7", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", + "mov x20, x4", + "ldrb w21, [x20, #1]", + "orr x22, x21, #0x80", + "strb w22, [x20, #1]", + "lsr w20, w21, #7", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "bts dword [rax], 31": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 8, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4, #3]", - "orr x21, x20, #0x80", - "strb w21, [x4, #3]", - "lsr w20, w20, #7", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", + "mov x20, x4", + "ldrb w21, [x20, #3]", + "orr x22, x21, #0x80", + "strb w22, [x20, #3]", + "lsr w20, w21, #7", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "bts qword [rax], 63": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 8, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4, #7]", - "orr x21, x20, #0x80", - "strb w21, [x4, #7]", - "lsr w20, w20, #7", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", + "mov x20, x4", + "ldrb w21, [x20, #7]", + "orr x22, x21, #0x80", + "strb w22, [x20, #7]", + "lsr w20, w21, #7", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "lock bts word [rax], 0": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 7, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x0 (0)", - "mov w21, #0x1", - "ldsetalb w21, w20, [x20]", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x4", + "add x21, x20, #0x0 (0)", + "mov w20, #0x1", + "ldsetalb w20, w22, [x21]", + "ubfx x20, x22, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "lock bts dword [rax], 0": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 7, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x0 (0)", - "mov w21, #0x1", - "ldsetalb w21, w20, [x20]", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x4", + "add x21, x20, #0x0 (0)", + "mov w20, #0x1", + "ldsetalb w20, w22, [x21]", + "ubfx x20, x22, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "lock bts qword [rax], 0": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 7, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x0 (0)", - "mov w21, #0x1", - "ldsetalb w21, w20, [x20]", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x4", + "add x21, x20, #0x0 (0)", + "mov w20, #0x1", + "ldsetalb w20, w22, [x21]", + "ubfx x20, x22, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "lock bts word [rax], 15": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 8, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x1 (1)", - "mov w21, #0x80", - "ldsetalb w21, w20, [x20]", - "lsr w20, w20, #7", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", + "mov x20, x4", + "add x21, x20, #0x1 (1)", + "mov w20, #0x80", + "ldsetalb w20, w22, [x21]", + "lsr w20, w22, #7", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "lock bts dword [rax], 31": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 8, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x3 (3)", - "mov w21, #0x80", - "ldsetalb w21, w20, [x20]", - "lsr w20, w20, #7", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", + "mov x20, x4", + "add x21, x20, #0x3 (3)", + "mov w20, #0x80", + "ldsetalb w20, w22, [x21]", + "lsr w20, w22, #7", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "lock bts qword [rax], 63": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 8, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x7 (7)", - "mov w21, #0x80", - "ldsetalb w21, w20, [x20]", - "lsr w20, w20, #7", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", + "mov x20, x4", + "add x21, x20, #0x7 (7)", + "mov w20, #0x80", + "ldsetalb w20, w22, [x21]", + "lsr w20, w22, #7", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "btr ax, 0": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ubfx x20, x4, #0, #1", - "lsl x20, x20, #29", - "and w21, w4, #0xfffffffe", - "bfxil x4, x21, #0, #16", - "msr nzcv, x20" + "mov x20, x4", + "ubfx x21, x20, #0, #1", + "lsl x22, x21, #29", + "and w21, w20, #0xfffffffe", + "mov x23, x20", + "bfxil x23, x21, #0, #16", + "mov x4, x23", + "msr nzcv, x22" ] }, "btr eax, 0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ubfx x20, x4, #0, #1", - "lsl x20, x20, #29", - "and w4, w4, #0xfffffffe", - "msr nzcv, x20" + "mov x20, x4", + "ubfx x21, x20, #0, #1", + "lsl x22, x21, #29", + "and w21, w20, #0xfffffffe", + "mov x4, x21", + "msr nzcv, x22" ] }, "btr rax, 0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ubfx x20, x4, #0, #1", - "lsl x20, x20, #29", - "and x4, x4, #0xfffffffffffffffe", - "msr nzcv, x20" + "mov x20, x4", + "ubfx x21, x20, #0, #1", + "lsl x22, x21, #29", + "and x21, x20, #0xfffffffffffffffe", + "mov x4, x21", + "msr nzcv, x22" ] }, "btr ax, 15": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ubfx x20, x4, #15, #1", - "lsl x20, x20, #29", - "and w21, w4, #0xffff7fff", - "bfxil x4, x21, #0, #16", - "msr nzcv, x20" + "mov x20, x4", + "ubfx x21, x20, #15, #1", + "lsl x22, x21, #29", + "and w21, w20, #0xffff7fff", + "mov x23, x20", + "bfxil x23, x21, #0, #16", + "mov x4, x23", + "msr nzcv, x22" ] }, "btr eax, 31": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ubfx x20, x4, #31, #1", - "lsl x20, x20, #29", - "and w4, w4, #0x7fffffff", - "msr nzcv, x20" + "mov x20, x4", + "ubfx x21, x20, #31, #1", + "lsl x22, x21, #29", + "and w21, w20, #0x7fffffff", + "mov x4, x21", + "msr nzcv, x22" ] }, "btr rax, 63": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "lsr x20, x4, #63", - "lsl x20, x20, #29", - "and x4, x4, #0x7fffffffffffffff", - "msr nzcv, x20" + "mov x20, x4", + "lsr x21, x20, #63", + "lsl x22, x21, #29", + "and x21, x20, #0x7fffffffffffffff", + "mov x4, x21", + "msr nzcv, x22" ] }, "btr word [rax], 0": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 7, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4]", - "and x21, x20, #0xfffffffffffffffe", - "strb w21, [x4]", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x4", + "ldrb w21, [x20]", + "and x22, x21, #0xfffffffffffffffe", + "strb w22, [x20]", + "ubfx x20, x21, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "btr dword [rax], 0": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 7, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4]", - "and x21, x20, #0xfffffffffffffffe", - "strb w21, [x4]", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x4", + "ldrb w21, [x20]", + "and x22, x21, #0xfffffffffffffffe", + "strb w22, [x20]", + "ubfx x20, x21, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "btr qword [rax], 0": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 7, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4]", - "and x21, x20, #0xfffffffffffffffe", - "strb w21, [x4]", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x4", + "ldrb w21, [x20]", + "and x22, x21, #0xfffffffffffffffe", + "strb w22, [x20]", + "ubfx x20, x21, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "btr word [rax], 15": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 8, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4, #1]", - "and x21, x20, #0xffffffffffffff7f", - "strb w21, [x4, #1]", - "lsr w20, w20, #7", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", + "mov x20, x4", + "ldrb w21, [x20, #1]", + "and x22, x21, #0xffffffffffffff7f", + "strb w22, [x20, #1]", + "lsr w20, w21, #7", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "btr dword [rax], 31": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 8, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4, #3]", - "and x21, x20, #0xffffffffffffff7f", - "strb w21, [x4, #3]", - "lsr w20, w20, #7", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", + "mov x20, x4", + "ldrb w21, [x20, #3]", + "and x22, x21, #0xffffffffffffff7f", + "strb w22, [x20, #3]", + "lsr w20, w21, #7", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "btr qword [rax], 63": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 8, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4, #7]", - "and x21, x20, #0xffffffffffffff7f", - "strb w21, [x4, #7]", - "lsr w20, w20, #7", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", + "mov x20, x4", + "ldrb w21, [x20, #7]", + "and x22, x21, #0xffffffffffffff7f", + "strb w22, [x20, #7]", + "lsr w20, w21, #7", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "lock btr word [rax], 0": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 7, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x0 (0)", - "mov w21, #0x1", - "ldclralb w21, w20, [x20]", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x4", + "add x21, x20, #0x0 (0)", + "mov w20, #0x1", + "ldclralb w20, w22, [x21]", + "ubfx x20, x22, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "lock btr dword [rax], 0": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 7, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x0 (0)", - "mov w21, #0x1", - "ldclralb w21, w20, [x20]", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x4", + "add x21, x20, #0x0 (0)", + "mov w20, #0x1", + "ldclralb w20, w22, [x21]", + "ubfx x20, x22, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "lock btr qword [rax], 0": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 7, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x0 (0)", - "mov w21, #0x1", - "ldclralb w21, w20, [x20]", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x4", + "add x21, x20, #0x0 (0)", + "mov w20, #0x1", + "ldclralb w20, w22, [x21]", + "ubfx x20, x22, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "lock btr word [rax], 15": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 8, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x1 (1)", - "mov w21, #0x80", - "ldclralb w21, w20, [x20]", - "lsr w20, w20, #7", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", + "mov x20, x4", + "add x21, x20, #0x1 (1)", + "mov w20, #0x80", + "ldclralb w20, w22, [x21]", + "lsr w20, w22, #7", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "lock btr dword [rax], 31": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 8, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x3 (3)", - "mov w21, #0x80", - "ldclralb w21, w20, [x20]", - "lsr w20, w20, #7", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", + "mov x20, x4", + "add x21, x20, #0x3 (3)", + "mov w20, #0x80", + "ldclralb w20, w22, [x21]", + "lsr w20, w22, #7", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "lock btr qword [rax], 63": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 8, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x7 (7)", - "mov w21, #0x80", - "ldclralb w21, w20, [x20]", - "lsr w20, w20, #7", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", + "mov x20, x4", + "add x21, x20, #0x7 (7)", + "mov w20, #0x80", + "ldclralb w20, w22, [x21]", + "lsr w20, w22, #7", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "btc ax, 0": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "GROUP8 0x0F 0xBA /7", "ExpectedArm64ASM": [ - "ubfx x20, x4, #0, #1", - "lsl x20, x20, #29", - "eor w21, w4, #0x1", - "bfxil x4, x21, #0, #16", - "msr nzcv, x20" + "mov x20, x4", + "ubfx x21, x20, #0, #1", + "lsl x22, x21, #29", + "eor w21, w20, #0x1", + "mov x23, x20", + "bfxil x23, x21, #0, #16", + "mov x4, x23", + "msr nzcv, x22" ] }, "btc eax, 0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /7", "ExpectedArm64ASM": [ - "ubfx x20, x4, #0, #1", - "lsl x20, x20, #29", - "eor w4, w4, #0x1", - "msr nzcv, x20" + "mov x20, x4", + "ubfx x21, x20, #0, #1", + "lsl x22, x21, #29", + "eor w21, w20, #0x1", + "mov x4, x21", + "msr nzcv, x22" ] }, "btc rax, 0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /7", "ExpectedArm64ASM": [ - "ubfx x20, x4, #0, #1", - "lsl x20, x20, #29", - "eor x4, x4, #0x1", - "msr nzcv, x20" + "mov x20, x4", + "ubfx x21, x20, #0, #1", + "lsl x22, x21, #29", + "eor x21, x20, #0x1", + "mov x4, x21", + "msr nzcv, x22" ] }, "btc ax, 15": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "GROUP8 0x0F 0xBA /7", "ExpectedArm64ASM": [ - "ubfx x20, x4, #15, #1", - "lsl x20, x20, #29", - "eor w21, w4, #0x8000", - "bfxil x4, x21, #0, #16", - "msr nzcv, x20" + "mov x20, x4", + "ubfx x21, x20, #15, #1", + "lsl x22, x21, #29", + "eor w21, w20, #0x8000", + "mov x23, x20", + "bfxil x23, x21, #0, #16", + "mov x4, x23", + "msr nzcv, x22" ] }, "btc eax, 31": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /7", "ExpectedArm64ASM": [ - "ubfx x20, x4, #31, #1", - "lsl x20, x20, #29", - "eor w4, w4, #0x80000000", - "msr nzcv, x20" + "mov x20, x4", + "ubfx x21, x20, #31, #1", + "lsl x22, x21, #29", + "eor w21, w20, #0x80000000", + "mov x4, x21", + "msr nzcv, x22" ] }, "btc rax, 63": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": "GROUP8 0x0F 0xBA /7", "ExpectedArm64ASM": [ - "lsr x20, x4, #63", - "lsl x20, x20, #29", - "eor x4, x4, #0x8000000000000000", - "msr nzcv, x20" + "mov x20, x4", + "lsr x21, x20, #63", + "lsl x22, x21, #29", + "eor x21, x20, #0x8000000000000000", + "mov x4, x21", + "msr nzcv, x22" ] }, "btc word [rax], 0": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 7, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4]", - "eor x21, x20, #0x1", - "strb w21, [x4]", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x4", + "ldrb w21, [x20]", + "eor x22, x21, #0x1", + "strb w22, [x20]", + "ubfx x20, x21, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "btc dword [rax], 0": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 7, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4]", - "eor x21, x20, #0x1", - "strb w21, [x4]", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x4", + "ldrb w21, [x20]", + "eor x22, x21, #0x1", + "strb w22, [x20]", + "ubfx x20, x21, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "btc qword [rax], 0": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 7, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4]", - "eor x21, x20, #0x1", - "strb w21, [x4]", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x4", + "ldrb w21, [x20]", + "eor x22, x21, #0x1", + "strb w22, [x20]", + "ubfx x20, x21, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "btc word [rax], 15": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 8, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4, #1]", - "eor x21, x20, #0x80", - "strb w21, [x4, #1]", - "lsr w20, w20, #7", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", + "mov x20, x4", + "ldrb w21, [x20, #1]", + "eor x22, x21, #0x80", + "strb w22, [x20, #1]", + "lsr w20, w21, #7", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "btc dword [rax], 31": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 8, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4, #3]", - "eor x21, x20, #0x80", - "strb w21, [x4, #3]", - "lsr w20, w20, #7", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", + "mov x20, x4", + "ldrb w21, [x20, #3]", + "eor x22, x21, #0x80", + "strb w22, [x20, #3]", + "lsr w20, w21, #7", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "btc qword [rax], 63": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 8, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "ldrb w20, [x4, #7]", - "eor x21, x20, #0x80", - "strb w21, [x4, #7]", - "lsr w20, w20, #7", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", + "mov x20, x4", + "ldrb w21, [x20, #7]", + "eor x22, x21, #0x80", + "strb w22, [x20, #7]", + "lsr w20, w21, #7", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "lock btc word [rax], 0": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 7, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x0 (0)", - "mov w21, #0x1", - "ldeoralb w21, w20, [x20]", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x4", + "add x21, x20, #0x0 (0)", + "mov w20, #0x1", + "ldeoralb w20, w22, [x21]", + "ubfx x20, x22, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "lock btc dword [rax], 0": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 7, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x0 (0)", - "mov w21, #0x1", - "ldeoralb w21, w20, [x20]", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x4", + "add x21, x20, #0x0 (0)", + "mov w20, #0x1", + "ldeoralb w20, w22, [x21]", + "ubfx x20, x22, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "lock btc qword [rax], 0": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 7, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x0 (0)", - "mov w21, #0x1", - "ldeoralb w21, w20, [x20]", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", - "msr nzcv, x20" + "mov x20, x4", + "add x21, x20, #0x0 (0)", + "mov w20, #0x1", + "ldeoralb w20, w22, [x21]", + "ubfx x20, x22, #0, #1", + "lsl x21, x20, #29", + "msr nzcv, x21" ] }, "lock btc word [rax], 15": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 8, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x1 (1)", - "mov w21, #0x80", - "ldeoralb w21, w20, [x20]", - "lsr w20, w20, #7", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", + "mov x20, x4", + "add x21, x20, #0x1 (1)", + "mov w20, #0x80", + "ldeoralb w20, w22, [x21]", + "lsr w20, w22, #7", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "lock btc dword [rax], 31": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 8, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x3 (3)", - "mov w21, #0x80", - "ldeoralb w21, w20, [x20]", - "lsr w20, w20, #7", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", + "mov x20, x4", + "add x21, x20, #0x3 (3)", + "mov w20, #0x80", + "ldeoralb w20, w22, [x21]", + "lsr w20, w22, #7", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "lock btc qword [rax], 63": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 8, "Comment": "GROUP8 0x0F 0xBA /6", "ExpectedArm64ASM": [ - "add x20, x4, #0x7 (7)", - "mov w21, #0x80", - "ldeoralb w21, w20, [x20]", - "lsr w20, w20, #7", - "ubfx x20, x20, #0, #1", - "lsl x20, x20, #29", + "mov x20, x4", + "add x21, x20, #0x7 (7)", + "mov w20, #0x80", + "ldeoralb w20, w22, [x21]", + "lsr w20, w22, #7", + "ubfx x21, x20, #0, #1", + "lsl x20, x21, #29", "msr nzcv, x20" ] }, "cmpxchg8b [rbp]": { - "ExpectedInstructionCount": 24, + "ExpectedInstructionCount": 39, "Comment": "GROUP9 0x0F 0xC7 /1", "ExpectedArm64ASM": [ - "add x20, x9, #0x0 (0)", - "mov w21, w4", - "mov w22, w6", - "mov w23, w22", - "mov w22, w21", - "mov w21, w7", - "mov w24, w5", - "mov w25, w24", - "mov w24, w21", - "mov w2, w22", - "mov w3, w23", - "caspal w2, w3, w24, w25, [x20]", + "sub sp, sp, #0x40 (64)", + "mov x20, x9", + "add x21, x20, #0x0 (0)", + "mov x20, x4", + "mov w22, w20", + "mov x23, x6", + "mov w24, w23", + "mov x30, x24", + "mov w24, w22", + "mov w25, w30", + "mov x22, x7", + "mov w30, w22", + "mov x22, x5", + "mov w18, w22", + "str x23, [sp]", + "mov w22, w30", + "mov w23, w18", + "str x20, [sp, #32]", + "mov x30, x21", + "mov w2, w24", + "mov w3, w25", + "caspal w2, w3, w22, w23, [x30]", "mov w20, w2", "mov w21, w3", - "mov w24, w20", - "mov w25, w21", + "mov w22, w20", + "mov w23, w21", "mrs x0, nzcv", - "cmp w20, w22", - "ccmp w21, w23, #nzcv, eq", + "cmp w20, w24", + "ccmp w21, w25, #nzcv, eq", "cset w1, eq", "bfi w0, w1, #30, #1", "msr nzcv, x0", - "csel x4, x24, x4, ne", - "csel x6, x25, x6, ne" + "ldr x20, [sp, #32]", + "csel x21, x22, x20, ne", + "mov x4, x21", + "ldr x20, [sp]", + "csel x21, x23, x20, ne", + "mov x6, x21", + "add sp, sp, #0x40 (64)" ] }, "cmpxchg16b [rbp]": { - "ExpectedInstructionCount": 20, + "ExpectedInstructionCount": 35, "Comment": "GROUP9 0x0F 0xC7 /1", "ExpectedArm64ASM": [ - "add x20, x9, #0x0 (0)", - "mov x22, x4", - "mov x23, x6", - "mov x24, x7", - "mov x25, x5", - "mov x2, x22", - "mov x3, x23", - "caspal x2, x3, x24, x25, [x20]", + "sub sp, sp, #0x40 (64)", + "mov x20, x9", + "add x21, x20, #0x0 (0)", + "mov x20, x4", + "mov x22, x6", + "mov x24, x20", + "mov x25, x22", + "mov x23, x7", + "mov x30, x5", + "str x22, [sp]", + "mov x18, x23", + "mov x22, x18", + "mov x23, x30", + "str x20, [sp, #32]", + "mov x30, x21", + "mov x2, x24", + "mov x3, x25", + "caspal x2, x3, x22, x23, [x30]", "mov x20, x2", "mov x21, x3", - "mov x24, x20", - "mov x25, x21", + "mov x22, x20", + "mov x23, x21", "mrs x0, nzcv", - "cmp w20, w22", - "ccmp w21, w23, #nzcv, eq", + "cmp w20, w24", + "ccmp w21, w25, #nzcv, eq", "cset w1, eq", "bfi w0, w1, #30, #1", "msr nzcv, x0", - "csel x4, x24, x4, ne", - "csel x6, x25, x6, ne" + "ldr x20, [sp, #32]", + "csel x21, x22, x20, ne", + "mov x4, x21", + "ldr x20, [sp]", + "csel x21, x23, x20, ne", + "mov x6, x21", + "add sp, sp, #0x40 (64)" ] }, "rdrand ax": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 14, "Comment": "GROUP9 0x0F 0xC7 /6", "ExpectedArm64ASM": [ "mrs x20, rndr", "cset x21, ne", "mov x22, x20", - "mov x20, x21", - "bfxil x4, x22, #0, #16", - "mov w26, #0x1", - "mov w27, #0x0", - "lsl x20, x20, #29", + "mov x23, x21", + "mov x20, x4", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21", + "mov w20, #0x1", + "mov x26, x20", + "mov w20, #0x0", + "mov x27, x20", + "lsl x20, x23, #29", "msr nzcv, x20" ] }, "rdrand eax": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 12, "Comment": "GROUP9 0x0F 0xC7 /6", "ExpectedArm64ASM": [ "mrs x20, rndr", "cset x21, ne", "mov x22, x20", - "mov x20, x21", - "mov w4, w22", - "mov w26, #0x1", - "mov w27, #0x0", - "lsl x20, x20, #29", + "mov x23, x21", + "mov w20, w22", + "mov x4, x20", + "mov w20, #0x1", + "mov x26, x20", + "mov w20, #0x0", + "mov x27, x20", + "lsl x20, x23, #29", "msr nzcv, x20" ] }, "rdrand rax": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 11, "Comment": "GROUP9 0x0F 0xC7 /6", "ExpectedArm64ASM": [ "mrs x20, rndr", "cset x21, ne", - "mov x4, x20", - "mov x20, x21", - "mov w26, #0x1", - "mov w27, #0x0", - "lsl x20, x20, #29", + "mov x22, x20", + "mov x23, x21", + "mov x4, x22", + "mov w20, #0x1", + "mov x26, x20", + "mov w20, #0x0", + "mov x27, x20", + "lsl x20, x23, #29", "msr nzcv, x20" ] }, "rdseed ax": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 14, "Comment": "GROUP9 0x0F 0xC7 /7", "ExpectedArm64ASM": [ "mrs x20, rndrrs", "cset x21, ne", "mov x22, x20", - "mov x20, x21", - "bfxil x4, x22, #0, #16", - "mov w26, #0x1", - "mov w27, #0x0", - "lsl x20, x20, #29", + "mov x23, x21", + "mov x20, x4", + "mov x21, x20", + "bfxil x21, x22, #0, #16", + "mov x4, x21", + "mov w20, #0x1", + "mov x26, x20", + "mov w20, #0x0", + "mov x27, x20", + "lsl x20, x23, #29", "msr nzcv, x20" ] }, "rdseed eax": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 12, "Comment": "GROUP9 0x0F 0xC7 /7", "ExpectedArm64ASM": [ "mrs x20, rndrrs", "cset x21, ne", "mov x22, x20", - "mov x20, x21", - "mov w4, w22", - "mov w26, #0x1", - "mov w27, #0x0", - "lsl x20, x20, #29", + "mov x23, x21", + "mov w20, w22", + "mov x4, x20", + "mov w20, #0x1", + "mov x26, x20", + "mov w20, #0x0", + "mov x27, x20", + "lsl x20, x23, #29", "msr nzcv, x20" ] }, "rdseed rax": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 11, "Comment": "GROUP9 0x0F 0xC7 /7", "ExpectedArm64ASM": [ "mrs x20, rndrrs", "cset x21, ne", - "mov x4, x20", - "mov x20, x21", - "mov w26, #0x1", - "mov w27, #0x0", - "lsl x20, x20, #29", + "mov x22, x20", + "mov x23, x21", + "mov x4, x22", + "mov w20, #0x1", + "mov x26, x20", + "mov w20, #0x0", + "mov x27, x20", + "lsl x20, x23, #29", "msr nzcv, x20" ] }, "rdpid eax": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 19, "Comment": "GROUP9 0xF3 0x0F 0xC7 /7", "ExpectedArm64ASM": [ "mrs x0, nzcv", @@ -940,11 +1083,12 @@ "ldr x8, [x28, #40]", "str xzr, [x28, #1056]", "orr x20, x0, x1, lsl #12", - "mov w4, w20" + "mov w21, w20", + "mov x4, x21" ] }, "rdpid rax": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 19, "Comment": "GROUP9 0xF3 0x0F 0xC7 /7", "ExpectedArm64ASM": [ "mrs x0, nzcv", @@ -964,7 +1108,8 @@ "ldr x8, [x28, #40]", "str xzr, [x28, #1056]", "orr x20, x0, x1, lsl #12", - "mov w4, w20" + "mov w21, w20", + "mov x4, x21" ] }, "psrlw mm0, 0": { @@ -979,8 +1124,8 @@ "Comment": "GROUP12 0x0F 0xC7 /2", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "ushr v2.8h, v2.8h, #15", - "str d2, [x28, #768]" + "ushr v3.8h, v2.8h, #15", + "str d3, [x28, #768]" ] }, "psrlw mm0, 16": { @@ -989,8 +1134,8 @@ "Comment": "GROUP12 0x0F 0xC7 /2", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "movi v2.2d, #0x0", - "str d2, [x28, #768]" + "movi v3.2d, #0x0", + "str d3, [x28, #768]" ] }, "psrlw xmm0, 0": { @@ -1000,19 +1145,23 @@ "ExpectedArm64ASM": [] }, "psrlw xmm0, 15": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Type": "SSE", "Comment": "GROUP12 0x0F 0xC7 /2", "ExpectedArm64ASM": [ - "ushr v16.8h, v16.8h, #15" + "mov v2.16b, v16.16b", + "ushr v3.8h, v2.8h, #15", + "mov v16.16b, v3.16b" ] }, "psrlw xmm0, 16": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Type": "SSE", "Comment": "GROUP12 0x0F 0xC7 /2", "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "mov v2.16b, v16.16b", + "movi v3.2d, #0x0", + "mov v16.16b, v3.16b" ] }, "psraw mm0, 0": { @@ -1027,8 +1176,8 @@ "Comment": "GROUP12 0x0F 0xC7 /3", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "sshr v2.8h, v2.8h, #15", - "str d2, [x28, #768]" + "sshr v3.8h, v2.8h, #15", + "str d3, [x28, #768]" ] }, "psraw mm0, 16": { @@ -1037,8 +1186,8 @@ "Comment": "GROUP12 0x0F 0xC7 /3", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "sshr v2.8h, v2.8h, #15", - "str d2, [x28, #768]" + "sshr v3.8h, v2.8h, #15", + "str d3, [x28, #768]" ] }, "psraw xmm0, 0": { @@ -1048,19 +1197,23 @@ "ExpectedArm64ASM": [] }, "psraw xmm0, 15": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Type": "SSE", "Comment": "GROUP12 0x0F 0xC7 /3", "ExpectedArm64ASM": [ - "sshr v16.8h, v16.8h, #15" + "mov v2.16b, v16.16b", + "sshr v3.8h, v2.8h, #15", + "mov v16.16b, v3.16b" ] }, "psraw xmm0, 16": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Type": "SSE", "Comment": "GROUP12 0x0F 0xC7 /3", "ExpectedArm64ASM": [ - "sshr v16.8h, v16.8h, #15" + "mov v2.16b, v16.16b", + "sshr v3.8h, v2.8h, #15", + "mov v16.16b, v3.16b" ] }, "psllw mm0, 0": { @@ -1075,8 +1228,8 @@ "Comment": "GROUP12 0x0F 0xC7 /6", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "shl v2.8h, v2.8h, #15", - "str d2, [x28, #768]" + "shl v3.8h, v2.8h, #15", + "str d3, [x28, #768]" ] }, "psllw mm0, 16": { @@ -1085,8 +1238,8 @@ "Comment": "GROUP12 0x0F 0xC7 /6", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "movi v2.2d, #0x0", - "str d2, [x28, #768]" + "movi v3.2d, #0x0", + "str d3, [x28, #768]" ] }, "psllw xmm0, 0": { @@ -1096,19 +1249,23 @@ "ExpectedArm64ASM": [] }, "psllw xmm0, 15": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Type": "SSE", "Comment": "GROUP12 0x0F 0xC7 /6", "ExpectedArm64ASM": [ - "shl v16.8h, v16.8h, #15" + "mov v2.16b, v16.16b", + "shl v3.8h, v2.8h, #15", + "mov v16.16b, v3.16b" ] }, "psllw xmm0, 16": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Type": "SSE", "Comment": "GROUP12 0x0F 0xC7 /6", "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "mov v2.16b, v16.16b", + "movi v3.2d, #0x0", + "mov v16.16b, v3.16b" ] }, "psrld mm0, 0": { @@ -1123,8 +1280,8 @@ "Comment": "GROUP13 0x0F 0xC7 /2", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "ushr v2.4s, v2.4s, #31", - "str d2, [x28, #768]" + "ushr v3.4s, v2.4s, #31", + "str d3, [x28, #768]" ] }, "psrld mm0, 32": { @@ -1133,8 +1290,8 @@ "Comment": "GROUP13 0x0F 0xC7 /2", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "movi v2.2d, #0x0", - "str d2, [x28, #768]" + "movi v3.2d, #0x0", + "str d3, [x28, #768]" ] }, "psrld xmm0, 0": { @@ -1144,19 +1301,23 @@ "ExpectedArm64ASM": [] }, "psrld xmm0, 31": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Type": "SSE", "Comment": "GROUP13 0x0F 0xC7 /2", "ExpectedArm64ASM": [ - "ushr v16.4s, v16.4s, #31" + "mov v2.16b, v16.16b", + "ushr v3.4s, v2.4s, #31", + "mov v16.16b, v3.16b" ] }, "psrld xmm0, 32": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Type": "SSE", "Comment": "GROUP13 0x0F 0xC7 /2", "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "mov v2.16b, v16.16b", + "movi v3.2d, #0x0", + "mov v16.16b, v3.16b" ] }, "psrad mm0, 0": { @@ -1171,8 +1332,8 @@ "Comment": "GROUP13 0x0F 0xC7 /3", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "sshr v2.4s, v2.4s, #31", - "str d2, [x28, #768]" + "sshr v3.4s, v2.4s, #31", + "str d3, [x28, #768]" ] }, "psrad mm0, 32": { @@ -1181,8 +1342,8 @@ "Comment": "GROUP13 0x0F 0xC7 /3", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "sshr v2.4s, v2.4s, #31", - "str d2, [x28, #768]" + "sshr v3.4s, v2.4s, #31", + "str d3, [x28, #768]" ] }, "psrad xmm0, 0": { @@ -1192,19 +1353,23 @@ "ExpectedArm64ASM": [] }, "psrad xmm0, 31": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Type": "SSE", "Comment": "GROUP13 0x0F 0xC7 /3", "ExpectedArm64ASM": [ - "sshr v16.4s, v16.4s, #31" + "mov v2.16b, v16.16b", + "sshr v3.4s, v2.4s, #31", + "mov v16.16b, v3.16b" ] }, "psrad xmm0, 32": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Type": "SSE", "Comment": "GROUP13 0x0F 0xC7 /3", "ExpectedArm64ASM": [ - "sshr v16.4s, v16.4s, #31" + "mov v2.16b, v16.16b", + "sshr v3.4s, v2.4s, #31", + "mov v16.16b, v3.16b" ] }, "pslld mm0, 0": { @@ -1219,8 +1384,8 @@ "Comment": "GROUP13 0x0F 0xC7 /6", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "shl v2.4s, v2.4s, #31", - "str d2, [x28, #768]" + "shl v3.4s, v2.4s, #31", + "str d3, [x28, #768]" ] }, "pslld mm0, 32": { @@ -1229,8 +1394,8 @@ "Comment": "GROUP13 0x0F 0xC7 /6", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "movi v2.2d, #0x0", - "str d2, [x28, #768]" + "movi v3.2d, #0x0", + "str d3, [x28, #768]" ] }, "pslld xmm0, 0": { @@ -1240,19 +1405,23 @@ "ExpectedArm64ASM": [] }, "pslld xmm0, 31": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Type": "SSE", "Comment": "GROUP13 0x0F 0xC7 /6", "ExpectedArm64ASM": [ - "shl v16.4s, v16.4s, #31" + "mov v2.16b, v16.16b", + "shl v3.4s, v2.4s, #31", + "mov v16.16b, v3.16b" ] }, "pslld xmm0, 32": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Type": "SSE", "Comment": "GROUP13 0x0F 0xC7 /6", "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "mov v2.16b, v16.16b", + "movi v3.2d, #0x0", + "mov v16.16b, v3.16b" ] }, "psrlq mm0, 0": { @@ -1267,8 +1436,8 @@ "Comment": "GROUP14 0x0F 0xC7 /2", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "ushr v2.2d, v2.2d, #63", - "str d2, [x28, #768]" + "ushr v3.2d, v2.2d, #63", + "str d3, [x28, #768]" ] }, "psrlq mm0, 64": { @@ -1277,8 +1446,8 @@ "Comment": "GROUP14 0x0F 0xC7 /2", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "movi v2.2d, #0x0", - "str d2, [x28, #768]" + "movi v3.2d, #0x0", + "str d3, [x28, #768]" ] }, "psrlq xmm0, 0": { @@ -1288,19 +1457,23 @@ "ExpectedArm64ASM": [] }, "psrlq xmm0, 63": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Type": "SSE", "Comment": "GROUP14 0x0F 0xC7 /2", "ExpectedArm64ASM": [ - "ushr v16.2d, v16.2d, #63" + "mov v2.16b, v16.16b", + "ushr v3.2d, v2.2d, #63", + "mov v16.16b, v3.16b" ] }, "psrlq xmm0, 64": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Type": "SSE", "Comment": "GROUP14 0x0F 0xC7 /2", "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "mov v2.16b, v16.16b", + "movi v3.2d, #0x0", + "mov v16.16b, v3.16b" ] }, "psrldq xmm0, 0": { @@ -1310,20 +1483,23 @@ "ExpectedArm64ASM": [] }, "psrldq xmm0, 15": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Type": "SSE", "Comment": "GROUP14 0x0F 0xC7 /3", "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "ext v16.16b, v16.16b, v2.16b, #15" + "mov v2.16b, v16.16b", + "movi v3.2d, #0x0", + "ext v4.16b, v2.16b, v3.16b, #15", + "mov v16.16b, v4.16b" ] }, "psrldq xmm0, 16": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Type": "SSE", "Comment": "GROUP14 0x0F 0xC7 /3", "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "movi v2.2d, #0x0", + "mov v16.16b, v2.16b" ] }, "psllq mm0, 0": { @@ -1338,8 +1514,8 @@ "Comment": "GROUP14 0x0F 0xC7 /6", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "shl v2.2d, v2.2d, #63", - "str d2, [x28, #768]" + "shl v3.2d, v2.2d, #63", + "str d3, [x28, #768]" ] }, "psllq mm0, 64": { @@ -1348,8 +1524,8 @@ "Comment": "GROUP14 0x0F 0xC7 /6", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "movi v2.2d, #0x0", - "str d2, [x28, #768]" + "movi v3.2d, #0x0", + "str d3, [x28, #768]" ] }, "psllq xmm0, 0": { @@ -1359,152 +1535,194 @@ "ExpectedArm64ASM": [] }, "psllq xmm0, 63": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Type": "SSE", "Comment": "GROUP14 0x0F 0xC7 /6", "ExpectedArm64ASM": [ - "shl v16.2d, v16.2d, #63" + "mov v2.16b, v16.16b", + "shl v3.2d, v2.2d, #63", + "mov v16.16b, v3.16b" ] }, "psllq xmm0, 64": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Type": "SSE", "Comment": "GROUP14 0x0F 0xC7 /6", "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "mov v2.16b, v16.16b", + "movi v3.2d, #0x0", + "mov v16.16b, v3.16b" ] }, "fxsave [rax]": { - "ExpectedInstructionCount": 58, + "ExpectedInstructionCount": 77, "Comment": "GROUP15 0x0F 0xAE /0", "ExpectedArm64ASM": [ - "ldrh w20, [x28, #1024]", - "strh w20, [x4]", - "mov w20, #0x0", - "ldrb w21, [x28, #747]", - "bfi x20, x21, #11, #3", + "mov x20, x4", + "ldrh w21, [x28, #1024]", + "strh w21, [x20]", + "mov w21, #0x0", + "ldrb w22, [x28, #747]", + "mov x23, x21", + "bfi x23, x22, #11, #3", "ldrb w21, [x28, #744]", "ldrb w22, [x28, #745]", - "ldrb w23, [x28, #746]", - "ldrb w24, [x28, #750]", - "orr x20, x20, x21, lsl #8", - "orr x20, x20, x22, lsl #9", - "orr x20, x20, x23, lsl #10", - "orr x20, x20, x24, lsl #14", - "strh w20, [x4, #2]", - "ldrb w20, [x28, #1026]", - "strb w20, [x4, #4]", + "ldrb w24, [x28, #746]", + "ldrb w25, [x28, #750]", + "orr x30, x23, x21, lsl #8", + "orr x21, x30, x22, lsl #9", + "orr x22, x21, x24, lsl #10", + "orr x21, x22, x25, lsl #14", + "strh w21, [x20, #2]", + "ldrb w21, [x28, #1026]", + "strb w21, [x20, #4]", "ldr q2, [x28, #768]", - "str q2, [x4, #32]", + "str q2, [x20, #32]", "ldr q2, [x28, #784]", - "str q2, [x4, #48]", + "str q2, [x20, #48]", "ldr q2, [x28, #800]", - "str q2, [x4, #64]", + "str q2, [x20, #64]", "ldr q2, [x28, #816]", - "str q2, [x4, #80]", + "str q2, [x20, #80]", "ldr q2, [x28, #832]", - "str q2, [x4, #96]", + "str q2, [x20, #96]", "ldr q2, [x28, #848]", - "str q2, [x4, #112]", + "str q2, [x20, #112]", "ldr q2, [x28, #864]", - "str q2, [x4, #128]", + "str q2, [x20, #128]", "ldr q2, [x28, #880]", - "str q2, [x4, #144]", - "str q16, [x4, #160]", - "str q17, [x4, #176]", - "str q18, [x4, #192]", - "str q19, [x4, #208]", - "str q20, [x4, #224]", - "str q21, [x4, #240]", - "str q22, [x4, #256]", - "str q23, [x4, #272]", - "str q24, [x4, #288]", - "str q25, [x4, #304]", - "str q26, [x4, #320]", - "str q27, [x4, #336]", - "str q28, [x4, #352]", - "str q29, [x4, #368]", - "str q30, [x4, #384]", - "str q31, [x4, #400]", - "mov w20, #0x1f80", - "mrs x21, fpcr", - "ubfx x21, x21, #22, #3", - "rbit w0, w21", - "bfi x21, x0, #30, #2", - "bfi w20, w21, #13, #3", - "add x21, x4, #0x18 (24)", - "str w20, [x4, #24]", + "str q2, [x20, #144]", + "mov v2.16b, v16.16b", + "str q2, [x20, #160]", + "mov v2.16b, v17.16b", + "str q2, [x20, #176]", + "mov v2.16b, v18.16b", + "str q2, [x20, #192]", + "mov v2.16b, v19.16b", + "str q2, [x20, #208]", + "mov v2.16b, v20.16b", + "str q2, [x20, #224]", + "mov v2.16b, v21.16b", + "str q2, [x20, #240]", + "mov v2.16b, v22.16b", + "str q2, [x20, #256]", + "mov v2.16b, v23.16b", + "str q2, [x20, #272]", + "mov v2.16b, v24.16b", + "str q2, [x20, #288]", + "mov v2.16b, v25.16b", + "str q2, [x20, #304]", + "mov v2.16b, v26.16b", + "str q2, [x20, #320]", + "mov v2.16b, v27.16b", + "str q2, [x20, #336]", + "mov v2.16b, v28.16b", + "str q2, [x20, #352]", + "mov v2.16b, v29.16b", + "str q2, [x20, #368]", + "mov v2.16b, v30.16b", + "str q2, [x20, #384]", + "mov v2.16b, v31.16b", + "str q2, [x20, #400]", + "mov w21, #0x1f80", + "mrs x22, fpcr", + "ubfx x22, x22, #22, #3", + "rbit w0, w22", + "bfi x22, x0, #30, #2", + "mov w23, w21", + "bfi w23, w22, #13, #3", + "add x21, x20, #0x18 (24)", + "str w23, [x20, #24]", "mov w20, #0xffff", "str w20, [x21, #4]" ] }, "rdfsbase eax": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "GROUP15 0x0F 0xAE /0", "ExpectedArm64ASM": [ - "ldr w4, [x28, #176]" + "ldr w20, [x28, #176]", + "mov x4, x20" ] }, "rdfsbase rax": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "GROUP15 0x0F 0xAE /0", "ExpectedArm64ASM": [ - "ldr x4, [x28, #176]" + "ldr x20, [x28, #176]", + "mov x4, x20" ] }, "fxrstor [rax]": { - "ExpectedInstructionCount": 56, + "ExpectedInstructionCount": 73, "Comment": "GROUP15 0x0F 0xAE /1", "ExpectedArm64ASM": [ - "ldrh w20, [x4]", - "strh w20, [x28, #1024]", - "ldrh w20, [x4, #2]", - "ubfx w21, w20, #11, #3", - "strb w21, [x28, #747]", - "ubfx w21, w20, #8, #1", - "ubfx w22, w20, #9, #1", - "ubfx w23, w20, #10, #1", - "ubfx w20, w20, #14, #1", - "strb w21, [x28, #744]", - "strb w22, [x28, #745]", - "strb w23, [x28, #746]", - "strb w20, [x28, #750]", - "ldrb w20, [x4, #4]", - "strb w20, [x28, #1026]", - "ldr q2, [x4, #32]", + "mov x20, x4", + "ldrh w21, [x20]", + "strh w21, [x28, #1024]", + "ldrh w21, [x20, #2]", + "ubfx w22, w21, #11, #3", + "strb w22, [x28, #747]", + "ubfx w22, w21, #8, #1", + "ubfx w23, w21, #9, #1", + "ubfx w24, w21, #10, #1", + "ubfx w25, w21, #14, #1", + "strb w22, [x28, #744]", + "strb w23, [x28, #745]", + "strb w24, [x28, #746]", + "strb w25, [x28, #750]", + "ldrb w21, [x20, #4]", + "strb w21, [x28, #1026]", + "ldr q2, [x20, #32]", "str q2, [x28, #768]", - "ldr q2, [x4, #48]", + "ldr q2, [x20, #48]", "str q2, [x28, #784]", - "ldr q2, [x4, #64]", + "ldr q2, [x20, #64]", "str q2, [x28, #800]", - "ldr q2, [x4, #80]", + "ldr q2, [x20, #80]", "str q2, [x28, #816]", - "ldr q2, [x4, #96]", + "ldr q2, [x20, #96]", "str q2, [x28, #832]", - "ldr q2, [x4, #112]", + "ldr q2, [x20, #112]", "str q2, [x28, #848]", - "ldr q2, [x4, #128]", + "ldr q2, [x20, #128]", "str q2, [x28, #864]", - "ldr q2, [x4, #144]", + "ldr q2, [x20, #144]", "str q2, [x28, #880]", - "ldr q16, [x4, #160]", - "ldr q17, [x4, #176]", - "ldr q18, [x4, #192]", - "ldr q19, [x4, #208]", - "ldr q20, [x4, #224]", - "ldr q21, [x4, #240]", - "ldr q22, [x4, #256]", - "ldr q23, [x4, #272]", - "ldr q24, [x4, #288]", - "ldr q25, [x4, #304]", - "ldr q26, [x4, #320]", - "ldr q27, [x4, #336]", - "ldr q28, [x4, #352]", - "ldr q29, [x4, #368]", - "ldr q30, [x4, #384]", - "ldr q31, [x4, #400]", - "ldr w20, [x4, #24]", - "ubfx w20, w20, #13, #3", + "ldr q2, [x20, #160]", + "mov v16.16b, v2.16b", + "ldr q2, [x20, #176]", + "mov v17.16b, v2.16b", + "ldr q2, [x20, #192]", + "mov v18.16b, v2.16b", + "ldr q2, [x20, #208]", + "mov v19.16b, v2.16b", + "ldr q2, [x20, #224]", + "mov v20.16b, v2.16b", + "ldr q2, [x20, #240]", + "mov v21.16b, v2.16b", + "ldr q2, [x20, #256]", + "mov v22.16b, v2.16b", + "ldr q2, [x20, #272]", + "mov v23.16b, v2.16b", + "ldr q2, [x20, #288]", + "mov v24.16b, v2.16b", + "ldr q2, [x20, #304]", + "mov v25.16b, v2.16b", + "ldr q2, [x20, #320]", + "mov v26.16b, v2.16b", + "ldr q2, [x20, #336]", + "mov v27.16b, v2.16b", + "ldr q2, [x20, #352]", + "mov v28.16b, v2.16b", + "ldr q2, [x20, #368]", + "mov v29.16b, v2.16b", + "ldr q2, [x20, #384]", + "mov v30.16b, v2.16b", + "ldr q2, [x20, #400]", + "mov v31.16b, v2.16b", + "ldr w21, [x20, #24]", + "ubfx w20, w21, #13, #3", "rbit w1, w20", "lsr w1, w1, #30", "mrs x0, fpcr", @@ -1515,25 +1733,28 @@ ] }, "rdgsbase eax": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "GROUP15 0x0F 0xAE /1", "ExpectedArm64ASM": [ - "ldr w4, [x28, #168]" + "ldr w20, [x28, #168]", + "mov x4, x20" ] }, "rdgsbase rax": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "GROUP15 0x0F 0xAE /1", "ExpectedArm64ASM": [ - "ldr x4, [x28, #168]" + "ldr x20, [x28, #168]", + "mov x4, x20" ] }, "ldmxcsr [rax]": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 10, "Comment": "GROUP15 0x0F 0xAE /2", "ExpectedArm64ASM": [ - "ldr w20, [x4]", - "ubfx w20, w20, #13, #3", + "mov x20, x4", + "ldr w21, [x20]", + "ubfx w20, w21, #13, #3", "rbit w1, w20", "lsr w1, w1, #30", "mrs x0, fpcr", @@ -1544,22 +1765,24 @@ ] }, "wrfsbase eax": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "GROUP15 0x0F 0xAE /2", "ExpectedArm64ASM": [ - "mov w20, w4", - "str x20, [x28, #176]" + "mov x20, x4", + "mov w21, w20", + "str x21, [x28, #176]" ] }, "wrfsbase rax": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "GROUP15 0x0F 0xAE /2", "ExpectedArm64ASM": [ - "str x4, [x28, #176]" + "mov x20, x4", + "str x20, [x28, #176]" ] }, "stmxcsr [rax]": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "GROUP15 0x0F 0xAE /3", "ExpectedArm64ASM": [ "mov w20, #0x1f80", @@ -1567,98 +1790,127 @@ "ubfx x21, x21, #22, #3", "rbit w0, w21", "bfi x21, x0, #30, #2", - "bfi w20, w21, #13, #3", - "str w20, [x4]" + "mov w22, w20", + "bfi w22, w21, #13, #3", + "mov x20, x4", + "str w22, [x20]" ] }, "wrgsbase eax": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "GROUP15 0x0F 0xAE /3", "ExpectedArm64ASM": [ - "mov w20, w4", - "str x20, [x28, #168]" + "mov x20, x4", + "mov w21, w20", + "str x21, [x28, #168]" ] }, "wrgsbase rax": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "GROUP15 0x0F 0xAE /3", "ExpectedArm64ASM": [ - "str x4, [x28, #168]" + "mov x20, x4", + "str x20, [x28, #168]" ] }, "xsave [rax]": { - "ExpectedInstructionCount": 69, + "ExpectedInstructionCount": 94, "Comment": "GROUP15 0x0F 0xAE /4", "ExpectedArm64ASM": [ - "ubfx x20, x4, #0, #1", - "cbnz x20, #+0x8", - "b #+0x84", - "ldrh w20, [x28, #1024]", - "strh w20, [x4]", - "mov w20, #0x0", - "ldrb w21, [x28, #747]", - "bfi x20, x21, #11, #3", + "mov x20, x4", + "ubfx x21, x20, #0, #1", + "cbnz x21, #+0x8", + "b #+0x8c", + "mov x20, x4", + "ldrh w21, [x28, #1024]", + "strh w21, [x20]", + "mov w21, #0x0", + "ldrb w22, [x28, #747]", + "mov x23, x21", + "bfi x23, x22, #11, #3", "ldrb w21, [x28, #744]", "ldrb w22, [x28, #745]", - "ldrb w23, [x28, #746]", - "ldrb w24, [x28, #750]", - "orr x20, x20, x21, lsl #8", - "orr x20, x20, x22, lsl #9", - "orr x20, x20, x23, lsl #10", - "orr x20, x20, x24, lsl #14", - "strh w20, [x4, #2]", - "ldrb w20, [x28, #1026]", - "strb w20, [x4, #4]", + "ldrb w24, [x28, #746]", + "ldrb w25, [x28, #750]", + "orr x30, x23, x21, lsl #8", + "orr x21, x30, x22, lsl #9", + "orr x22, x21, x24, lsl #10", + "orr x21, x22, x25, lsl #14", + "strh w21, [x20, #2]", + "ldrb w21, [x28, #1026]", + "strb w21, [x20, #4]", "ldr q2, [x28, #768]", - "str q2, [x4, #32]", + "str q2, [x20, #32]", "ldr q2, [x28, #784]", - "str q2, [x4, #48]", + "str q2, [x20, #48]", "ldr q2, [x28, #800]", - "str q2, [x4, #64]", + "str q2, [x20, #64]", "ldr q2, [x28, #816]", - "str q2, [x4, #80]", + "str q2, [x20, #80]", "ldr q2, [x28, #832]", - "str q2, [x4, #96]", + "str q2, [x20, #96]", "ldr q2, [x28, #848]", - "str q2, [x4, #112]", + "str q2, [x20, #112]", "ldr q2, [x28, #864]", - "str q2, [x4, #128]", + "str q2, [x20, #128]", "ldr q2, [x28, #880]", - "str q2, [x4, #144]", - "ubfx x20, x4, #1, #1", - "cbnz x20, #+0x8", - "b #+0x44", - "str q16, [x4, #160]", - "str q17, [x4, #176]", - "str q18, [x4, #192]", - "str q19, [x4, #208]", - "str q20, [x4, #224]", - "str q21, [x4, #240]", - "str q22, [x4, #256]", - "str q23, [x4, #272]", - "str q24, [x4, #288]", - "str q25, [x4, #304]", - "str q26, [x4, #320]", - "str q27, [x4, #336]", - "str q28, [x4, #352]", - "str q29, [x4, #368]", - "str q30, [x4, #384]", - "str q31, [x4, #400]", - "ubfx x20, x4, #1, #2", - "cbnz x20, #+0x8", - "b #+0x2c", - "mov w20, #0x1f80", - "mrs x21, fpcr", - "ubfx x21, x21, #22, #3", - "rbit w0, w21", - "bfi x21, x0, #30, #2", - "bfi w20, w21, #13, #3", - "add x21, x4, #0x18 (24)", - "str w20, [x4, #24]", + "str q2, [x20, #144]", + "mov x20, x4", + "ubfx x21, x20, #1, #1", + "cbnz x21, #+0x8", + "b #+0x88", + "mov x20, x4", + "mov v2.16b, v16.16b", + "str q2, [x20, #160]", + "mov v2.16b, v17.16b", + "str q2, [x20, #176]", + "mov v2.16b, v18.16b", + "str q2, [x20, #192]", + "mov v2.16b, v19.16b", + "str q2, [x20, #208]", + "mov v2.16b, v20.16b", + "str q2, [x20, #224]", + "mov v2.16b, v21.16b", + "str q2, [x20, #240]", + "mov v2.16b, v22.16b", + "str q2, [x20, #256]", + "mov v2.16b, v23.16b", + "str q2, [x20, #272]", + "mov v2.16b, v24.16b", + "str q2, [x20, #288]", + "mov v2.16b, v25.16b", + "str q2, [x20, #304]", + "mov v2.16b, v26.16b", + "str q2, [x20, #320]", + "mov v2.16b, v27.16b", + "str q2, [x20, #336]", + "mov v2.16b, v28.16b", + "str q2, [x20, #352]", + "mov v2.16b, v29.16b", + "str q2, [x20, #368]", + "mov v2.16b, v30.16b", + "str q2, [x20, #384]", + "mov v2.16b, v31.16b", + "str q2, [x20, #400]", + "mov x20, x4", + "ubfx x21, x20, #1, #2", + "cbnz x21, #+0x8", + "b #+0x34", + "mov x20, x4", + "mov w21, #0x1f80", + "mrs x22, fpcr", + "ubfx x22, x22, #22, #3", + "rbit w0, w22", + "bfi x22, x0, #30, #2", + "mov w23, w21", + "bfi w23, w22, #13, #3", + "add x21, x20, #0x18 (24)", + "str w23, [x20, #24]", "mov w20, #0xffff", "str w20, [x21, #4]", - "ubfx x20, x4, #0, #3", - "str x20, [x4, #512]" + "mov x20, x4", + "ubfx x21, x20, #0, #3", + "str x21, [x20, #512]" ] }, "lfence": { @@ -1669,43 +1921,45 @@ ] }, "xrstor [rax]": { - "ExpectedInstructionCount": 105, + "ExpectedInstructionCount": 128, "Comment": "GROUP15 0x0F 0xAE /5", "ExpectedArm64ASM": [ - "ldr x20, [x4, #512]", - "ubfx x20, x20, #0, #1", + "mov x20, x4", + "ldr x21, [x20, #512]", + "ubfx x20, x21, #0, #1", "cbnz x20, #+0x8", - "b #+0x84", - "ldrh w20, [x4]", - "strh w20, [x28, #1024]", - "ldrh w20, [x4, #2]", - "ubfx w21, w20, #11, #3", - "strb w21, [x28, #747]", - "ubfx w21, w20, #8, #1", - "ubfx w22, w20, #9, #1", - "ubfx w23, w20, #10, #1", - "ubfx w20, w20, #14, #1", - "strb w21, [x28, #744]", - "strb w22, [x28, #745]", - "strb w23, [x28, #746]", - "strb w20, [x28, #750]", - "ldrb w20, [x4, #4]", - "strb w20, [x28, #1026]", - "ldr q2, [x4, #32]", + "b #+0x88", + "mov x20, x4", + "ldrh w21, [x20]", + "strh w21, [x28, #1024]", + "ldrh w21, [x20, #2]", + "ubfx w22, w21, #11, #3", + "strb w22, [x28, #747]", + "ubfx w22, w21, #8, #1", + "ubfx w23, w21, #9, #1", + "ubfx w24, w21, #10, #1", + "ubfx w25, w21, #14, #1", + "strb w22, [x28, #744]", + "strb w23, [x28, #745]", + "strb w24, [x28, #746]", + "strb w25, [x28, #750]", + "ldrb w21, [x20, #4]", + "strb w21, [x28, #1026]", + "ldr q2, [x20, #32]", "str q2, [x28, #768]", - "ldr q2, [x4, #48]", + "ldr q2, [x20, #48]", "str q2, [x28, #784]", - "ldr q2, [x4, #64]", + "ldr q2, [x20, #64]", "str q2, [x28, #800]", - "ldr q2, [x4, #80]", + "ldr q2, [x20, #80]", "str q2, [x28, #816]", - "ldr q2, [x4, #96]", + "ldr q2, [x20, #96]", "str q2, [x28, #832]", - "ldr q2, [x4, #112]", + "ldr q2, [x20, #112]", "str q2, [x28, #848]", - "ldr q2, [x4, #128]", + "ldr q2, [x20, #128]", "str q2, [x28, #864]", - "ldr q2, [x4, #144]", + "ldr q2, [x20, #144]", "str q2, [x28, #880]", "b #+0x4c", "mov w20, #0x0", @@ -1726,49 +1980,70 @@ "str q2, [x28, #848]", "str q2, [x28, #864]", "str q2, [x28, #880]", - "ldr x20, [x4, #512]", - "ubfx x20, x20, #1, #1", + "mov x20, x4", + "ldr x21, [x20, #512]", + "ubfx x20, x21, #1, #1", "cbnz x20, #+0x8", + "b #+0x8c", + "mov x20, x4", + "ldr q2, [x20, #160]", + "mov v16.16b, v2.16b", + "ldr q2, [x20, #176]", + "mov v17.16b, v2.16b", + "ldr q2, [x20, #192]", + "mov v18.16b, v2.16b", + "ldr q2, [x20, #208]", + "mov v19.16b, v2.16b", + "ldr q2, [x20, #224]", + "mov v20.16b, v2.16b", + "ldr q2, [x20, #240]", + "mov v21.16b, v2.16b", + "ldr q2, [x20, #256]", + "mov v22.16b, v2.16b", + "ldr q2, [x20, #272]", + "mov v23.16b, v2.16b", + "ldr q2, [x20, #288]", + "mov v24.16b, v2.16b", + "ldr q2, [x20, #304]", + "mov v25.16b, v2.16b", + "ldr q2, [x20, #320]", + "mov v26.16b, v2.16b", + "ldr q2, [x20, #336]", + "mov v27.16b, v2.16b", + "ldr q2, [x20, #352]", + "mov v28.16b, v2.16b", + "ldr q2, [x20, #368]", + "mov v29.16b, v2.16b", + "ldr q2, [x20, #384]", + "mov v30.16b, v2.16b", + "ldr q2, [x20, #400]", + "mov v31.16b, v2.16b", "b #+0x48", - "ldr q16, [x4, #160]", - "ldr q17, [x4, #176]", - "ldr q18, [x4, #192]", - "ldr q19, [x4, #208]", - "ldr q20, [x4, #224]", - "ldr q21, [x4, #240]", - "ldr q22, [x4, #256]", - "ldr q23, [x4, #272]", - "ldr q24, [x4, #288]", - "ldr q25, [x4, #304]", - "ldr q26, [x4, #320]", - "ldr q27, [x4, #336]", - "ldr q28, [x4, #352]", - "ldr q29, [x4, #368]", - "ldr q30, [x4, #384]", - "ldr q31, [x4, #400]", - "b #+0x44", - "movi v16.2d, #0x0", - "mov v17.16b, v16.16b", - "mov v18.16b, v16.16b", - "mov v19.16b, v16.16b", - "mov v20.16b, v16.16b", - "mov v21.16b, v16.16b", - "mov v22.16b, v16.16b", - "mov v23.16b, v16.16b", - "mov v24.16b, v16.16b", - "mov v25.16b, v16.16b", - "mov v26.16b, v16.16b", - "mov v27.16b, v16.16b", - "mov v28.16b, v16.16b", - "mov v29.16b, v16.16b", - "mov v30.16b, v16.16b", - "mov v31.16b, v16.16b", - "ldr x20, [x4, #512]", - "ubfx x20, x20, #1, #2", + "movi v2.2d, #0x0", + "mov v16.16b, v2.16b", + "mov v17.16b, v2.16b", + "mov v18.16b, v2.16b", + "mov v19.16b, v2.16b", + "mov v20.16b, v2.16b", + "mov v21.16b, v2.16b", + "mov v22.16b, v2.16b", + "mov v23.16b, v2.16b", + "mov v24.16b, v2.16b", + "mov v25.16b, v2.16b", + "mov v26.16b, v2.16b", + "mov v27.16b, v2.16b", + "mov v28.16b, v2.16b", + "mov v29.16b, v2.16b", + "mov v30.16b, v2.16b", + "mov v31.16b, v2.16b", + "mov x20, x4", + "ldr x21, [x20, #512]", + "ubfx x20, x21, #1, #2", "cbnz x20, #+0x8", - "b #+0x2c", - "ldr w20, [x4, #24]", - "ubfx w20, w20, #13, #3", + "b #+0x30", + "mov x20, x4", + "ldr w21, [x20, #24]", + "ubfx w20, w21, #13, #3", "rbit w1, w20", "lsr w1, w1, #30", "mrs x0, fpcr", @@ -1787,10 +2062,11 @@ ] }, "clwb [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "GROUP15 0x0F 0xAE /6", "ExpectedArm64ASM": [ - "dc cvac, x4" + "mov x20, x4", + "dc cvac, x20" ] }, "sfence": { @@ -1801,54 +2077,60 @@ ] }, "clflush [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": "GROUP15 0x0F 0xAE /7", "ExpectedArm64ASM": [ - "dc civac, x4", + "mov x20, x4", + "dc civac, x20", "dsb ish" ] }, "clflushopt [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "GROUP15 0x0F 0xAE /7", "ExpectedArm64ASM": [ - "dc civac, x4" + "mov x20, x4", + "dc civac, x20" ] }, "prefetchnta [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "GROUP16 0x0F 0x18 /0" ], "ExpectedArm64ASM": [ - "prfm pldl1strm, [x4]" + "mov x20, x4", + "prfm pldl1strm, [x20]" ] }, "prefetcht0 [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "GROUP16 0x0F 0x18 /1" ], "ExpectedArm64ASM": [ - "prfm pldl1keep, [x4]" + "mov x20, x4", + "prfm pldl1keep, [x20]" ] }, "prefetcht1 [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "GROUP16 0x0F 0x18 /2" ], "ExpectedArm64ASM": [ - "prfm pldl2keep, [x4]" + "mov x20, x4", + "prfm pldl2keep, [x20]" ] }, "prefetcht2 [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "GROUP16 0x0F 0x18 /3" ], "ExpectedArm64ASM": [ - "prfm pldl3keep, [x4]" + "mov x20, x4", + "prfm pldl3keep, [x20]" ] }, "db 0x0f, 0x18, 0x20;": { @@ -1861,31 +2143,34 @@ "ExpectedArm64ASM": [] }, "db 0x0f, 0x0d, 0x00": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "GROUPP 0x0F 0x0D /0", "prefetch_exclusive [rax]" ], "ExpectedArm64ASM": [ - "prfm pldl1keep, [x4]" + "mov x20, x4", + "prfm pldl1keep, [x20]" ] }, "prefetchw [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "GROUPP 0x0F 0x0D /1" ], "ExpectedArm64ASM": [ - "prfm pstl1keep, [x4]" + "mov x20, x4", + "prfm pstl1keep, [x20]" ] }, "prefetchwt1 [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "GROUPP 0x0F 0x0D /2" ], "ExpectedArm64ASM": [ - "prfm pstl1keep, [x4]" + "mov x20, x4", + "prfm pstl1keep, [x20]" ] } } diff --git a/unittests/InstructionCountCI/SecondaryModRM.json b/unittests/InstructionCountCI/SecondaryModRM.json index 8d12aa2d2a..eb0007ef23 100644 --- a/unittests/InstructionCountCI/SecondaryModRM.json +++ b/unittests/InstructionCountCI/SecondaryModRM.json @@ -14,9 +14,10 @@ }, "Instructions": { "xgetbv": { - "ExpectedInstructionCount": 54, + "ExpectedInstructionCount": 57, "Comment": "0xF 0x01 /2 RM-0", "ExpectedArm64ASM": [ + "mov x20, x5", "sub sp, sp, #0xf0 (240)", "mov x3, sp", "st1 {v2.2d, v3.2d}, [x3], #32", @@ -41,7 +42,7 @@ "st1 {v20.2d, v21.2d, v22.2d, v23.2d}, [x3], #64", "st1 {v24.2d, v25.2d, v26.2d, v27.2d}, [x3], #64", "st1 {v28.2d, v29.2d, v30.2d, v31.2d}, [x3], #64", - "mov w1, w5", + "mov w1, w20", "ldr x0, [x28, #1112]", "ldr x2, [x28, #1128]", "blr x2", @@ -67,20 +68,22 @@ "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [sp], #64", "ld1 {v12.2d, v13.2d, v14.2d, v15.2d}, [sp], #64", "ldr x30, [sp], #16", - "mov w20, w0", - "lsr x21, x0, #32", - "mov w4, w20", - "mov w6, w21" + "mov w22, w0", + "lsr x23, x0, #32", + "mov w20, w22", + "mov w21, w23", + "mov x4, x20", + "mov x6, x21" ] }, "rdtscp": { - "ExpectedInstructionCount": 21, + "ExpectedInstructionCount": 24, "Comment": "0xF 0x01 /7 RM-1", "ExpectedArm64ASM": [ "dmb ld", "mrs x20, S3_3_c14_c0_2", - "lsl w4, w20, #7", - "lsr x6, x20, #25", + "lsl w21, w20, #7", + "lsr x22, x20, #25", "mrs x0, nzcv", "str w0, [x28, #728]", "str x8, [x28, #40]", @@ -97,14 +100,18 @@ "msr nzcv, x8", "ldr x8, [x28, #40]", "str xzr, [x28, #1056]", - "orr x5, x0, x1, lsl #12" + "orr x20, x0, x1, lsl #12", + "mov x4, x21", + "mov x5, x20", + "mov x6, x22" ] }, "clzero rax": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "0xF 0x01 /7 RM-4", "ExpectedArm64ASM": [ - "dc zva, x4" + "mov x20, x4", + "dc zva, x20" ] } } diff --git a/unittests/InstructionCountCI/Secondary_32Bit.json b/unittests/InstructionCountCI/Secondary_32Bit.json index 28c6981829..d517028202 100644 --- a/unittests/InstructionCountCI/Secondary_32Bit.json +++ b/unittests/InstructionCountCI/Secondary_32Bit.json @@ -9,45 +9,55 @@ }, "Instructions": { "push fs": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xa0", "ExpectedArm64ASM": [ - "ldrh w20, [x28, #146]", - "str w20, [x8, #-4]!" + "mov w20, w8", + "ldrh w21, [x28, #146]", + "mov w22, w20", + "str w21, [x22, #-4]!", + "mov w8, w22" ] }, "pop fs": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0xa1", "ExpectedArm64ASM": [ - "ldr w20, [x8]", - "add x8, x8, #0x4 (4)", - "strh w20, [x28, #146]", - "ubfx w20, w20, #3, #13", + "mov w20, w8", + "ldr w21, [x20]", + "add x22, x20, #0x4 (4)", + "mov w8, w22", + "strh w21, [x28, #146]", + "ubfx w20, w21, #3, #13", "add x0, x28, x20, lsl #2", - "ldr w20, [x0, #896]", - "str w20, [x28, #176]" + "ldr w21, [x0, #896]", + "str w21, [x28, #176]" ] }, "push gs": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xa8", "ExpectedArm64ASM": [ - "ldrh w20, [x28, #144]", - "str w20, [x8, #-4]!" + "mov w20, w8", + "ldrh w21, [x28, #144]", + "mov w22, w20", + "str w21, [x22, #-4]!", + "mov w8, w22" ] }, "pop gs": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "0x0f 0xa9", "ExpectedArm64ASM": [ - "ldr w20, [x8]", - "add x8, x8, #0x4 (4)", - "strh w20, [x28, #144]", - "ubfx w20, w20, #3, #13", + "mov w20, w8", + "ldr w21, [x20]", + "add x22, x20, #0x4 (4)", + "mov w8, w22", + "strh w21, [x28, #144]", + "ubfx w20, w21, #3, #13", "add x0, x28, x20, lsl #2", - "ldr w20, [x0, #896]", - "str w20, [x28, #168]" + "ldr w21, [x0, #896]", + "str w21, [x28, #168]" ] } } diff --git a/unittests/InstructionCountCI/Secondary_OpSize.json b/unittests/InstructionCountCI/Secondary_OpSize.json index 2d3b36f05d..c8bdd2bab7 100644 --- a/unittests/InstructionCountCI/Secondary_OpSize.json +++ b/unittests/InstructionCountCI/Secondary_OpSize.json @@ -17,66 +17,89 @@ "ExpectedArm64ASM": [] }, "movupd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "0x66 0x0f 0x10", "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b" + "mov v2.16b, v17.16b", + "mov v16.16b, v2.16b" ] }, "movupd xmm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0x10", "ExpectedArm64ASM": [ - "ldr q16, [x4]" + "mov x20, x4", + "ldr q2, [x20]", + "mov v16.16b, v2.16b" ] }, "movupd [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0x11", "ExpectedArm64ASM": [ - "str q16, [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "str q2, [x20]" ] }, "movlpd xmm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0x12", "ExpectedArm64ASM": [ - "ld1 {v16.d}[0], [x4]" + "mov x20, x4", + "mov v2.16b, v16.16b", + "mov v3.16b, v2.16b", + "ld1 {v3.d}[0], [x20]", + "mov v16.16b, v3.16b" ] }, "movlpd [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0x13", "ExpectedArm64ASM": [ - "str d16, [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "str d2, [x20]" ] }, "unpcklpd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0x14", "ExpectedArm64ASM": [ - "zip1 v16.2d, v16.2d, v17.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "zip1 v4.2d, v2.2d, v3.2d", + "mov v16.16b, v4.16b" ] }, "unpckhpd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0x15", "ExpectedArm64ASM": [ - "zip2 v16.2d, v16.2d, v17.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "zip2 v4.2d, v2.2d, v3.2d", + "mov v16.16b, v4.16b" ] }, "movhpd xmm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0x16", "ExpectedArm64ASM": [ - "ld1 {v16.d}[1], [x4]" + "mov x20, x4", + "mov v2.16b, v16.16b", + "mov v3.16b, v2.16b", + "ld1 {v3.d}[1], [x20]", + "mov v16.16b, v3.16b" ] }, "movhpd [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0x17", "ExpectedArm64ASM": [ - "st1 {v16.d}[1], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "st1 {v2.d}[1], [x20]" ] }, "movapd xmm0, xmm0": { @@ -85,373 +108,493 @@ "ExpectedArm64ASM": [] }, "movapd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "0x66 0x0f 0x28", "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b" + "mov v2.16b, v17.16b", + "mov v16.16b, v2.16b" ] }, "movapd xmm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0x28", "ExpectedArm64ASM": [ - "ldr q16, [x4]" + "mov x20, x4", + "ldr q2, [x20]", + "mov v16.16b, v2.16b" ] }, "movapd [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0x29", "ExpectedArm64ASM": [ - "str q16, [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "str q2, [x20]" ] }, "cvtpi2pd xmm0, mm0": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0x2a", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", - "sxtl v2.2d, v2.2s", - "scvtf v16.2d, v2.2d" + "sxtl v3.2d, v2.2s", + "scvtf v2.2d, v3.2d", + "mov v16.16b, v2.16b" ] }, "movntpd [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0x2b", "ExpectedArm64ASM": [ - "str q16, [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "str q2, [x20]" ] }, "cvttpd2pi mm0, xmm0": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0x2c", "ExpectedArm64ASM": [ - "fcvtn v2.2s, v16.2d", - "fcvtzs v2.2s, v2.2s", + "mov v2.16b, v16.16b", + "fcvtn v3.2s, v2.2d", + "fcvtzs v2.2s, v3.2s", "str d2, [x28, #768]" ] }, "cvtpd2pi mm0, xmm0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0x2d", "ExpectedArm64ASM": [ - "fcvtn v2.2s, v16.2d", - "frinti v2.2s, v2.2s", + "mov v2.16b, v16.16b", + "fcvtn v3.2s, v2.2d", + "frinti v2.2s, v3.2s", "fcvtzs v2.2s, v2.2s", "str d2, [x28, #768]" ] }, "ucomisd xmm0, xmm1": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 15, "Comment": "0x66 0x0f 0x2e", "ExpectedArm64ASM": [ - "fcmp d16, d17", - "mov w27, #0x0", - "cset w20, eq", - "cset w21, lo", - "cset w22, vs", - "orr w21, w21, w22", - "lsl x21, x21, #29", - "orr w20, w20, w22", - "orr w20, w21, w20, lsl #30", - "eor w26, w22, #0x1", - "msr nzcv, x20" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "fcmp d2, d3", + "mov w20, #0x0", + "cset w21, eq", + "cset w22, lo", + "cset w23, vs", + "orr w24, w22, w23", + "lsl x22, x24, #29", + "orr w24, w21, w23", + "orr w21, w22, w24, lsl #30", + "eor w22, w23, #0x1", + "mov x26, x22", + "mov x27, x20", + "msr nzcv, x21" ] }, "comisd xmm0, xmm1": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 15, "Comment": "0x66 0x0f 0x2f", "ExpectedArm64ASM": [ - "fcmp d16, d17", - "mov w27, #0x0", - "cset w20, eq", - "cset w21, lo", - "cset w22, vs", - "orr w21, w21, w22", - "lsl x21, x21, #29", - "orr w20, w20, w22", - "orr w20, w21, w20, lsl #30", - "eor w26, w22, #0x1", - "msr nzcv, x20" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "fcmp d2, d3", + "mov w20, #0x0", + "cset w21, eq", + "cset w22, lo", + "cset w23, vs", + "orr w24, w22, w23", + "lsl x22, x24, #29", + "orr w24, w21, w23", + "orr w21, w22, w24, lsl #30", + "eor w22, w23, #0x1", + "mov x26, x22", + "mov x27, x20", + "msr nzcv, x21" ] }, "movmskpd eax, xmm0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "0x66 0x0f 0x50", "ExpectedArm64ASM": [ - "uzp2 v2.4s, v16.4s, v16.4s", - "mov x20, v2.d[0]", - "bfi x20, x20, #31, #32", - "lsr x4, x20, #62" + "mov v2.16b, v16.16b", + "uzp2 v3.4s, v2.4s, v2.4s", + "mov x20, v3.d[0]", + "mov x21, x20", + "bfi x21, x20, #31, #32", + "lsr x20, x21, #62", + "mov x4, x20" ] }, "sqrtpd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0x51", "ExpectedArm64ASM": [ - "fsqrt v16.2d, v17.2d" + "mov v2.16b, v17.16b", + "fsqrt v3.2d, v2.2d", + "mov v16.16b, v3.16b" ] }, "addpd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0x58", "ExpectedArm64ASM": [ - "fadd v16.2d, v16.2d, v17.2d" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "fadd v4.2d, v3.2d, v2.2d", + "mov v16.16b, v4.16b" ] }, "mulpd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0x59", "ExpectedArm64ASM": [ - "fmul v16.2d, v16.2d, v17.2d" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "fmul v4.2d, v3.2d, v2.2d", + "mov v16.16b, v4.16b" ] }, "cvtpd2ps xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0x5a", "ExpectedArm64ASM": [ - "fcvtn v16.2s, v17.2d" + "mov v2.16b, v17.16b", + "fcvtn v3.2s, v2.2d", + "mov v16.16b, v3.16b" ] }, "cvtpd2ps xmm0, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0xf2 0x0f 0x5a", "ExpectedArm64ASM": [ - "ldr q2, [x4]", - "fcvtn v16.2s, v2.2d" + "mov x20, x4", + "ldr q2, [x20]", + "fcvtn v3.2s, v2.2d", + "mov v16.16b, v3.16b" ] }, "cvtps2dq xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0x5b", "ExpectedArm64ASM": [ - "frinti v16.4s, v17.4s", - "fcvtzs v16.4s, v16.4s" + "mov v2.16b, v17.16b", + "frinti v3.4s, v2.4s", + "fcvtzs v3.4s, v3.4s", + "mov v16.16b, v3.16b" ] }, "cvtps2dq xmm0, [rax]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0xf2 0x0f 0x5b", "ExpectedArm64ASM": [ - "ldr q2, [x4]", - "frinti v16.4s, v2.4s", - "fcvtzs v16.4s, v16.4s" + "mov x20, x4", + "ldr q2, [x20]", + "frinti v3.4s, v2.4s", + "fcvtzs v3.4s, v3.4s", + "mov v16.16b, v3.16b" ] }, "subpd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0x5c", "ExpectedArm64ASM": [ - "fsub v16.2d, v16.2d, v17.2d" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "fsub v4.2d, v3.2d, v2.2d", + "mov v16.16b, v4.16b" ] }, "minpd xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x66 0x0f 0x5d", "ExpectedArm64ASM": [ - "fcmgt v0.2d, v17.2d, v16.2d", - "bif v16.16b, v17.16b, v0.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "fcmgt v0.2d, v2.2d, v3.2d", + "mov v4.16b, v3.16b", + "bif v4.16b, v2.16b, v0.16b", + "mov v16.16b, v4.16b" ] }, "divpd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0x5e", "ExpectedArm64ASM": [ - "fdiv v16.2d, v16.2d, v17.2d" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "fdiv v4.2d, v3.2d, v2.2d", + "mov v16.16b, v4.16b" ] }, "maxpd xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x66 0x0f 0x5f", "ExpectedArm64ASM": [ - "fcmgt v0.2d, v17.2d, v16.2d", - "bit v16.16b, v17.16b, v0.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "fcmgt v0.2d, v2.2d, v3.2d", + "mov v4.16b, v3.16b", + "bit v4.16b, v2.16b, v0.16b", + "mov v16.16b, v4.16b" ] }, "punpcklbw xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0x60", "ExpectedArm64ASM": [ - "zip1 v16.16b, v16.16b, v17.16b" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "zip1 v4.16b, v2.16b, v3.16b", + "mov v16.16b, v4.16b" ] }, "punpcklbw xmm0, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0x60", "ExpectedArm64ASM": [ - "ldr q2, [x4]", - "zip1 v16.16b, v16.16b, v2.16b" + "mov v2.16b, v16.16b", + "mov x20, x4", + "ldr q3, [x20]", + "zip1 v4.16b, v2.16b, v3.16b", + "mov v16.16b, v4.16b" ] }, "punpcklwd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0x61", "ExpectedArm64ASM": [ - "zip1 v16.8h, v16.8h, v17.8h" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "zip1 v4.8h, v2.8h, v3.8h", + "mov v16.16b, v4.16b" ] }, "punpcklwd xmm0, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0x61", "ExpectedArm64ASM": [ - "ldr q2, [x4]", - "zip1 v16.8h, v16.8h, v2.8h" + "mov v2.16b, v16.16b", + "mov x20, x4", + "ldr q3, [x20]", + "zip1 v4.8h, v2.8h, v3.8h", + "mov v16.16b, v4.16b" ] }, "punpckldq xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0x62", "ExpectedArm64ASM": [ - "zip1 v16.4s, v16.4s, v17.4s" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "zip1 v4.4s, v2.4s, v3.4s", + "mov v16.16b, v4.16b" ] }, "punpckldq xmm0, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0x62", "ExpectedArm64ASM": [ - "ldr q2, [x4]", - "zip1 v16.4s, v16.4s, v2.4s" + "mov v2.16b, v16.16b", + "mov x20, x4", + "ldr q3, [x20]", + "zip1 v4.4s, v2.4s, v3.4s", + "mov v16.16b, v4.16b" ] }, "packsswb xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0x63", "ExpectedArm64ASM": [ - "sqxtn v16.8b, v16.8h", - "sqxtn2 v16.16b, v17.8h" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "sqxtn v4.8b, v2.8h", + "sqxtn2 v4.16b, v3.8h", + "mov v16.16b, v4.16b" ] }, "packsswb xmm0, [rax]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x66 0x0f 0x63", "ExpectedArm64ASM": [ - "ldr q2, [x4]", - "sqxtn v16.8b, v16.8h", - "sqxtn2 v16.16b, v2.8h" + "mov v2.16b, v16.16b", + "mov x20, x4", + "ldr q3, [x20]", + "sqxtn v4.8b, v2.8h", + "sqxtn2 v4.16b, v3.8h", + "mov v16.16b, v4.16b" ] }, "packsswb xmm0, xmm0": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0x63", "ExpectedArm64ASM": [ - "mov v0.16b, v16.16b", - "sqxtn v16.8b, v16.8h", - "sqxtn2 v16.16b, v0.8h" + "mov v2.16b, v16.16b", + "sqxtn v3.8b, v2.8h", + "sqxtn2 v3.16b, v2.8h", + "mov v16.16b, v3.16b" ] }, "pcmpgtb xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0x64", "ExpectedArm64ASM": [ - "cmgt v16.16b, v16.16b, v17.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "cmgt v4.16b, v3.16b, v2.16b", + "mov v16.16b, v4.16b" ] }, "pcmpgtw xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0x65", "ExpectedArm64ASM": [ - "cmgt v16.8h, v16.8h, v17.8h" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "cmgt v4.8h, v3.8h, v2.8h", + "mov v16.16b, v4.16b" ] }, "pcmpgtd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0x66", "ExpectedArm64ASM": [ - "cmgt v16.4s, v16.4s, v17.4s" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "cmgt v4.4s, v3.4s, v2.4s", + "mov v16.16b, v4.16b" ] }, "punpckhbw xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0x68", "ExpectedArm64ASM": [ - "zip2 v16.16b, v16.16b, v17.16b" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "zip2 v4.16b, v2.16b, v3.16b", + "mov v16.16b, v4.16b" ] }, "punpckhbw xmm0, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0x68", "ExpectedArm64ASM": [ - "ldr q2, [x4]", - "zip2 v16.16b, v16.16b, v2.16b" + "mov v2.16b, v16.16b", + "mov x20, x4", + "ldr q3, [x20]", + "zip2 v4.16b, v2.16b, v3.16b", + "mov v16.16b, v4.16b" ] }, "punpckhwd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0x69", "ExpectedArm64ASM": [ - "zip2 v16.8h, v16.8h, v17.8h" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "zip2 v4.8h, v2.8h, v3.8h", + "mov v16.16b, v4.16b" ] }, "punpckhwd xmm0, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0x69", "ExpectedArm64ASM": [ - "ldr q2, [x4]", - "zip2 v16.8h, v16.8h, v2.8h" + "mov v2.16b, v16.16b", + "mov x20, x4", + "ldr q3, [x20]", + "zip2 v4.8h, v2.8h, v3.8h", + "mov v16.16b, v4.16b" ] }, "punpckhdq xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0x6a", "ExpectedArm64ASM": [ - "zip2 v16.4s, v16.4s, v17.4s" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "zip2 v4.4s, v2.4s, v3.4s", + "mov v16.16b, v4.16b" ] }, "punpckhdq xmm0, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0x6a", "ExpectedArm64ASM": [ - "ldr q2, [x4]", - "zip2 v16.4s, v16.4s, v2.4s" + "mov v2.16b, v16.16b", + "mov x20, x4", + "ldr q3, [x20]", + "zip2 v4.4s, v2.4s, v3.4s", + "mov v16.16b, v4.16b" ] }, "packssdw xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0x6b", "ExpectedArm64ASM": [ - "sqxtn v16.4h, v16.4s", - "sqxtn2 v16.8h, v17.4s" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "sqxtn v4.4h, v2.4s", + "sqxtn2 v4.8h, v3.4s", + "mov v16.16b, v4.16b" ] }, "punpcklqdq xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0x6c", "ExpectedArm64ASM": [ - "zip1 v16.2d, v16.2d, v17.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "zip1 v4.2d, v2.2d, v3.2d", + "mov v16.16b, v4.16b" ] }, "punpckhqdq xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0x6d", "ExpectedArm64ASM": [ - "zip2 v16.2d, v16.2d, v17.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "zip2 v4.2d, v2.2d, v3.2d", + "mov v16.16b, v4.16b" ] }, "movd xmm0, dword [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0x6e", "ExpectedArm64ASM": [ - "ldr s16, [x4]" + "mov x20, x4", + "ldr s2, [x20]", + "mov v16.16b, v2.16b" ] }, "movd xmm0, eax": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0x6e", "ExpectedArm64ASM": [ - "fmov s16, w4" + "mov x20, x4", + "fmov s2, w20", + "mov v16.16b, v2.16b" ] }, "movq xmm0, qword [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0x6e", "ExpectedArm64ASM": [ - "ldr d16, [x4]" + "mov x20, x4", + "ldr d2, [x20]", + "mov v16.16b, v2.16b" ] }, "movq xmm0, rax": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0x6e", "ExpectedArm64ASM": [ - "fmov d16, x4" + "mov x20, x4", + "fmov d2, x20", + "mov v16.16b, v2.16b" ] }, "movdqa xmm0, xmm0": { @@ -460,127 +603,154 @@ "ExpectedArm64ASM": [] }, "movdqa xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "0x66 0x0f 0x6f", "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b" + "mov v2.16b, v17.16b", + "mov v16.16b, v2.16b" ] }, "movdqa xmm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0x6f", "ExpectedArm64ASM": [ - "ldr q16, [x4]" + "mov x20, x4", + "ldr q2, [x20]", + "mov v16.16b, v2.16b" ] }, "pshufd xmm0, xmm1, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Broadcast element 0", "0x66 0x0f 0x70" ], "ExpectedArm64ASM": [ - "dup v16.4s, v17.s[0]" + "mov v2.16b, v17.16b", + "dup v3.4s, v2.s[0]", + "mov v16.16b, v3.16b" ] }, "pshufd xmm0, xmm1, 11100100b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Identity copy", "0x66 0x0f 0x70" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b" + "mov v2.16b, v17.16b", + "mov v16.16b, v2.16b" ] }, "pshufd xmm0, xmm1, 01010000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Zip with self", "0x66 0x0f 0x70" ], "ExpectedArm64ASM": [ - "zip1 v16.4s, v17.4s, v17.4s" + "mov v2.16b, v17.16b", + "zip1 v3.4s, v2.4s, v2.4s", + "mov v16.16b, v3.16b" ] }, "pshufd xmm0, [rax], 0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Broadcast element 0 from memory", "0x66 0x0f 0x70" ], "ExpectedArm64ASM": [ - "ldr q2, [x4]", - "dup v16.4s, v2.s[0]" + "mov x20, x4", + "ldr q2, [x20]", + "dup v3.4s, v2.s[0]", + "mov v16.16b, v3.16b" ] }, "pshufd xmm0, xmm1, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "Broadcast element 0", "Element 0 becomes element 1", "0x66 0x0f 0x70" ], "ExpectedArm64ASM": [ + "mov v2.16b, v17.16b", "ldr x0, [x28, #1760]", - "ldr q2, [x0, #16]", - "tbl v16.16b, {v17.16b}, v2.16b" + "ldr q3, [x0, #16]", + "tbl v4.16b, {v2.16b}, v3.16b", + "mov v16.16b, v4.16b" ] }, "pshufd xmm0, [rax], 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": [ "Broadcast element 0 from Memory", "Element 0 becomes element 1", "0x66 0x0f 0x70" ], "ExpectedArm64ASM": [ - "ldr q2, [x4]", + "mov x20, x4", + "ldr q2, [x20]", "ldr x0, [x28, #1760]", "ldr q3, [x0, #16]", - "tbl v16.16b, {v2.16b}, v3.16b" + "tbl v4.16b, {v2.16b}, v3.16b", + "mov v16.16b, v4.16b" ] }, "pshufd xmm0, xmm1, 0xff": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Broadcast element 3", "0x66 0x0f 0x70" ], "ExpectedArm64ASM": [ - "dup v16.4s, v17.s[3]" + "mov v2.16b, v17.16b", + "dup v3.4s, v2.s[3]", + "mov v16.16b, v3.16b" ] }, "pshufd xmm0, [rax], 0xff": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Broadcast element 3 from memory", "0x66 0x0f 0x70" ], "ExpectedArm64ASM": [ - "ldr q2, [x4]", - "dup v16.4s, v2.s[3]" + "mov x20, x4", + "ldr q2, [x20]", + "dup v3.4s, v2.s[3]", + "mov v16.16b, v3.16b" ] }, "pcmpeqb xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0x74", "ExpectedArm64ASM": [ - "cmeq v16.16b, v16.16b, v17.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "cmeq v4.16b, v3.16b, v2.16b", + "mov v16.16b, v4.16b" ] }, "pcmpeqw xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0x75", "ExpectedArm64ASM": [ - "cmeq v16.8h, v16.8h, v17.8h" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "cmeq v4.8h, v3.8h, v2.8h", + "mov v16.16b, v4.16b" ] }, "pcmpeqd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0x76", "ExpectedArm64ASM": [ - "cmeq v16.4s, v16.4s, v17.4s" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "cmeq v4.4s, v3.4s, v2.4s", + "mov v16.16b, v4.16b" ] }, "extrq xmm0, 64, 0": { @@ -616,766 +786,1061 @@ ] }, "haddpd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0x7c", "ExpectedArm64ASM": [ - "faddp v16.2d, v16.2d, v17.2d" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "faddp v4.2d, v3.2d, v2.2d", + "mov v16.16b, v4.16b" ] }, "hsubpd xmm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x66 0x0f 0x7c", "ExpectedArm64ASM": [ - "uzp1 v2.2d, v16.2d, v17.2d", - "uzp2 v3.2d, v16.2d, v17.2d", - "fsub v16.2d, v2.2d, v3.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "uzp1 v4.2d, v2.2d, v3.2d", + "uzp2 v5.2d, v2.2d, v3.2d", + "fsub v2.2d, v4.2d, v5.2d", + "mov v16.16b, v2.16b" ] }, "movd eax, xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0x7e", "ExpectedArm64ASM": [ - "mov w4, v16.s[0]" + "mov v2.16b, v16.16b", + "mov w20, v2.s[0]", + "mov x4, x20" ] }, "movq rax, xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0x7e", "ExpectedArm64ASM": [ - "mov x4, v16.d[0]" + "mov v2.16b, v16.16b", + "mov x20, v2.d[0]", + "mov x4, x20" ] }, "movd dword [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0x7e", "ExpectedArm64ASM": [ - "str s16, [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "str s2, [x20]" ] }, "movq qword [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0x7e", "ExpectedArm64ASM": [ - "str d16, [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "str d2, [x20]" ] }, "movdqa [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0x7f", "ExpectedArm64ASM": [ - "str q16, [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "str q2, [x20]" ] }, "cmppd xmm0, xmm1, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xc2", "ExpectedArm64ASM": [ - "fcmeq v16.2d, v16.2d, v17.2d" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "fcmeq v4.2d, v3.2d, v2.2d", + "mov v16.16b, v4.16b" ] }, "cmppd xmm0, xmm1, 1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xc2", "ExpectedArm64ASM": [ - "fcmgt v16.2d, v17.2d, v16.2d" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "fcmgt v4.2d, v2.2d, v3.2d", + "mov v16.16b, v4.16b" ] }, "cmppd xmm0, xmm1, 2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xc2", "ExpectedArm64ASM": [ - "fcmge v16.2d, v17.2d, v16.2d" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "fcmge v4.2d, v2.2d, v3.2d", + "mov v16.16b, v4.16b" ] }, "cmppd xmm0, xmm1, 3": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "0x66 0x0f 0xc2", "ExpectedArm64ASM": [ - "fcmge v0.2d, v16.2d, v17.2d", - "fcmgt v1.2d, v17.2d, v16.2d", - "orr v16.16b, v0.16b, v1.16b", - "mvn v16.16b, v16.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "fcmge v0.2d, v3.2d, v2.2d", + "fcmgt v1.2d, v2.2d, v3.2d", + "orr v4.16b, v0.16b, v1.16b", + "mvn v4.16b, v4.16b", + "mov v16.16b, v4.16b" ] }, "cmppd xmm0, xmm1, 4": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0xc2", "ExpectedArm64ASM": [ - "fcmeq v16.2d, v16.2d, v17.2d", - "mvn v16.16b, v16.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "fcmeq v4.2d, v3.2d, v2.2d", + "mvn v4.16b, v4.16b", + "mov v16.16b, v4.16b" ] }, "cmppd xmm0, xmm1, 5": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0xc2", "ExpectedArm64ASM": [ - "fcmgt v2.2d, v17.2d, v16.2d", - "mvn v16.16b, v2.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "fcmgt v4.2d, v2.2d, v3.2d", + "mvn v2.16b, v4.16b", + "mov v16.16b, v2.16b" ] }, "cmppd xmm0, xmm1, 6": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0xc2", "ExpectedArm64ASM": [ - "fcmge v2.2d, v17.2d, v16.2d", - "mvn v16.16b, v2.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "fcmge v4.2d, v2.2d, v3.2d", + "mvn v2.16b, v4.16b", + "mov v16.16b, v2.16b" ] }, "cmppd xmm0, xmm1, 7": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x66 0x0f 0xc2", "ExpectedArm64ASM": [ - "fcmge v0.2d, v16.2d, v17.2d", - "fcmgt v1.2d, v17.2d, v16.2d", - "orr v16.16b, v0.16b, v1.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "fcmge v0.2d, v3.2d, v2.2d", + "fcmgt v1.2d, v2.2d, v3.2d", + "orr v4.16b, v0.16b, v1.16b", + "mov v16.16b, v4.16b" ] }, "pinsrw xmm0, eax, 000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0xc4", "ExpectedArm64ASM": [ - "mov v16.h[0], w4" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.h[0], w20", + "mov v16.16b, v3.16b" ] }, "pinsrw xmm0, eax, 001b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0xc4", "ExpectedArm64ASM": [ - "mov v16.h[1], w4" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.h[1], w20", + "mov v16.16b, v3.16b" ] }, "pinsrw xmm0, eax, 010b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0xc4", "ExpectedArm64ASM": [ - "mov v16.h[2], w4" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.h[2], w20", + "mov v16.16b, v3.16b" ] }, "pinsrw xmm0, eax, 011b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0xc4", "ExpectedArm64ASM": [ - "mov v16.h[3], w4" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.h[3], w20", + "mov v16.16b, v3.16b" ] }, "pinsrw xmm0, eax, 100b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0xc4", "ExpectedArm64ASM": [ - "mov v16.h[4], w4" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.h[4], w20", + "mov v16.16b, v3.16b" ] }, "pinsrw xmm0, eax, 101b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0xc4", "ExpectedArm64ASM": [ - "mov v16.h[5], w4" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.h[5], w20", + "mov v16.16b, v3.16b" ] }, "pinsrw xmm0, eax, 110b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0xc4", "ExpectedArm64ASM": [ - "mov v16.h[6], w4" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.h[6], w20", + "mov v16.16b, v3.16b" ] }, "pinsrw xmm0, eax, 111b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0xc4", "ExpectedArm64ASM": [ - "mov v16.h[7], w4" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.h[7], w20", + "mov v16.16b, v3.16b" ] }, "pinsrw xmm0, [rax], 000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0xc4", "ExpectedArm64ASM": [ - "ld1 {v16.h}[0], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "ld1 {v3.h}[0], [x20]", + "mov v16.16b, v3.16b" ] }, "pinsrw xmm0, [rax], 001b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0xc4", "ExpectedArm64ASM": [ - "ld1 {v16.h}[1], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "ld1 {v3.h}[1], [x20]", + "mov v16.16b, v3.16b" ] }, "pinsrw xmm0, [rax], 010b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0xc4", "ExpectedArm64ASM": [ - "ld1 {v16.h}[2], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "ld1 {v3.h}[2], [x20]", + "mov v16.16b, v3.16b" ] }, "pinsrw xmm0, [rax], 011b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0xc4", "ExpectedArm64ASM": [ - "ld1 {v16.h}[3], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "ld1 {v3.h}[3], [x20]", + "mov v16.16b, v3.16b" ] }, "pinsrw xmm0, [rax], 100b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0xc4", "ExpectedArm64ASM": [ - "ld1 {v16.h}[4], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "ld1 {v3.h}[4], [x20]", + "mov v16.16b, v3.16b" ] }, "pinsrw xmm0, [rax], 101b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0xc4", "ExpectedArm64ASM": [ - "ld1 {v16.h}[5], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "ld1 {v3.h}[5], [x20]", + "mov v16.16b, v3.16b" ] }, "pinsrw xmm0, [rax], 110b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0xc4", "ExpectedArm64ASM": [ - "ld1 {v16.h}[6], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "ld1 {v3.h}[6], [x20]", + "mov v16.16b, v3.16b" ] }, "pinsrw xmm0, [rax], 111b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0xc4", "ExpectedArm64ASM": [ - "ld1 {v16.h}[7], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "ld1 {v3.h}[7], [x20]", + "mov v16.16b, v3.16b" ] }, "pextrw eax, xmm0, 000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0xc5", "ExpectedArm64ASM": [ - "umov w4, v16.h[0]" + "mov v2.16b, v16.16b", + "umov w20, v2.h[0]", + "mov x4, x20" ] }, "pextrw eax, xmm0, 001b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0xc5", "ExpectedArm64ASM": [ - "umov w4, v16.h[1]" + "mov v2.16b, v16.16b", + "umov w20, v2.h[1]", + "mov x4, x20" ] }, "pextrw eax, xmm0, 010b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0xc5", "ExpectedArm64ASM": [ - "umov w4, v16.h[2]" + "mov v2.16b, v16.16b", + "umov w20, v2.h[2]", + "mov x4, x20" ] }, "pextrw eax, xmm0, 011b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0xc5", "ExpectedArm64ASM": [ - "umov w4, v16.h[3]" + "mov v2.16b, v16.16b", + "umov w20, v2.h[3]", + "mov x4, x20" ] }, "pextrw eax, xmm0, 100b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0xc5", "ExpectedArm64ASM": [ - "umov w4, v16.h[4]" + "mov v2.16b, v16.16b", + "umov w20, v2.h[4]", + "mov x4, x20" ] }, "pextrw eax, xmm0, 101b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0xc5", "ExpectedArm64ASM": [ - "umov w4, v16.h[5]" + "mov v2.16b, v16.16b", + "umov w20, v2.h[5]", + "mov x4, x20" ] }, "pextrw eax, xmm0, 110b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0xc5", "ExpectedArm64ASM": [ - "umov w4, v16.h[6]" + "mov v2.16b, v16.16b", + "umov w20, v2.h[6]", + "mov x4, x20" ] }, "pextrw eax, xmm0, 111b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0xc5", "ExpectedArm64ASM": [ - "umov w4, v16.h[7]" + "mov v2.16b, v16.16b", + "umov w20, v2.h[7]", + "mov x4, x20" ] }, "pextrw [rax], xmm0, 000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0xc5", "ExpectedArm64ASM": [ - "st1 {v16.h}[0], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "st1 {v2.h}[0], [x20]" ] }, "pextrw [rax], xmm0, 001b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0xc5", "ExpectedArm64ASM": [ - "st1 {v16.h}[1], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "st1 {v2.h}[1], [x20]" ] }, "pextrw [rax], xmm0, 010b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0xc5", "ExpectedArm64ASM": [ - "st1 {v16.h}[2], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "st1 {v2.h}[2], [x20]" ] }, "pextrw [rax], xmm0, 011b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0xc5", "ExpectedArm64ASM": [ - "st1 {v16.h}[3], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "st1 {v2.h}[3], [x20]" ] }, "pextrw [rax], xmm0, 100b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0xc5", "ExpectedArm64ASM": [ - "st1 {v16.h}[4], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "st1 {v2.h}[4], [x20]" ] }, "pextrw [rax], xmm0, 101b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0xc5", "ExpectedArm64ASM": [ - "st1 {v16.h}[5], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "st1 {v2.h}[5], [x20]" ] }, "pextrw [rax], xmm0, 110b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0xc5", "ExpectedArm64ASM": [ - "st1 {v16.h}[6], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "st1 {v2.h}[6], [x20]" ] }, "pextrw [rax], xmm0, 111b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0xc5", "ExpectedArm64ASM": [ - "st1 {v16.h}[7], [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "st1 {v2.h}[7], [x20]" ] }, "shufpd xmm0, xmm1, 00b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xc6", "ExpectedArm64ASM": [ - "zip1 v16.2d, v16.2d, v17.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "zip1 v4.2d, v2.2d, v3.2d", + "mov v16.16b, v4.16b" ] }, "shufpd xmm0, xmm1, 01b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xc6", "ExpectedArm64ASM": [ - "ext v16.16b, v16.16b, v17.16b, #8" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "ext v4.16b, v2.16b, v3.16b, #8", + "mov v16.16b, v4.16b" ] }, "shufpd xmm0, xmm1, 10b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0xc6", "ExpectedArm64ASM": [ - "mov v16.d[1], v17.d[1]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "mov v4.d[1], v3.d[1]", + "mov v16.16b, v4.16b" ] }, "shufpd xmm0, xmm1, 11b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xc6", "ExpectedArm64ASM": [ - "zip2 v16.2d, v16.2d, v17.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "zip2 v4.2d, v2.2d, v3.2d", + "mov v16.16b, v4.16b" ] }, "shufpd xmm1, xmm0, 00b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xc6", "ExpectedArm64ASM": [ - "zip1 v17.2d, v17.2d, v16.2d" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "zip1 v4.2d, v2.2d, v3.2d", + "mov v17.16b, v4.16b" ] }, "shufpd xmm1, xmm0, 01b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xc6", "ExpectedArm64ASM": [ - "ext v17.16b, v17.16b, v16.16b, #8" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "ext v4.16b, v2.16b, v3.16b, #8", + "mov v17.16b, v4.16b" ] }, "shufpd xmm1, xmm0, 10b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0xc6", "ExpectedArm64ASM": [ - "mov v17.d[1], v16.d[1]" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "mov v4.16b, v2.16b", + "mov v4.d[1], v3.d[1]", + "mov v17.16b, v4.16b" ] }, "shufpd xmm1, xmm0, 11b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xc6", "ExpectedArm64ASM": [ - "zip2 v17.2d, v17.2d, v16.2d" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "zip2 v4.2d, v2.2d, v3.2d", + "mov v17.16b, v4.16b" ] }, "addsubpd xmm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x66 0x0f 0xd0", "ExpectedArm64ASM": [ - "ldr q2, [x28, #2112]", - "eor v2.16b, v17.16b, v2.16b", - "fadd v16.2d, v16.2d, v2.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "ldr q4, [x28, #2112]", + "eor v5.16b, v3.16b, v4.16b", + "fadd v3.2d, v2.2d, v5.2d", + "mov v16.16b, v3.16b" ] }, "psrlw xmm0, xmm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "0x66 0x0f 0xd1", "ExpectedArm64ASM": [ - "uqshl d0, d17, #57", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "uqshl d0, d3, #57", "ushr d0, d0, #57", "dup v0.8h, v0.h[0]", "neg v0.8h, v0.8h", - "ushl v16.8h, v16.8h, v0.8h" + "ushl v4.8h, v2.8h, v0.8h", + "mov v16.16b, v4.16b" ] }, "psrld xmm0, xmm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "0x66 0x0f 0xd2", "ExpectedArm64ASM": [ - "uqshl d0, d17, #57", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "uqshl d0, d3, #57", "ushr d0, d0, #57", "dup v0.4s, v0.s[0]", "neg v0.4s, v0.4s", - "ushl v16.4s, v16.4s, v0.4s" + "ushl v4.4s, v2.4s, v0.4s", + "mov v16.16b, v4.16b" ] }, "psrlq xmm0, xmm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "0x66 0x0f 0xd3", "ExpectedArm64ASM": [ - "uqshl d0, d17, #57", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "uqshl d0, d3, #57", "ushr d0, d0, #57", "dup v0.2d, v0.d[0]", "neg v0.2d, v0.2d", - "ushl v16.2d, v16.2d, v0.2d" + "ushl v4.2d, v2.2d, v0.2d", + "mov v16.16b, v4.16b" ] }, "paddq xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xd4", "ExpectedArm64ASM": [ - "add v16.2d, v16.2d, v17.2d" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "add v4.2d, v3.2d, v2.2d", + "mov v16.16b, v4.16b" ] }, "pmullw xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xd3", "ExpectedArm64ASM": [ - "mul v16.8h, v16.8h, v17.8h" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "mul v4.8h, v3.8h, v2.8h", + "mov v16.16b, v4.16b" ] }, "pmovmskb eax, xmm0": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "0x66 0x0f 0xd7", "ExpectedArm64ASM": [ - "ldr q2, [x28, #2272]", - "cmlt v3.16b, v16.16b, #0", - "and v2.16b, v3.16b, v2.16b", - "addp v2.16b, v2.16b, v2.16b", - "addp v2.8b, v2.8b, v2.8b", - "addp v2.8b, v2.8b, v2.8b", - "umov w4, v2.h[0]" + "mov v2.16b, v16.16b", + "ldr q3, [x28, #2272]", + "cmlt v4.16b, v2.16b, #0", + "and v2.16b, v4.16b, v3.16b", + "addp v3.16b, v2.16b, v2.16b", + "addp v2.8b, v3.8b, v3.8b", + "addp v3.8b, v2.8b, v2.8b", + "umov w20, v3.h[0]", + "mov x4, x20" ] }, "psubusb xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xd8", "ExpectedArm64ASM": [ - "uqsub v16.16b, v16.16b, v17.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "uqsub v4.16b, v3.16b, v2.16b", + "mov v16.16b, v4.16b" ] }, "psubusw xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xd9", "ExpectedArm64ASM": [ - "uqsub v16.8h, v16.8h, v17.8h" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "uqsub v4.8h, v3.8h, v2.8h", + "mov v16.16b, v4.16b" ] }, "pminub xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xda", "ExpectedArm64ASM": [ - "umin v16.16b, v16.16b, v17.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "umin v4.16b, v3.16b, v2.16b", + "mov v16.16b, v4.16b" ] }, "pand xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xdb", "ExpectedArm64ASM": [ - "and v16.16b, v16.16b, v17.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "and v4.16b, v3.16b, v2.16b", + "mov v16.16b, v4.16b" ] }, "paddusb xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xdc", "ExpectedArm64ASM": [ - "uqadd v16.16b, v16.16b, v17.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "uqadd v4.16b, v3.16b, v2.16b", + "mov v16.16b, v4.16b" ] }, "paddusw xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xdd", "ExpectedArm64ASM": [ - "uqadd v16.8h, v16.8h, v17.8h" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "uqadd v4.8h, v3.8h, v2.8h", + "mov v16.16b, v4.16b" ] }, "pmaxub xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xde", "ExpectedArm64ASM": [ - "umax v16.16b, v16.16b, v17.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "umax v4.16b, v3.16b, v2.16b", + "mov v16.16b, v4.16b" ] }, "pandn xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xdf", "ExpectedArm64ASM": [ - "bic v16.16b, v17.16b, v16.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "bic v4.16b, v2.16b, v3.16b", + "mov v16.16b, v4.16b" ] }, "pavgb xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xe0", "ExpectedArm64ASM": [ - "urhadd v16.16b, v16.16b, v17.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "urhadd v4.16b, v3.16b, v2.16b", + "mov v16.16b, v4.16b" ] }, "psraw xmm0, xmm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "0x66 0x0f 0xe1", "ExpectedArm64ASM": [ - "uqshl d0, d17, #57", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "uqshl d0, d3, #57", "ushr d0, d0, #57", "dup v0.8h, v0.h[0]", "neg v0.8h, v0.8h", - "sshl v16.8h, v16.8h, v0.8h" + "sshl v4.8h, v2.8h, v0.8h", + "mov v16.16b, v4.16b" ] }, "psrad xmm0, xmm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "0x66 0x0f 0xe2", "ExpectedArm64ASM": [ - "uqshl d0, d17, #57", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "uqshl d0, d3, #57", "ushr d0, d0, #57", "dup v0.4s, v0.s[0]", "neg v0.4s, v0.4s", - "sshl v16.4s, v16.4s, v0.4s" + "sshl v4.4s, v2.4s, v0.4s", + "mov v16.16b, v4.16b" ] }, "pavgw xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xe3", "ExpectedArm64ASM": [ - "urhadd v16.8h, v16.8h, v17.8h" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "urhadd v4.8h, v3.8h, v2.8h", + "mov v16.16b, v4.16b" ] }, "pmulhuw xmm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x66 0x0f 0xe4", "ExpectedArm64ASM": [ - "umull2 v0.4s, v16.8h, v17.8h", - "umull v16.4s, v16.4h, v17.4h", - "uzp2 v16.8h, v16.8h, v0.8h" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "umull2 v0.4s, v2.8h, v3.8h", + "umull v4.4s, v2.4h, v3.4h", + "uzp2 v4.8h, v4.8h, v0.8h", + "mov v16.16b, v4.16b" ] }, "pmulhw xmm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x66 0x0f 0xe5", "ExpectedArm64ASM": [ - "smull2 v0.4s, v16.8h, v17.8h", - "smull v16.4s, v16.4h, v17.4h", - "uzp2 v16.8h, v16.8h, v0.8h" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "smull2 v0.4s, v2.8h, v3.8h", + "smull v4.4s, v2.4h, v3.4h", + "uzp2 v4.8h, v4.8h, v0.8h", + "mov v16.16b, v4.16b" ] }, "cvttpd2dq xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xe6", "ExpectedArm64ASM": [ - "fcvtn v2.2s, v17.2d", - "fcvtzs v16.4s, v2.4s" + "mov v2.16b, v17.16b", + "fcvtn v3.2s, v2.2d", + "fcvtzs v2.4s, v3.4s", + "mov v16.16b, v2.16b" ] }, "movntdq [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0x66 0x0f 0xe7", "ExpectedArm64ASM": [ - "str q16, [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "str q2, [x20]" ] }, "psubsb xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xe8", "ExpectedArm64ASM": [ - "sqsub v16.16b, v16.16b, v17.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "sqsub v4.16b, v3.16b, v2.16b", + "mov v16.16b, v4.16b" ] }, "psubsw xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xe9", "ExpectedArm64ASM": [ - "sqsub v16.8h, v16.8h, v17.8h" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "sqsub v4.8h, v3.8h, v2.8h", + "mov v16.16b, v4.16b" ] }, "pminsw xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xea", "ExpectedArm64ASM": [ - "smin v16.8h, v16.8h, v17.8h" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "smin v4.8h, v3.8h, v2.8h", + "mov v16.16b, v4.16b" ] }, "por xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xeb", "ExpectedArm64ASM": [ - "orr v16.16b, v16.16b, v17.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "orr v4.16b, v3.16b, v2.16b", + "mov v16.16b, v4.16b" ] }, "paddsb xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xec", "ExpectedArm64ASM": [ - "sqadd v16.16b, v16.16b, v17.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "sqadd v4.16b, v3.16b, v2.16b", + "mov v16.16b, v4.16b" ] }, "paddsw xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xed", "ExpectedArm64ASM": [ - "sqadd v16.8h, v16.8h, v17.8h" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "sqadd v4.8h, v3.8h, v2.8h", + "mov v16.16b, v4.16b" ] }, "pmaxsw xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xee", "ExpectedArm64ASM": [ - "smax v16.8h, v16.8h, v17.8h" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "smax v4.8h, v3.8h, v2.8h", + "mov v16.16b, v4.16b" ] }, "pxor xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xef", "ExpectedArm64ASM": [ - "eor v16.16b, v16.16b, v17.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "eor v4.16b, v3.16b, v2.16b", + "mov v16.16b, v4.16b" ] }, "psllw xmm0, xmm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "0x66 0x0f 0xf1", "ExpectedArm64ASM": [ - "uqshl d0, d17, #57", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "uqshl d0, d3, #57", "ushr d0, d0, #57", "dup v0.8h, v0.h[0]", - "ushl v16.8h, v16.8h, v0.8h" + "ushl v4.8h, v2.8h, v0.8h", + "mov v16.16b, v4.16b" ] }, "pslld xmm0, xmm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "0x66 0x0f 0xf2", "ExpectedArm64ASM": [ - "uqshl d0, d17, #57", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "uqshl d0, d3, #57", "ushr d0, d0, #57", "dup v0.4s, v0.s[0]", - "ushl v16.4s, v16.4s, v0.4s" + "ushl v4.4s, v2.4s, v0.4s", + "mov v16.16b, v4.16b" ] }, "psllq xmm0, xmm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": "0x66 0x0f 0xf3", "ExpectedArm64ASM": [ - "uqshl d0, d17, #57", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "uqshl d0, d3, #57", "ushr d0, d0, #57", "dup v0.2d, v0.d[0]", - "ushl v16.2d, v16.2d, v0.2d" + "ushl v4.2d, v2.2d, v0.2d", + "mov v16.16b, v4.16b" ] }, "pmuludq xmm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x66 0x0f 0xf4", "ExpectedArm64ASM": [ - "uzp1 v2.4s, v16.4s, v16.4s", - "uzp1 v3.4s, v17.4s, v17.4s", - "umull v16.2d, v2.2s, v3.2s" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "uzp1 v4.4s, v2.4s, v2.4s", + "uzp1 v2.4s, v3.4s, v3.4s", + "umull v3.2d, v4.2s, v2.2s", + "mov v16.16b, v3.16b" ] }, "pmaddwd xmm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0x66 0x0f 0xf5", "ExpectedArm64ASM": [ - "smull v2.4s, v16.4h, v17.4h", - "smull2 v3.4s, v16.8h, v17.8h", - "addp v16.4s, v2.4s, v3.4s" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "smull v4.4s, v2.4h, v3.4h", + "smull2 v5.4s, v2.8h, v3.8h", + "addp v2.4s, v4.4s, v5.4s", + "mov v16.16b, v2.16b" ] }, "psadbw xmm0, xmm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": "0x66 0x0f 0xf6", "ExpectedArm64ASM": [ - "uabdl v2.8h, v16.8b, v17.8b", - "uabdl2 v3.8h, v16.16b, v17.16b", - "addv h2, v2.8h", - "addv h3, v3.8h", - "zip1 v16.2d, v2.2d, v3.2d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "uabdl v4.8h, v2.8b, v3.8b", + "uabdl2 v5.8h, v2.16b, v3.16b", + "addv h2, v4.8h", + "addv h3, v5.8h", + "zip1 v4.2d, v2.2d, v3.2d", + "mov v16.16b, v4.16b" ] }, "maskmovdqu xmm0, xmm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": "0x66 0x0f 0xf7", "ExpectedArm64ASM": [ - "cmlt v2.16b, v17.16b, #0", - "ldr q3, [x11]", - "bsl v2.16b, v16.16b, v3.16b", - "str q2, [x11]" + "mov v2.16b, v17.16b", + "cmlt v3.16b, v2.16b, #0", + "mov v2.16b, v16.16b", + "mov x20, x11", + "ldr q4, [x20]", + "mov v5.16b, v3.16b", + "bsl v5.16b, v2.16b, v4.16b", + "str q5, [x20]" ] }, "psubb xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xf8", "ExpectedArm64ASM": [ - "sub v16.16b, v16.16b, v17.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "sub v4.16b, v3.16b, v2.16b", + "mov v16.16b, v4.16b" ] }, "psubw xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xf9", "ExpectedArm64ASM": [ - "sub v16.8h, v16.8h, v17.8h" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "sub v4.8h, v3.8h, v2.8h", + "mov v16.16b, v4.16b" ] }, "psubd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xfa", "ExpectedArm64ASM": [ - "sub v16.4s, v16.4s, v17.4s" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "sub v4.4s, v3.4s, v2.4s", + "mov v16.16b, v4.16b" ] }, "psubq xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xfb", "ExpectedArm64ASM": [ - "sub v16.2d, v16.2d, v17.2d" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "sub v4.2d, v3.2d, v2.2d", + "mov v16.16b, v4.16b" ] }, "paddb xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xfc", "ExpectedArm64ASM": [ - "add v16.16b, v16.16b, v17.16b" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "add v4.16b, v3.16b, v2.16b", + "mov v16.16b, v4.16b" ] }, "paddw xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xfd", "ExpectedArm64ASM": [ - "add v16.8h, v16.8h, v17.8h" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "add v4.8h, v3.8h, v2.8h", + "mov v16.16b, v4.16b" ] }, "paddd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xfe", "ExpectedArm64ASM": [ - "add v16.4s, v16.4s, v17.4s" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "add v4.4s, v3.4s, v2.4s", + "mov v16.16b, v4.16b" ] } } diff --git a/unittests/InstructionCountCI/Secondary_OpSize_FCMA.json b/unittests/InstructionCountCI/Secondary_OpSize_FCMA.json index e4353bb3cf..8bb63b678d 100644 --- a/unittests/InstructionCountCI/Secondary_OpSize_FCMA.json +++ b/unittests/InstructionCountCI/Secondary_OpSize_FCMA.json @@ -11,11 +11,14 @@ }, "Instructions": { "addsubpd xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0x66 0x0f 0xd0", "ExpectedArm64ASM": [ - "ext v2.16b, v17.16b, v17.16b, #8", - "fcadd v16.2d, v16.2d, v2.2d, #90" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "ext v4.16b, v3.16b, v3.16b, #8", + "fcadd v3.2d, v2.2d, v4.2d, #90", + "mov v16.16b, v3.16b" ] } } diff --git a/unittests/InstructionCountCI/Secondary_OpSize_SVE128.json b/unittests/InstructionCountCI/Secondary_OpSize_SVE128.json index 2ab3ee916b..9ab45ed750 100644 --- a/unittests/InstructionCountCI/Secondary_OpSize_SVE128.json +++ b/unittests/InstructionCountCI/Secondary_OpSize_SVE128.json @@ -10,81 +10,119 @@ }, "Instructions": { "psrlw xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x66 0x0f 0xd1", "ExpectedArm64ASM": [ - "mov z0.d, d17", - "lsr z16.h, p6/m, z16.h, z0.d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov z0.d, d3", + "movprfx z4, z2", + "lsr z4.h, p6/m, z4.h, z0.d", + "mov v16.16b, v4.16b" ] }, "psrld xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x66 0x0f 0xd2", "ExpectedArm64ASM": [ - "mov z0.d, d17", - "lsr z16.s, p6/m, z16.s, z0.d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov z0.d, d3", + "movprfx z4, z2", + "lsr z4.s, p6/m, z4.s, z0.d", + "mov v16.16b, v4.16b" ] }, "psrlq xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x66 0x0f 0xd3", "ExpectedArm64ASM": [ - "mov z0.d, d17", - "lsr z16.d, p6/m, z16.d, z0.d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov z0.d, d3", + "movprfx z4, z2", + "lsr z4.d, p6/m, z4.d, z0.d", + "mov v16.16b, v4.16b" ] }, "psraw xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x66 0x0f 0xe1", "ExpectedArm64ASM": [ - "mov z0.d, d17", - "asr z16.h, p6/m, z16.h, z0.d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov z0.d, d3", + "movprfx z4, z2", + "asr z4.h, p6/m, z4.h, z0.d", + "mov v16.16b, v4.16b" ] }, "psrad xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x66 0x0f 0xe2", "ExpectedArm64ASM": [ - "mov z0.d, d17", - "asr z16.s, p6/m, z16.s, z0.d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov z0.d, d3", + "movprfx z4, z2", + "asr z4.s, p6/m, z4.s, z0.d", + "mov v16.16b, v4.16b" ] }, "pmulhuw xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xe4", "ExpectedArm64ASM": [ - "umulh z16.h, z16.h, z17.h" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "umulh z4.h, z2.h, z3.h", + "mov v16.16b, v4.16b" ] }, "pmulhw xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0x66 0x0f 0xe5", "ExpectedArm64ASM": [ - "smulh z16.h, z16.h, z17.h" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "smulh z4.h, z2.h, z3.h", + "mov v16.16b, v4.16b" ] }, "psllw xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x66 0x0f 0xf1", "ExpectedArm64ASM": [ - "mov z0.d, d17", - "lsl z16.h, p6/m, z16.h, z0.d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov z0.d, d3", + "movprfx z4, z2", + "lsl z4.h, p6/m, z4.h, z0.d", + "mov v16.16b, v4.16b" ] }, "pslld xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x66 0x0f 0xf2", "ExpectedArm64ASM": [ - "mov z0.d, d17", - "lsl z16.s, p6/m, z16.s, z0.d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov z0.d, d3", + "movprfx z4, z2", + "lsl z4.s, p6/m, z4.s, z0.d", + "mov v16.16b, v4.16b" ] }, "psllq xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0x66 0x0f 0xf3", "ExpectedArm64ASM": [ - "mov z0.d, d17", - "lsl z16.d, p6/m, z16.d, z0.d" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov z0.d, d3", + "movprfx z4, z2", + "lsl z4.d, p6/m, z4.d, z0.d", + "mov v16.16b, v4.16b" ] } } diff --git a/unittests/InstructionCountCI/Secondary_OpSize_SVE256.json b/unittests/InstructionCountCI/Secondary_OpSize_SVE256.json index 768bcf23c5..722f1e186e 100644 --- a/unittests/InstructionCountCI/Secondary_OpSize_SVE256.json +++ b/unittests/InstructionCountCI/Secondary_OpSize_SVE256.json @@ -9,23 +9,31 @@ }, "Instructions": { "pmulhuw xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "SVE-256bit changes behaviour slightly", "0x66 0x0f 0xe4" ], "ExpectedArm64ASM": [ - "umulh z16.h, p6/m, z16.h, z17.h" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "movprfx z4, z2", + "umulh z4.h, p6/m, z4.h, z3.h", + "mov z16.d, p7/m, z4.d" ] }, "pmulhw xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "SVE-256bit changes behaviour slightly", "0x66 0x0f 0xe5" ], "ExpectedArm64ASM": [ - "smulh z16.h, p6/m, z16.h, z17.h" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "movprfx z4, z2", + "smulh z4.h, p6/m, z4.h, z3.h", + "mov z16.d, p7/m, z4.d" ] } } diff --git a/unittests/InstructionCountCI/Secondary_REP.json b/unittests/InstructionCountCI/Secondary_REP.json index 3f38dc715e..6908053173 100644 --- a/unittests/InstructionCountCI/Secondary_REP.json +++ b/unittests/InstructionCountCI/Secondary_REP.json @@ -13,285 +13,381 @@ }, "Instructions": { "movss xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0xf3 0x0f 0x10", "ExpectedArm64ASM": [ - "mov v16.s[0], v17.s[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "mov v4.s[0], v3.s[0]", + "mov v16.16b, v4.16b" ] }, "movss xmm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0xf3 0x0f 0x10", "ExpectedArm64ASM": [ - "ldr s16, [x4]" + "mov x20, x4", + "ldr s2, [x20]", + "mov v16.16b, v2.16b" ] }, "movss [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0xf3 0x0f 0x11", "ExpectedArm64ASM": [ - "str s16, [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "str s2, [x20]" ] }, "movsldup xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0xf3 0x0f 0x12", "ExpectedArm64ASM": [ - "trn1 v16.4s, v17.4s, v17.4s" + "mov v2.16b, v17.16b", + "trn1 v3.4s, v2.4s, v2.4s", + "mov v16.16b, v3.16b" ] }, "movsldup xmm0, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0xf3 0x0f 0x12", "ExpectedArm64ASM": [ - "ldr q2, [x4]", - "trn1 v16.4s, v2.4s, v2.4s" + "mov x20, x4", + "ldr q2, [x20]", + "trn1 v3.4s, v2.4s, v2.4s", + "mov v16.16b, v3.16b" ] }, "movshdup xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0xf3 0x0f 0x16", "ExpectedArm64ASM": [ - "trn2 v16.4s, v17.4s, v17.4s" + "mov v2.16b, v17.16b", + "trn2 v3.4s, v2.4s, v2.4s", + "mov v16.16b, v3.16b" ] }, "movshdup xmm0, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0xf3 0x0f 0x16", "ExpectedArm64ASM": [ - "ldr q2, [x4]", - "trn2 v16.4s, v2.4s, v2.4s" + "mov x20, x4", + "ldr q2, [x20]", + "trn2 v3.4s, v2.4s, v2.4s", + "mov v16.16b, v3.16b" ] }, "cvtsi2ss xmm0, eax": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0xf3 0x0f 0x2a" ], "ExpectedArm64ASM": [ - "scvtf s0, w4", - "mov v16.s[0], v0.s[0]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "scvtf s0, w20", + "mov v3.s[0], v0.s[0]", + "mov v16.16b, v3.16b" ] }, "cvtsi2ss xmm0, dword [rax]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0xf3 0x0f 0x2a" ], "ExpectedArm64ASM": [ - "ldr s2, [x4]", - "scvtf s0, s2", - "mov v16.s[0], v0.s[0]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "ldr s3, [x20]", + "mov v4.16b, v2.16b", + "scvtf s0, s3", + "mov v4.s[0], v0.s[0]", + "mov v16.16b, v4.16b" ] }, "cvtsi2ss xmm0, rax": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0xf3 0x0f 0x2a", "ExpectedArm64ASM": [ - "scvtf s0, x4", - "mov v16.s[0], v0.s[0]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "scvtf s0, x20", + "mov v3.s[0], v0.s[0]", + "mov v16.16b, v3.16b" ] }, "cvtsi2ss xmm0, qword [rax]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0xf3 0x0f 0x2a" ], "ExpectedArm64ASM": [ - "ldr x20, [x4]", - "scvtf s0, x20", - "mov v16.s[0], v0.s[0]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "ldr x21, [x20]", + "mov v3.16b, v2.16b", + "scvtf s0, x21", + "mov v3.s[0], v0.s[0]", + "mov v16.16b, v3.16b" ] }, "movntss [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0xf3 0x0f 0x2b", "ExpectedArm64ASM": [ - "str s16, [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "str s2, [x20]" ] }, "cvttss2si eax, xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0xf3 0x0f 0x2c", "ExpectedArm64ASM": [ - "fcvtzs w4, s16" + "mov v2.16b, v16.16b", + "fcvtzs w20, s2", + "mov x4, x20" ] }, "cvttss2si eax, dword [rbx]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0xf3 0x0f 0x2c", "ExpectedArm64ASM": [ - "ldr s2, [x7]", - "fcvtzs w4, s2" + "mov x20, x7", + "ldr s2, [x20]", + "fcvtzs w20, s2", + "mov x4, x20" ] }, "cvttss2si rax, xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0xf3 0x0f 0x2c", "ExpectedArm64ASM": [ - "fcvtzs x4, s16" + "mov v2.16b, v16.16b", + "fcvtzs x20, s2", + "mov x4, x20" ] }, "cvttss2si rax, dword [rbx]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0xf3 0x0f 0x2c", "ExpectedArm64ASM": [ - "ldr d2, [x7]", - "fcvtzs x4, s2" + "mov x20, x7", + "ldr d2, [x20]", + "fcvtzs x20, s2", + "mov x4, x20" ] }, "cvtss2si eax, xmm0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0xf3 0x0f 0x2d", "ExpectedArm64ASM": [ - "frinti s0, s16", - "fcvtzs w4, s0" + "mov v2.16b, v16.16b", + "frinti s0, s2", + "fcvtzs w20, s0", + "mov x4, x20" ] }, "cvtss2si eax, dword [rbx]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0xf3 0x0f 0x2d", "ExpectedArm64ASM": [ - "ldr s2, [x7]", + "mov x20, x7", + "ldr s2, [x20]", "frinti s0, s2", - "fcvtzs w4, s0" + "fcvtzs w20, s0", + "mov x4, x20" ] }, "cvtss2si rax, xmm0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0xf3 0x0f 0x2d", "ExpectedArm64ASM": [ - "frinti s0, s16", - "fcvtzs x4, s0" + "mov v2.16b, v16.16b", + "frinti s0, s2", + "fcvtzs x20, s0", + "mov x4, x20" ] }, "cvtss2si rax, dword [rbx]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0xf3 0x0f 0x2d", "ExpectedArm64ASM": [ - "ldr d2, [x7]", + "mov x20, x7", + "ldr d2, [x20]", "frinti s0, s2", - "fcvtzs x4, s0" + "fcvtzs x20, s0", + "mov x4, x20" ] }, "sqrtss xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0xf3 0x0f 0x51", "ExpectedArm64ASM": [ - "fsqrt s0, s17", - "mov v16.s[0], v0.s[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fsqrt s0, s3", + "mov v4.s[0], v0.s[0]", + "mov v16.16b, v4.16b" ] }, "rsqrtss xmm0, xmm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": [ "0xf3 0x0f 0x52" ], "ExpectedArm64ASM": [ + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", "fmov s0, #0x70 (1.0000)", - "fsqrt s1, s17", + "fsqrt s1, s3", "fdiv s0, s0, s1", - "mov v16.s[0], v0.s[0]" + "mov v4.s[0], v0.s[0]", + "mov v16.16b, v4.16b" ] }, "rcpss xmm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0xf3 0x0f 0x53" ], "ExpectedArm64ASM": [ + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", "fmov s0, #0x70 (1.0000)", - "fdiv s0, s0, s17", - "mov v16.s[0], v0.s[0]" + "fdiv s0, s0, s3", + "mov v4.s[0], v0.s[0]", + "mov v16.16b, v4.16b" ] }, "addss xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0xf3 0x0f 0x58" ], "ExpectedArm64ASM": [ - "fadd s0, s16, s17", - "mov v16.s[0], v0.s[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fadd s0, s2, s3", + "mov v4.s[0], v0.s[0]", + "mov v16.16b, v4.16b" ] }, "mulss xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0xf3 0x0f 0x59" ], "ExpectedArm64ASM": [ - "fmul s0, s16, s17", - "mov v16.s[0], v0.s[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fmul s0, s2, s3", + "mov v4.s[0], v0.s[0]", + "mov v16.16b, v4.16b" ] }, "cvtss2sd xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": "0xf3 0x0f 0x5a", "ExpectedArm64ASM": [ - "fcvt d0, s17", - "mov v16.d[0], v0.d[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fcvt d0, s3", + "mov v4.d[0], v0.d[0]", + "mov v16.16b, v4.16b" ] }, "cvtss2sd xmm0, [rax]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": "0xf3 0x0f 0x5a", "ExpectedArm64ASM": [ - "ldr d2, [x4]", - "fcvt d0, s2", - "mov v16.d[0], v0.d[0]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "ldr d3, [x20]", + "mov v4.16b, v2.16b", + "fcvt d0, s3", + "mov v4.d[0], v0.d[0]", + "mov v16.16b, v4.16b" ] }, "cvttps2dq xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0xf3 0x0f 0x5b", "ExpectedArm64ASM": [ - "fcvtzs v16.4s, v17.4s" + "mov v2.16b, v17.16b", + "fcvtzs v3.4s, v2.4s", + "mov v16.16b, v3.16b" ] }, "subss xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0xf3 0x0f 0x5c" ], "ExpectedArm64ASM": [ - "fsub s0, s16, s17", - "mov v16.s[0], v0.s[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fsub s0, s2, s3", + "mov v4.s[0], v0.s[0]", + "mov v16.16b, v4.16b" ] }, "minss xmm0, xmm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": [ "0xf3 0x0f 0x5d" ], "ExpectedArm64ASM": [ + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", "mrs x20, nzcv", - "fcmp s16, s17", - "fcsel s0, s16, s17, mi", - "mov v16.s[0], v0.s[0]", + "mov v4.16b, v2.16b", + "fcmp s2, s3", + "fcsel s0, s2, s3, mi", + "mov v4.s[0], v0.s[0]", + "mov v16.16b, v4.16b", "msr nzcv, x20" ] }, "divss xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0xf3 0x0f 0x5e" ], "ExpectedArm64ASM": [ - "fdiv s0, s16, s17", - "mov v16.s[0], v0.s[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fdiv s0, s2, s3", + "mov v4.s[0], v0.s[0]", + "mov v16.16b, v4.16b" ] }, "maxss xmm0, xmm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": [ "0xf3 0x0f 0x5f" ], "ExpectedArm64ASM": [ + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", "mrs x20, nzcv", - "fcmp s16, s17", - "fcsel s0, s17, s16, mi", - "mov v16.s[0], v0.s[0]", + "mov v4.16b, v2.16b", + "fcmp s2, s3", + "fcsel s0, s3, s2, mi", + "mov v4.s[0], v0.s[0]", + "mov v16.16b, v4.16b", "msr nzcv, x20" ] }, @@ -301,334 +397,421 @@ "ExpectedArm64ASM": [] }, "movdqu xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "0xf3 0x0f 0x6f", "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b" + "mov v2.16b, v17.16b", + "mov v16.16b, v2.16b" ] }, "movdqu xmm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0xf3 0x0f 0x6f", "ExpectedArm64ASM": [ - "ldr q16, [x4]" + "mov x20, x4", + "ldr q2, [x20]", + "mov v16.16b, v2.16b" ] }, "pshufhw xmm0, xmm1, 0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Broadcast upper-half element 0", "0xf3 0x0f 0x70" ], "ExpectedArm64ASM": [ - "dup v2.8h, v17.h[4]", - "trn1 v16.2d, v17.2d, v2.2d" + "mov v2.16b, v17.16b", + "dup v3.8h, v2.h[4]", + "trn1 v4.2d, v2.2d, v3.2d", + "mov v16.16b, v4.16b" ] }, "pshufhw xmm0, xmm1, 11100100b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Identity copy", "0xf3 0x0f 0x70" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b" + "mov v2.16b, v17.16b", + "mov v16.16b, v2.16b" ] }, "pshufhw xmm0, xmm1, 01010000b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Upper elements Self-zip", "0xf3 0x0f 0x70" ], "ExpectedArm64ASM": [ - "zip1 v2.8h, v17.8h, v17.8h", - "zip1 v16.2d, v17.2d, v2.2d" + "mov v2.16b, v17.16b", + "zip1 v3.8h, v2.8h, v2.8h", + "zip1 v4.2d, v2.2d, v3.2d", + "mov v16.16b, v4.16b" ] }, "pshufhw xmm0, xmm1, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "Broadcast element 0 in the upper-half", "Upper-half Element 0 gets turned in to element 1", "0xf3 0x0f 0x70" ], "ExpectedArm64ASM": [ + "mov v2.16b, v17.16b", "ldr x0, [x28, #1752]", - "ldr q2, [x0, #16]", - "tbl v16.16b, {v17.16b}, v2.16b" + "ldr q3, [x0, #16]", + "tbl v4.16b, {v2.16b}, v3.16b", + "mov v16.16b, v4.16b" ] }, "pshufhw xmm0, xmm1, 0xff": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Broadcast upper-half Element 3", "0xf3 0x0f 0x70" ], "ExpectedArm64ASM": [ - "dup v2.8h, v17.h[7]", - "trn1 v16.2d, v17.2d, v2.2d" + "mov v2.16b, v17.16b", + "dup v3.8h, v2.h[7]", + "trn1 v4.2d, v2.2d, v3.2d", + "mov v16.16b, v4.16b" ] }, "movq xmm0, xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0xf3 0x0f 0x7e", "ExpectedArm64ASM": [ - "mov v16.8b, v16.8b" + "mov v2.16b, v16.16b", + "mov v3.8b, v2.8b", + "mov v16.16b, v3.16b" ] }, "movq xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0xf3 0x0f 0x7e", "ExpectedArm64ASM": [ - "mov v16.8b, v17.8b" + "mov v2.16b, v17.16b", + "mov v3.8b, v2.8b", + "mov v16.16b, v3.16b" ] }, "movq xmm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0xf3 0x0f 0x7e", "ExpectedArm64ASM": [ - "ldr d16, [x4]" + "mov x20, x4", + "ldr d2, [x20]", + "mov v16.16b, v2.16b" ] }, "movdqu [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0xf3 0x0f 0x7f", "ExpectedArm64ASM": [ - "str q16, [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "str q2, [x20]" ] }, "popcnt ax, bx": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 15, "Comment": "0xf3 0x0f 0xb8", "ExpectedArm64ASM": [ - "uxth w20, w7", - "fmov s0, w20", + "mov x20, x7", + "uxth w21, w20", + "fmov s0, w21", "cnt v0.8b, v0.8b", "addp v0.8b, v0.8b, v0.8b", "umov w20, v0.b[0]", - "bfxil x4, x20, #0, #16", + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x4, x22", "tst w20, w20", - "mov w26, #0x1", - "mov w27, #0x0" + "mov w20, #0x1", + "mov x26, x20", + "mov w20, #0x0", + "mov x27, x20" ] }, "popcnt eax, ebx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "0xf3 0x0f 0xb8", "ExpectedArm64ASM": [ - "fmov s0, w7", + "mov x20, x7", + "fmov s0, w20", "cnt v0.8b, v0.8b", "addv b0, v0.8b", - "umov w4, v0.b[0]", - "tst w4, w4", - "mov w26, #0x1", - "mov w27, #0x0" + "umov w21, v0.b[0]", + "mov x4, x21", + "tst w21, w21", + "mov w20, #0x1", + "mov x26, x20", + "mov w20, #0x0", + "mov x27, x20" ] }, "popcnt rax, rbx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": "0xf3 0x0f 0xb8", "ExpectedArm64ASM": [ - "fmov d0, x7", + "mov x20, x7", + "fmov d0, x20", "cnt v0.8b, v0.8b", "addv b0, v0.8b", - "umov w4, v0.b[0]", - "tst w4, w4", - "mov w26, #0x1", - "mov w27, #0x0" + "umov w21, v0.b[0]", + "mov x4, x21", + "tst w21, w21", + "mov w20, #0x1", + "mov x26, x20", + "mov w20, #0x0", + "mov x27, x20" ] }, "tzcnt ax, bx": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 13, "Comment": "0xf3 0x0f 0xbc", "ExpectedArm64ASM": [ - "rbit w20, w7", - "orr w20, w20, #0x8000", - "clz w20, w20", - "bfxil x4, x20, #0, #16", - "cmn wzr, w20, lsl #16", - "ubfx x20, x20, #4, #1", + "mov x20, x7", + "rbit w21, w20", + "orr w21, w21, #0x8000", + "clz w21, w21", + "mov x20, x4", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22", + "cmn wzr, w21, lsl #16", + "ubfx x20, x21, #4, #1", "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "msr nzcv, x20" + "orr w22, w21, w20, lsl #29", + "msr nzcv, x22" ] }, "tzcnt eax, ebx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "0xf3 0x0f 0xbc", "ExpectedArm64ASM": [ - "rbit w4, w7", - "clz w4, w4", - "tst w4, w4", - "ubfx x20, x4, #5, #1", + "mov x20, x7", + "rbit w21, w20", + "clz w21, w21", + "mov x4, x21", + "tst w21, w21", + "ubfx x20, x21, #5, #1", "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "msr nzcv, x20" + "orr w22, w21, w20, lsl #29", + "msr nzcv, x22" ] }, "tzcnt rax, rbx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": "0xf3 0x0f 0xbc", "ExpectedArm64ASM": [ - "rbit x4, x7", - "clz x4, x4", - "tst x4, x4", - "ubfx x20, x4, #6, #1", + "mov x20, x7", + "rbit x21, x20", + "clz x21, x21", + "mov x4, x21", + "tst x21, x21", + "ubfx x20, x21, #6, #1", "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "msr nzcv, x20" + "orr w22, w21, w20, lsl #29", + "msr nzcv, x22" ] }, "lzcnt ax, bx": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 13, "Comment": "0xf3 0x0f 0xbd", "ExpectedArm64ASM": [ - "lsl w20, w7, #16", - "orr w20, w20, #0x8000", - "clz w20, w20", - "bfxil x4, x20, #0, #16", - "cmn wzr, w20, lsl #16", - "ubfx x20, x20, #4, #1", + "mov x20, x7", + "lsl w21, w20, #16", + "orr w21, w21, #0x8000", + "clz w21, w21", + "mov x20, x4", + "mov x22, x20", + "bfxil x22, x21, #0, #16", + "mov x4, x22", + "cmn wzr, w21, lsl #16", + "ubfx x20, x21, #4, #1", "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "msr nzcv, x20" + "orr w22, w21, w20, lsl #29", + "msr nzcv, x22" ] }, "lzcnt eax, ebx": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "0xf3 0x0f 0xbd", "ExpectedArm64ASM": [ - "clz w4, w7", - "tst w4, w4", - "ubfx x20, x4, #5, #1", + "mov x20, x7", + "clz w21, w20", + "mov x4, x21", + "tst w21, w21", + "ubfx x20, x21, #5, #1", "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "msr nzcv, x20" + "orr w22, w21, w20, lsl #29", + "msr nzcv, x22" ] }, "lzcnt rax, rbx": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": "0xf3 0x0f 0xbd", "ExpectedArm64ASM": [ - "clz x4, x7", - "tst x4, x4", - "ubfx x20, x4, #6, #1", + "mov x20, x7", + "clz x21, x20", + "mov x4, x21", + "tst x21, x21", + "ubfx x20, x21, #6, #1", "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "msr nzcv, x20" + "orr w22, w21, w20, lsl #29", + "msr nzcv, x22" ] }, "cmpss xmm0, xmm1, 0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0xf3 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmeq s0, s16, s17", - "mov v16.s[0], v0.s[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fcmeq s0, s2, s3", + "mov v4.s[0], v0.s[0]", + "mov v16.16b, v4.16b" ] }, "cmpss xmm0, xmm1, 1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0xf3 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmgt s0, s17, s16", - "mov v16.s[0], v0.s[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fcmgt s0, s3, s2", + "mov v4.s[0], v0.s[0]", + "mov v16.16b, v4.16b" ] }, "cmpss xmm0, xmm1, 2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0xf3 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmge s0, s17, s16", - "mov v16.s[0], v0.s[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fcmge s0, s3, s2", + "mov v4.s[0], v0.s[0]", + "mov v16.16b, v4.16b" ] }, "cmpss xmm0, xmm1, 3": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": [ "0xf3 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmge s0, s16, s17", - "fcmgt s1, s17, s16", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fcmge s0, s2, s3", + "fcmgt s1, s3, s2", "orr v0.8b, v0.8b, v1.8b", "mvn v0.8b, v0.8b", - "mov v16.s[0], v0.s[0]" + "mov v4.s[0], v0.s[0]", + "mov v16.16b, v4.16b" ] }, "cmpss xmm0, xmm1, 4": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0xf3 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmeq s0, s16, s17", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fcmeq s0, s2, s3", "mvn v0.8b, v0.8b", - "mov v16.s[0], v0.s[0]" + "mov v4.s[0], v0.s[0]", + "mov v16.16b, v4.16b" ] }, "cmpss xmm0, xmm1, 5": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0xf3 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmgt s2, s17, s16", - "mvn v2.16b, v2.16b", - "mov v16.s[0], v2.s[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "fcmgt s4, s3, s2", + "mvn v3.16b, v4.16b", + "mov v4.16b, v2.16b", + "mov v4.s[0], v3.s[0]", + "mov v16.16b, v4.16b" ] }, "cmpss xmm0, xmm1, 6": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0xf3 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmge s2, s17, s16", - "mvn v2.16b, v2.16b", - "mov v16.s[0], v2.s[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "fcmge s4, s3, s2", + "mvn v3.16b, v4.16b", + "mov v4.16b, v2.16b", + "mov v4.s[0], v3.s[0]", + "mov v16.16b, v4.16b" ] }, "cmpss xmm0, xmm1, 7": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": [ "0xf3 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmge s0, s16, s17", - "fcmgt s1, s17, s16", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fcmge s0, s2, s3", + "fcmgt s1, s3, s2", "orr v0.8b, v0.8b, v1.8b", - "mov v16.s[0], v0.s[0]" + "mov v4.s[0], v0.s[0]", + "mov v16.16b, v4.16b" ] }, "movq2dq xmm0, mm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "0xf3 0x0f 0xd6", "ExpectedArm64ASM": [ - "ldr d16, [x28, #768]" + "ldr d2, [x28, #768]", + "mov v16.16b, v2.16b" ] }, "cvtdq2pd xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0xf3 0x0f 0xe6", "ExpectedArm64ASM": [ - "sxtl v2.2d, v17.2s", - "scvtf v16.2d, v2.2d" + "mov v2.16b, v17.16b", + "sxtl v3.2d, v2.2s", + "scvtf v2.2d, v3.2d", + "mov v16.16b, v2.16b" ] }, "cvtdq2pd xmm0, [rax]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0xf3 0x0f 0xe6", "ExpectedArm64ASM": [ - "ldr d2, [x4]", - "sxtl v2.2d, v2.2s", - "scvtf v16.2d, v2.2d" + "mov x20, x4", + "ldr d2, [x20]", + "sxtl v3.2d, v2.2s", + "scvtf v2.2d, v3.2d", + "mov v16.16b, v2.16b" ] } } diff --git a/unittests/InstructionCountCI/Secondary_REPNE.json b/unittests/InstructionCountCI/Secondary_REPNE.json index d1481c81e9..d74741ceec 100644 --- a/unittests/InstructionCountCI/Secondary_REPNE.json +++ b/unittests/InstructionCountCI/Secondary_REPNE.json @@ -11,305 +11,396 @@ }, "Instructions": { "movsd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": "0xf2 0x0f 0x10", "ExpectedArm64ASM": [ - "mov v16.d[0], v17.d[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "mov v4.d[0], v3.d[0]", + "mov v16.16b, v4.16b" ] }, "movsd xmm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0xf2 0x0f 0x10", "ExpectedArm64ASM": [ - "ldr d16, [x4]" + "mov x20, x4", + "ldr d2, [x20]", + "mov v16.16b, v2.16b" ] }, "movsd [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0xf2 0x0f 0x11", "ExpectedArm64ASM": [ - "str d16, [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "str d2, [x20]" ] }, "movddup xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0xf2 0x0f 0x12", "ExpectedArm64ASM": [ - "dup v16.2d, v17.d[0]" + "mov v2.16b, v17.16b", + "dup v3.2d, v2.d[0]", + "mov v16.16b, v3.16b" ] }, "movddup xmm0, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0xf2 0x0f 0x12", "ExpectedArm64ASM": [ - "ldr d2, [x4]", - "dup v16.2d, v2.d[0]" + "mov x20, x4", + "ldr d2, [x20]", + "dup v3.2d, v2.d[0]", + "mov v16.16b, v3.16b" ] }, "cvtsi2sd xmm0, eax": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0xf2 0x0f 0x2a" ], "ExpectedArm64ASM": [ - "scvtf d0, w4", - "mov v16.d[0], v0.d[0]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "scvtf d0, w20", + "mov v3.d[0], v0.d[0]", + "mov v16.16b, v3.16b" ] }, "cvtsi2sd xmm0, dword [rax]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0xf2 0x0f 0x2a" ], "ExpectedArm64ASM": [ - "ldr w20, [x4]", - "scvtf d0, w20", - "mov v16.d[0], v0.d[0]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "ldr w21, [x20]", + "mov v3.16b, v2.16b", + "scvtf d0, w21", + "mov v3.d[0], v0.d[0]", + "mov v16.16b, v3.16b" ] }, "cvtsi2sd xmm0, rax": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0xf2 0x0f 0x2a" ], "ExpectedArm64ASM": [ - "scvtf d0, x4", - "mov v16.d[0], v0.d[0]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "mov v3.16b, v2.16b", + "scvtf d0, x20", + "mov v3.d[0], v0.d[0]", + "mov v16.16b, v3.16b" ] }, "cvtsi2sd xmm0, qword [rax]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0xf2 0x0f 0x2a" ], "ExpectedArm64ASM": [ - "ldr d2, [x4]", - "scvtf d0, d2", - "mov v16.d[0], v0.d[0]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "ldr d3, [x20]", + "mov v4.16b, v2.16b", + "scvtf d0, d3", + "mov v4.d[0], v0.d[0]", + "mov v16.16b, v4.16b" ] }, "movntsd [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0xf2 0x0f 0x2b", "ExpectedArm64ASM": [ - "str d16, [x4]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "str d2, [x20]" ] }, "cvttsd2si eax, xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0xf2 0x0f 0x2c", "ExpectedArm64ASM": [ - "fcvtzs w4, d16" + "mov v2.16b, v16.16b", + "fcvtzs w20, d2", + "mov x4, x20" ] }, "cvttsd2si eax, qword [rbx]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0xf2 0x0f 0x2c", "ExpectedArm64ASM": [ - "ldr d2, [x7]", - "fcvtzs w4, d2" + "mov x20, x7", + "ldr d2, [x20]", + "fcvtzs w20, d2", + "mov x4, x20" ] }, "cvttsd2si rax, xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0xf2 0x0f 0x2c", "ExpectedArm64ASM": [ - "fcvtzs x4, d16" + "mov v2.16b, v16.16b", + "fcvtzs x20, d2", + "mov x4, x20" ] }, "cvttsd2si rax, qword [rbx]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0xf2 0x0f 0x2c", "ExpectedArm64ASM": [ - "ldr d2, [x7]", - "fcvtzs x4, d2" + "mov x20, x7", + "ldr d2, [x20]", + "fcvtzs x20, d2", + "mov x4, x20" ] }, "cvtsd2si eax, xmm0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0xf2 0x0f 0x2d", "ExpectedArm64ASM": [ - "frinti d0, d16", - "fcvtzs x4, d0" + "mov v2.16b, v16.16b", + "frinti d0, d2", + "fcvtzs x20, d0", + "mov x4, x20" ] }, "cvtsd2si eax, qword [rbx]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0xf2 0x0f 0x2d", "ExpectedArm64ASM": [ - "ldr d2, [x7]", + "mov x20, x7", + "ldr d2, [x20]", "frinti d0, d2", - "fcvtzs x4, d0" + "fcvtzs x20, d0", + "mov x4, x20" ] }, "cvtsd2si rax, xmm0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": "0xf2 0x0f 0x2d", "ExpectedArm64ASM": [ - "frinti d0, d16", - "fcvtzs x4, d0" + "mov v2.16b, v16.16b", + "frinti d0, d2", + "fcvtzs x20, d0", + "mov x4, x20" ] }, "cvtsd2si rax, qword [rbx]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0xf2 0x0f 0x2d", "ExpectedArm64ASM": [ - "ldr d2, [x7]", + "mov x20, x7", + "ldr d2, [x20]", "frinti d0, d2", - "fcvtzs x4, d0" + "fcvtzs x20, d0", + "mov x4, x20" ] }, "sqrtsd xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0xf2 0x0f 0x51" ], "ExpectedArm64ASM": [ - "fsqrt d0, d17", - "mov v16.d[0], v0.d[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fsqrt d0, d3", + "mov v4.d[0], v0.d[0]", + "mov v16.16b, v4.16b" ] }, "addsd xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0xf2 0x0f 0x58" ], "ExpectedArm64ASM": [ - "fadd d0, d16, d17", - "mov v16.d[0], v0.d[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fadd d0, d2, d3", + "mov v4.d[0], v0.d[0]", + "mov v16.16b, v4.16b" ] }, "mulsd xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0xf2 0x0f 0x59" ], "ExpectedArm64ASM": [ - "fmul d0, d16, d17", - "mov v16.d[0], v0.d[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fmul d0, d2, d3", + "mov v4.d[0], v0.d[0]", + "mov v16.16b, v4.16b" ] }, "cvtsd2ss xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0xf2 0x0f 0x5a" ], "ExpectedArm64ASM": [ - "fcvt s0, d17", - "mov v16.s[0], v0.s[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fcvt s0, d3", + "mov v4.s[0], v0.s[0]", + "mov v16.16b, v4.16b" ] }, "cvtsd2ss xmm0, [rax]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0xf2 0x0f 0x5a" ], "ExpectedArm64ASM": [ - "ldr q2, [x4]", - "fcvt s0, d2", - "mov v16.s[0], v0.s[0]" + "mov v2.16b, v16.16b", + "mov x20, x4", + "ldr q3, [x20]", + "mov v4.16b, v2.16b", + "fcvt s0, d3", + "mov v4.s[0], v0.s[0]", + "mov v16.16b, v4.16b" ] }, "subsd xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0xf2 0x0f 0x5c" ], "ExpectedArm64ASM": [ - "fsub d0, d16, d17", - "mov v16.d[0], v0.d[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fsub d0, d2, d3", + "mov v4.d[0], v0.d[0]", + "mov v16.16b, v4.16b" ] }, "minsd xmm0, xmm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": [ "0xf2 0x0f 0x5d" ], "ExpectedArm64ASM": [ + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", "mrs x20, nzcv", - "fcmp d16, d17", - "fcsel d0, d16, d17, mi", - "mov v16.d[0], v0.d[0]", + "mov v4.16b, v2.16b", + "fcmp d2, d3", + "fcsel d0, d2, d3, mi", + "mov v4.d[0], v0.d[0]", + "mov v16.16b, v4.16b", "msr nzcv, x20" ] }, "divsd xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0xf2 0x0f 0x5e" ], "ExpectedArm64ASM": [ - "fdiv d0, d16, d17", - "mov v16.d[0], v0.d[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fdiv d0, d2, d3", + "mov v4.d[0], v0.d[0]", + "mov v16.16b, v4.16b" ] }, "maxsd xmm0, xmm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": [ "0xf2 0x0f 0x5f" ], "ExpectedArm64ASM": [ + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", "mrs x20, nzcv", - "fcmp d16, d17", - "fcsel d0, d17, d16, mi", - "mov v16.d[0], v0.d[0]", + "mov v4.16b, v2.16b", + "fcmp d2, d3", + "fcsel d0, d3, d2, mi", + "mov v4.d[0], v0.d[0]", + "mov v16.16b, v4.16b", "msr nzcv, x20" ] }, "pshuflw xmm0, xmm1, 0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Broadcast element 0", "0xf2 0x0f 0x70" ], "ExpectedArm64ASM": [ - "dup v2.8h, v17.h[0]", - "trn2 v16.2d, v2.2d, v17.2d" + "mov v2.16b, v17.16b", + "dup v3.8h, v2.h[0]", + "trn2 v4.2d, v3.2d, v2.2d", + "mov v16.16b, v4.16b" ] }, "pshuflw xmm0, xmm1, 11100100b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Identity copy", "0xf2 0x0f 0x70" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b" + "mov v2.16b, v17.16b", + "mov v16.16b, v2.16b" ] }, "pshuflw xmm0, xmm1, 01010000b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Lower elements Self-zip", "0xf2 0x0f 0x70" ], "ExpectedArm64ASM": [ - "zip1 v2.8h, v17.8h, v17.8h", - "zip1 v16.2d, v2.2d, v17.2d" + "mov v2.16b, v17.16b", + "zip1 v3.8h, v2.8h, v2.8h", + "zip1 v4.2d, v3.2d, v2.2d", + "mov v16.16b, v4.16b" ] }, "pshuflw xmm0, xmm1, 1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "Broadcast first element in to Elements 1,2,3", "Element 0 gets turned in to element 1", "0xf2 0x0f 0x70" ], "ExpectedArm64ASM": [ + "mov v2.16b, v17.16b", "ldr x0, [x28, #1744]", - "ldr q2, [x0, #16]", - "tbl v16.16b, {v17.16b}, v2.16b" + "ldr q3, [x0, #16]", + "tbl v4.16b, {v2.16b}, v3.16b", + "mov v16.16b, v4.16b" ] }, "pshuflw xmm0, xmm1, 0xff": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Broadcast Element 3", "0xf2 0x0f 0x70" ], "ExpectedArm64ASM": [ - "dup v2.8h, v17.h[3]", - "trn2 v16.2d, v2.2d, v17.2d" + "mov v2.16b, v17.16b", + "dup v3.8h, v2.h[3]", + "trn2 v4.2d, v3.2d, v2.2d", + "mov v16.16b, v4.16b" ] }, "insertq xmm0, xmm1, 0, 0": { @@ -345,139 +436,185 @@ ] }, "haddps xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": "0xf2 0x0f 0x7c", "ExpectedArm64ASM": [ - "faddp v16.4s, v16.4s, v17.4s" + "mov v2.16b, v17.16b", + "mov v3.16b, v16.16b", + "faddp v4.4s, v3.4s, v2.4s", + "mov v16.16b, v4.16b" ] }, "hsubps xmm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0xf2 0x0f 0x7d", "ExpectedArm64ASM": [ - "uzp1 v2.4s, v16.4s, v17.4s", - "uzp2 v3.4s, v16.4s, v17.4s", - "fsub v16.4s, v2.4s, v3.4s" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "uzp1 v4.4s, v2.4s, v3.4s", + "uzp2 v5.4s, v2.4s, v3.4s", + "fsub v2.4s, v4.4s, v5.4s", + "mov v16.16b, v2.16b" ] }, "cmpsd xmm0, xmm1, 0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0xf2 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmeq d0, d16, d17", - "mov v16.d[0], v0.d[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fcmeq d0, d2, d3", + "mov v4.d[0], v0.d[0]", + "mov v16.16b, v4.16b" ] }, "cmpsd xmm0, xmm1, 1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0xf2 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmgt d0, d17, d16", - "mov v16.d[0], v0.d[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fcmgt d0, d3, d2", + "mov v4.d[0], v0.d[0]", + "mov v16.16b, v4.16b" ] }, "cmpsd xmm0, xmm1, 2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "0xf2 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmge d0, d17, d16", - "mov v16.d[0], v0.d[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fcmge d0, d3, d2", + "mov v4.d[0], v0.d[0]", + "mov v16.16b, v4.16b" ] }, "cmpsd xmm0, xmm1, 3": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": [ "0xf2 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmge d0, d16, d17", - "fcmgt d1, d17, d16", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fcmge d0, d2, d3", + "fcmgt d1, d3, d2", "orr v0.8b, v0.8b, v1.8b", "mvn v0.8b, v0.8b", - "mov v16.d[0], v0.d[0]" + "mov v4.d[0], v0.d[0]", + "mov v16.16b, v4.16b" ] }, "cmpsd xmm0, xmm1, 4": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0xf2 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmeq d0, d16, d17", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fcmeq d0, d2, d3", "mvn v0.8b, v0.8b", - "mov v16.d[0], v0.d[0]" + "mov v4.d[0], v0.d[0]", + "mov v16.16b, v4.16b" ] }, "cmpsd xmm0, xmm1, 5": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0xf2 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmgt d2, d17, d16", - "mvn v2.16b, v2.16b", - "mov v16.d[0], v2.d[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "fcmgt d4, d3, d2", + "mvn v3.16b, v4.16b", + "mov v4.16b, v2.16b", + "mov v4.d[0], v3.d[0]", + "mov v16.16b, v4.16b" ] }, "cmpsd xmm0, xmm1, 6": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "0xf2 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmge d2, d17, d16", - "mvn v2.16b, v2.16b", - "mov v16.d[0], v2.d[0]" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "fcmge d4, d3, d2", + "mvn v3.16b, v4.16b", + "mov v4.16b, v2.16b", + "mov v4.d[0], v3.d[0]", + "mov v16.16b, v4.16b" ] }, "cmpsd xmm0, xmm1, 7": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": [ "0xf2 0x0f 0xc2" ], "ExpectedArm64ASM": [ - "fcmge d0, d16, d17", - "fcmgt d1, d17, d16", + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "mov v4.16b, v2.16b", + "fcmge d0, d2, d3", + "fcmgt d1, d3, d2", "orr v0.8b, v0.8b, v1.8b", - "mov v16.d[0], v0.d[0]" + "mov v4.d[0], v0.d[0]", + "mov v16.16b, v4.16b" ] }, "addsubps xmm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": "0xf2 0x0f 0xd0", "ExpectedArm64ASM": [ - "ldr q2, [x28, #2080]", - "eor v2.16b, v17.16b, v2.16b", - "fadd v16.4s, v16.4s, v2.4s" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "ldr q4, [x28, #2080]", + "eor v5.16b, v3.16b, v4.16b", + "fadd v3.4s, v2.4s, v5.4s", + "mov v16.16b, v3.16b" ] }, "movdq2q mm0, xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": "0xf2 0x0f 0xd6", "ExpectedArm64ASM": [ - "str d16, [x28, #768]" + "mov v2.16b, v16.16b", + "str d2, [x28, #768]" ] }, "cvtpd2dq xmm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": "0xf2 0x0f 0xe6", "ExpectedArm64ASM": [ - "fcvtn v2.2s, v17.2d", - "frinti v16.4s, v2.4s", - "fcvtzs v16.4s, v16.4s" + "mov v2.16b, v17.16b", + "fcvtn v3.2s, v2.2d", + "frinti v2.4s, v3.4s", + "fcvtzs v2.4s, v2.4s", + "mov v16.16b, v2.16b" ] }, "lddqu xmm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": "0xf2 0x0f 0xf0", "ExpectedArm64ASM": [ - "ldr q16, [x4]" + "mov x20, x4", + "ldr q2, [x20]", + "mov v16.16b, v2.16b" ] } } diff --git a/unittests/InstructionCountCI/Secondary_REPNE_FCMA.json b/unittests/InstructionCountCI/Secondary_REPNE_FCMA.json index c3cb468848..2842cea4bc 100644 --- a/unittests/InstructionCountCI/Secondary_REPNE_FCMA.json +++ b/unittests/InstructionCountCI/Secondary_REPNE_FCMA.json @@ -11,11 +11,14 @@ }, "Instructions": { "addsubps xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": "0xf2 0x0f 0xd0", "ExpectedArm64ASM": [ - "rev64 v2.4s, v17.4s", - "fcadd v16.4s, v16.4s, v2.4s, #90" + "mov v2.16b, v16.16b", + "mov v3.16b, v17.16b", + "rev64 v4.4s, v3.4s", + "fcadd v3.4s, v2.4s, v4.4s, #90", + "mov v16.16b, v3.16b" ] } } diff --git a/unittests/InstructionCountCI/Secondary_SVE128.json b/unittests/InstructionCountCI/Secondary_SVE128.json index e7dc32671c..33c004fd4a 100644 --- a/unittests/InstructionCountCI/Secondary_SVE128.json +++ b/unittests/InstructionCountCI/Secondary_SVE128.json @@ -10,105 +10,117 @@ }, "Instructions": { "movmskps eax, xmm0": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "0x0f 0x50", "ExpectedArm64ASM": [ - "ushr v2.4s, v16.4s, #31", - "index z3.s, #0, #1", - "ushl v2.4s, v2.4s, v3.4s", - "addv s2, v2.4s", - "mov w4, v2.s[0]" + "mov v2.16b, v16.16b", + "ushr v3.4s, v2.4s, #31", + "index z2.s, #0, #1", + "ushl v4.4s, v3.4s, v2.4s", + "addv s2, v4.4s", + "mov w20, v2.s[0]", + "mov x4, x20" ] }, "movmskps rax, xmm0": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": "0x0f 0x50", "ExpectedArm64ASM": [ - "ushr v2.4s, v16.4s, #31", - "index z3.s, #0, #1", - "ushl v2.4s, v2.4s, v3.4s", - "addv s2, v2.4s", - "mov w4, v2.s[0]" + "mov v2.16b, v16.16b", + "ushr v3.4s, v2.4s, #31", + "index z2.s, #0, #1", + "ushl v4.4s, v3.4s, v2.4s", + "addv s2, v4.4s", + "mov w20, v2.s[0]", + "mov x4, x20" ] }, "psrlw mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xd1", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", "ldr d3, [x28, #784]", - "lsr z2.h, p6/m, z2.h, z3.d", - "str d2, [x28, #768]" + "movprfx z4, z2", + "lsr z4.h, p6/m, z4.h, z3.d", + "str d4, [x28, #768]" ] }, "psrld mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xd2", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", "ldr d3, [x28, #784]", - "lsr z2.s, p6/m, z2.s, z3.d", - "str d2, [x28, #768]" + "movprfx z4, z2", + "lsr z4.s, p6/m, z4.s, z3.d", + "str d4, [x28, #768]" ] }, "psrlq mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xd3", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", "ldr d3, [x28, #784]", - "lsr z2.d, p6/m, z2.d, z3.d", - "str d2, [x28, #768]" + "movprfx z4, z2", + "lsr z4.d, p6/m, z4.d, z3.d", + "str d4, [x28, #768]" ] }, "psraw mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xe1", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", "ldr d3, [x28, #784]", - "asr z2.h, p6/m, z2.h, z3.d", - "str d2, [x28, #768]" + "movprfx z4, z2", + "asr z4.h, p6/m, z4.h, z3.d", + "str d4, [x28, #768]" ] }, "psrad mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xe2", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", "ldr d3, [x28, #784]", - "asr z2.s, p6/m, z2.s, z3.d", - "str d2, [x28, #768]" + "movprfx z4, z2", + "asr z4.s, p6/m, z4.s, z3.d", + "str d4, [x28, #768]" ] }, "psllw mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xf1", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", "ldr d3, [x28, #784]", - "lsl z2.h, p6/m, z2.h, z3.d", - "str d2, [x28, #768]" + "movprfx z4, z2", + "lsl z4.h, p6/m, z4.h, z3.d", + "str d4, [x28, #768]" ] }, "pslld mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xf2", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", "ldr d3, [x28, #784]", - "lsl z2.s, p6/m, z2.s, z3.d", - "str d2, [x28, #768]" + "movprfx z4, z2", + "lsl z4.s, p6/m, z4.s, z3.d", + "str d4, [x28, #768]" ] }, "psllq mm0, mm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": "0x0f 0xf3", "ExpectedArm64ASM": [ "ldr d2, [x28, #768]", "ldr d3, [x28, #784]", - "lsl z2.d, p6/m, z2.d, z3.d", - "str d2, [x28, #768]" + "movprfx z4, z2", + "lsl z4.d, p6/m, z4.d, z3.d", + "str d4, [x28, #768]" ] } } diff --git a/unittests/InstructionCountCI/VEX_map1.json b/unittests/InstructionCountCI/VEX_map1.json index 395e02f157..662ab9f331 100644 --- a/unittests/InstructionCountCI/VEX_map1.json +++ b/unittests/InstructionCountCI/VEX_map1.json @@ -15,22 +15,26 @@ }, "Instructions": { "vmovups xmm0, xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b00 0x10 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v16.16b" + "mov z2.d, p7/m, z16.d", + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vmovups xmm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "SVE 128-bit load already zero's the upper bits", "Map 1 0b00 0x10 128-bit" ], "ExpectedArm64ASM": [ - "ldr q16, [x4]" + "mov x20, x4", + "ldr q2, [x20]", + "mov z16.d, p7/m, z2.d" ] }, "vmovups ymm0, ymm0": { @@ -45,31 +49,37 @@ ] }, "vmovups ymm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b00 0x10 256-bit" ], "ExpectedArm64ASM": [ - "ld1b {z16.b}, p7/z, [x4]" + "mov x20, x4", + "ld1b {z2.b}, p7/z, [x20]", + "mov z16.d, p7/m, z2.d" ] }, "vmovupd xmm0, xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b01 0x10 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v16.16b" + "mov z2.d, p7/m, z16.d", + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vmovupd xmm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "SVE 128-bit load already zero's the upper bits", "Map 1 0b01 0x10 128-bit" ], "ExpectedArm64ASM": [ - "ldr q16, [x4]" + "mov x20, x4", + "ldr q2, [x20]", + "mov z16.d, p7/m, z2.d" ] }, "vmovupd ymm0, ymm0": { @@ -84,103 +94,125 @@ ] }, "vmovupd ymm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b01 0x10 256-bit" ], "ExpectedArm64ASM": [ - "ld1b {z16.b}, p7/z, [x4]" + "mov x20, x4", + "ld1b {z2.b}, p7/z, [x20]", + "mov z16.d, p7/m, z2.d" ] }, "vmovss xmm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "32-bit vector load already zero's the upper bits", "Map 1 0b10 0x10 128-bit" ], "ExpectedArm64ASM": [ - "ldr s16, [x4]" + "mov x20, x4", + "ldr s2, [x20]", + "mov z16.d, p7/m, z2.d" ] }, "vmovss xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Insert in to first element could be more optimal, which is the common case.", "Map 1 0b10 0x10 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "mov v16.s[0], v18.s[0]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "mov v4.s[0], v3.s[0]", + "mov z16.d, p7/m, z4.d" ] }, "vmovsd xmm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "32-bit vector load already zero's the upper bits", "Map 1 0b11 0x10 128-bit" ], "ExpectedArm64ASM": [ - "ldr d16, [x4]" + "mov x20, x4", + "ldr d2, [x20]", + "mov z16.d, p7/m, z2.d" ] }, "vmovsd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Insert in to first element could be more optimal, which is the common case.", "Map 1 0b11 0x10 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "mov v16.d[0], v18.d[0]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "mov v4.d[0], v3.d[0]", + "mov z16.d, p7/m, z4.d" ] }, "vmovups [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b00 0x11 128-bit" ], "ExpectedArm64ASM": [ - "str q16, [x4]" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "str q2, [x20]" ] }, "vmovups [rax], ymm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b00 0x11 256-bit" ], "ExpectedArm64ASM": [ - "st1b {z16.b}, p7, [x4]" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "st1b {z2.b}, p7, [x20]" ] }, "vmovupd [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b01 0x11 128-bit" ], "ExpectedArm64ASM": [ - "str q16, [x4]" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "str q2, [x20]" ] }, "vmovupd [rax], ymm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b01 0x11 256-bit" ], "ExpectedArm64ASM": [ - "st1b {z16.b}, p7, [x4]" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "st1b {z2.b}, p7, [x20]" ] }, "vmovss [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b10 0x11 128-bit" ], "ExpectedArm64ASM": [ - "str s16, [x4]" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "str s2, [x20]" ] }, "db 0xc5, 0xf2, 0x11, 0xc2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "vmovss xmm2, xmm1, xmm0", "Need to manually encode since nasm won't encode this", @@ -188,21 +220,26 @@ "Map 1 0b10 0x11 128-bit" ], "ExpectedArm64ASM": [ - "mov v18.16b, v17.16b", - "mov v18.s[0], v16.s[0]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z16.d", + "mov v4.16b, v2.16b", + "mov v4.s[0], v3.s[0]", + "mov z18.d, p7/m, z4.d" ] }, "vmovsd [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b11 0x11 128-bit" ], "ExpectedArm64ASM": [ - "str d16, [x4]" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "str d2, [x20]" ] }, "db 0xc5, 0xf3, 0x11, 0xc2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "vmovsd xmm2, xmm1, xmm0", "Need to manually encode since nasm won't encode this", @@ -210,46 +247,57 @@ "Map 1 0b11 0x11 128-bit" ], "ExpectedArm64ASM": [ - "mov v18.16b, v17.16b", - "mov v18.d[0], v16.d[0]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z16.d", + "mov v4.16b, v2.16b", + "mov v4.d[0], v3.d[0]", + "mov z18.d, p7/m, z4.d" ] }, "vmovlps xmm0, xmm1, [rax]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Insert in to first element could be more optimal, which is the common case.", "Map 1 0b00 0x12 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x4]", - "mov v16.16b, v17.16b", - "mov v16.d[0], v2.d[0]" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "ldr q3, [x20]", + "mov v4.16b, v2.16b", + "mov v4.d[0], v3.d[0]", + "mov z16.d, p7/m, z4.d" ] }, "vmovlpd xmm0, xmm1, [rax]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Insert in to first element could be more optimal, which is the common case.", "Map 1 0b01 0x12 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x4]", - "mov v16.16b, v17.16b", - "mov v16.d[0], v2.d[0]" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "ldr q3, [x20]", + "mov v4.16b, v2.16b", + "mov v4.d[0], v3.d[0]", + "mov z16.d, p7/m, z4.d" ] }, "vmovsldup xmm0, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b10 0x12 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x4]", - "trn1 v16.4s, v2.4s, v2.4s" + "mov x20, x4", + "ldr q2, [x20]", + "trn1 v3.4s, v2.4s, v2.4s", + "mov z16.d, p7/m, z3.d" ] }, "vmovsldup ymm0, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Could potentially be considered optimal.", "Ideally the load happens directly in the destination register", @@ -258,22 +306,26 @@ "Map 1 0b10 0x12 256-bit" ], "ExpectedArm64ASM": [ - "ld1b {z2.b}, p7/z, [x4]", - "trn1 z16.s, z2.s, z2.s" + "mov x20, x4", + "ld1b {z2.b}, p7/z, [x20]", + "trn1 z3.s, z2.s, z2.s", + "mov z16.d, p7/m, z3.d" ] }, "vmovddup xmm0, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b11 0x12 128-bit" ], "ExpectedArm64ASM": [ - "ldr d2, [x4]", - "dup v16.2d, v2.d[0]" + "mov x20, x4", + "ldr d2, [x20]", + "dup v3.2d, v2.d[0]", + "mov z16.d, p7/m, z3.d" ] }, "vmovddup ymm0, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Could potentially be considered optimal.", "Ideally the load happens directly in the destination register", @@ -282,3947 +334,4851 @@ "Map 1 0b11 0x12 256-bit" ], "ExpectedArm64ASM": [ - "ld1b {z2.b}, p7/z, [x4]", - "trn1 z16.d, z2.d, z2.d" + "mov x20, x4", + "ld1b {z2.b}, p7/z, [x20]", + "trn1 z3.d, z2.d, z2.d", + "mov z16.d, p7/m, z3.d" ] }, "vmovlps [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b00 0x13 128-bit" ], "ExpectedArm64ASM": [ - "str d16, [x4]" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "str d2, [x20]" ] }, "vmovlpd [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b01 0x13 128-bit" ], "ExpectedArm64ASM": [ - "str d16, [x4]" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "str d2, [x20]" ] }, "vunpcklps xmm0, xmm1, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b00 0x14 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x4]", - "zip1 v16.4s, v17.4s, v2.4s" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "ldr q3, [x20]", + "zip1 v4.4s, v2.4s, v3.4s", + "mov z16.d, p7/m, z4.d" ] }, "vunpcklps ymm0, ymm1, [rax]": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": [ "Map 1 0b00 0x14 256-bit" ], "ExpectedArm64ASM": [ - "ld1b {z2.b}, p7/z, [x4]", - "zip1 z3.s, z17.s, z2.s", - "zip2 z2.s, z17.s, z2.s", - "mov z1.q, q2", - "mov z16.d, z3.d", + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "ld1b {z3.b}, p7/z, [x20]", + "zip1 z4.s, z2.s, z3.s", + "zip2 z5.s, z2.s, z3.s", + "mov z1.q, q5", + "mov z2.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z2.b, p0/m, z1.b", + "mov z16.d, p7/m, z2.d" ] }, "vunpcklpd xmm0, xmm1, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b01 0x14 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x4]", - "zip1 v16.2d, v17.2d, v2.2d" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "ldr q3, [x20]", + "zip1 v4.2d, v2.2d, v3.2d", + "mov z16.d, p7/m, z4.d" ] }, "vunpcklpd ymm0, ymm1, [rax]": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": [ "Map 1 0b01 0x14 256-bit" ], "ExpectedArm64ASM": [ - "ld1b {z2.b}, p7/z, [x4]", - "zip1 z3.d, z17.d, z2.d", - "zip2 z2.d, z17.d, z2.d", - "mov z1.q, q2", - "mov z16.d, z3.d", + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "ld1b {z3.b}, p7/z, [x20]", + "zip1 z4.d, z2.d, z3.d", + "zip2 z5.d, z2.d, z3.d", + "mov z1.q, q5", + "mov z2.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z2.b, p0/m, z1.b", + "mov z16.d, p7/m, z2.d" ] }, "vunpckhps xmm0, xmm1, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b00 0x15 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x4]", - "zip2 v16.4s, v17.4s, v2.4s" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "ldr q3, [x20]", + "zip2 v4.4s, v2.4s, v3.4s", + "mov z16.d, p7/m, z4.d" ] }, "vunpckhps ymm0, ymm1, [rax]": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": [ "Map 1 0b00 0x15 256-bit" ], "ExpectedArm64ASM": [ - "ld1b {z2.b}, p7/z, [x4]", - "zip1 z3.s, z17.s, z2.s", - "zip2 z2.s, z17.s, z2.s", - "mov z1.q, z3.q[1]", - "mov z16.d, z2.d", - "mov z16.b, p6/m, z1.b" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "ld1b {z3.b}, p7/z, [x20]", + "zip1 z4.s, z2.s, z3.s", + "zip2 z5.s, z2.s, z3.s", + "mov z1.q, z4.q[1]", + "mov z2.d, z5.d", + "mov z2.b, p6/m, z1.b", + "mov z16.d, p7/m, z2.d" ] }, "vunpckhpd xmm0, xmm1, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b01 0x15 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x4]", - "zip2 v16.2d, v17.2d, v2.2d" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "ldr q3, [x20]", + "zip2 v4.2d, v2.2d, v3.2d", + "mov z16.d, p7/m, z4.d" ] }, "vunpckhpd ymm0, ymm1, [rax]": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": [ "Map 1 0b01 0x15 256-bit" ], "ExpectedArm64ASM": [ - "ld1b {z2.b}, p7/z, [x4]", - "zip1 z3.d, z17.d, z2.d", - "zip2 z2.d, z17.d, z2.d", - "mov z1.q, z3.q[1]", - "mov z16.d, z2.d", - "mov z16.b, p6/m, z1.b" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "ld1b {z3.b}, p7/z, [x20]", + "zip1 z4.d, z2.d, z3.d", + "zip2 z5.d, z2.d, z3.d", + "mov z1.q, z4.q[1]", + "mov z2.d, z5.d", + "mov z2.b, p6/m, z1.b", + "mov z16.d, p7/m, z2.d" ] }, "vmovhps xmm0, xmm1, [rax]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b00 0x16 128-bit" ], "ExpectedArm64ASM": [ - "mov v2.8b, v17.8b", - "ldr d3, [x4]", - "mov v16.16b, v2.16b", - "mov v16.d[1], v3.d[0]" + "mov z2.d, p7/m, z17.d", + "mov v3.8b, v2.8b", + "mov x20, x4", + "ldr d2, [x20]", + "mov v4.16b, v3.16b", + "mov v4.d[1], v2.d[0]", + "mov z16.d, p7/m, z4.d" ] }, "vmovhpd xmm0, xmm1, [rax]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b01 0x16 128-bit" ], "ExpectedArm64ASM": [ - "mov v2.8b, v17.8b", - "ldr d3, [x4]", - "mov v16.16b, v2.16b", - "mov v16.d[1], v3.d[0]" + "mov z2.d, p7/m, z17.d", + "mov v3.8b, v2.8b", + "mov x20, x4", + "ldr d2, [x20]", + "mov v4.16b, v3.16b", + "mov v4.d[1], v2.d[0]", + "mov z16.d, p7/m, z4.d" ] }, "vmovshdup xmm0, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b10 0x16 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x4]", - "trn2 v16.4s, v2.4s, v2.4s" + "mov x20, x4", + "ldr q2, [x20]", + "trn2 v3.4s, v2.4s, v2.4s", + "mov z16.d, p7/m, z3.d" ] }, "vmovshdup ymm0, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b10 0x16 256-bit" ], "ExpectedArm64ASM": [ - "ld1b {z2.b}, p7/z, [x4]", - "trn2 z16.s, z2.s, z2.s" + "mov x20, x4", + "ld1b {z2.b}, p7/z, [x20]", + "trn2 z3.s, z2.s, z2.s", + "mov z16.d, p7/m, z3.d" ] }, "vmovhps [rax], xmm0": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b00 0x17 128-bit" ], "ExpectedArm64ASM": [ - "mov v2.16b, v16.16b", - "mov v2.d[0], v16.d[1]", - "str d2, [x4]" + "mov z2.d, p7/m, z16.d", + "mov v3.16b, v2.16b", + "mov v3.d[0], v2.d[1]", + "mov x20, x4", + "str d3, [x20]" ] }, "vmovhpd [rax], xmm0": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b01 0x17 128-bit" ], "ExpectedArm64ASM": [ - "mov v2.16b, v16.16b", - "mov v2.d[0], v16.d[1]", - "str d2, [x4]" + "mov z2.d, p7/m, z16.d", + "mov v3.16b, v2.16b", + "mov v3.d[0], v2.d[1]", + "mov x20, x4", + "str d3, [x20]" ] }, "vmovmskps rax, xmm0": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b00 0x50 128-bit" ], "ExpectedArm64ASM": [ - "ushr v2.4s, v16.4s, #31", - "index z3.s, #0, #1", - "ushl v2.4s, v2.4s, v3.4s", - "addv s2, v2.4s", - "mov w4, v2.s[0]" + "mov z2.d, p7/m, z16.d", + "ushr v3.4s, v2.4s, #31", + "index z2.s, #0, #1", + "ushl v4.4s, v3.4s, v2.4s", + "addv s2, v4.4s", + "mov w20, v2.s[0]", + "mov x4, x20" ] }, "vmovmskps rax, ymm0": { - "ExpectedInstructionCount": 41, + "ExpectedInstructionCount": 43, "Comment": [ "Map 1 0b00 0x50 256-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z16.d", "mov w20, #0x0", - "mov w21, v16.s[0]", - "lsr w21, w21, #31", - "orr x20, x20, x21", - "mov w21, v16.s[1]", - "lsr w21, w21, #31", - "lsl w21, w21, #1", - "orr x20, x20, x21", - "mov w21, v16.s[2]", - "lsr w21, w21, #31", - "lsl w21, w21, #2", - "orr x20, x20, x21", - "mov w21, v16.s[3]", - "lsr w21, w21, #31", - "lsl w21, w21, #3", - "orr x20, x20, x21", + "mov w21, v2.s[0]", + "lsr w22, w21, #31", + "orr x21, x20, x22", + "mov w20, v2.s[1]", + "lsr w22, w20, #31", + "lsl w20, w22, #1", + "orr x22, x21, x20", + "mov w20, v2.s[2]", + "lsr w21, w20, #31", + "lsl w20, w21, #2", + "orr x21, x22, x20", + "mov w20, v2.s[3]", + "lsr w22, w20, #31", + "lsl w20, w22, #3", + "orr x22, x21, x20", "not p0.b, p7/z, p6.b", - "compact z0.d, p0, z16.d", - "mov w21, v16.s[0]", - "lsr w21, w21, #31", - "lsl w21, w21, #4", - "orr x20, x20, x21", + "compact z0.d, p0, z2.d", + "mov w20, v2.s[0]", + "lsr w21, w20, #31", + "lsl w20, w21, #4", + "orr x21, x22, x20", "not p0.b, p7/z, p6.b", - "compact z0.d, p0, z16.d", - "mov w21, v16.s[1]", - "lsr w21, w21, #31", - "lsl w21, w21, #5", - "orr x20, x20, x21", + "compact z0.d, p0, z2.d", + "mov w20, v2.s[1]", + "lsr w22, w20, #31", + "lsl w20, w22, #5", + "orr x22, x21, x20", "not p0.b, p7/z, p6.b", - "compact z0.d, p0, z16.d", - "mov w21, v16.s[2]", - "lsr w21, w21, #31", - "lsl w21, w21, #6", - "orr x20, x20, x21", + "compact z0.d, p0, z2.d", + "mov w20, v2.s[2]", + "lsr w21, w20, #31", + "lsl w20, w21, #6", + "orr x21, x22, x20", "not p0.b, p7/z, p6.b", - "compact z0.d, p0, z16.d", - "mov w21, v16.s[3]", - "lsr w21, w21, #31", - "lsl w21, w21, #7", - "orr x20, x20, x21", - "mov w4, w20" + "compact z0.d, p0, z2.d", + "mov w20, v2.s[3]", + "lsr w22, w20, #31", + "lsl w20, w22, #7", + "orr x22, x21, x20", + "mov w20, w22", + "mov x4, x20" ] }, "vmovmskpd rax, xmm0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b01 0x50 128-bit" ], "ExpectedArm64ASM": [ - "uzp2 v2.4s, v16.4s, v16.4s", - "mov x20, v2.d[0]", - "bfi x20, x20, #31, #32", - "lsr x4, x20, #62" + "mov z2.d, p7/m, z16.d", + "uzp2 v3.4s, v2.4s, v2.4s", + "mov x20, v3.d[0]", + "mov x21, x20", + "bfi x21, x20, #31, #32", + "lsr x20, x21, #62", + "mov x4, x20" ] }, "vmovmskpd rax, ymm0": { - "ExpectedInstructionCount": 21, + "ExpectedInstructionCount": 23, "Comment": [ "Map 1 0b01 0x50 256-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z16.d", "mov w20, #0x0", - "mov x21, v16.d[0]", - "lsr x21, x21, #63", - "orr x20, x20, x21", - "mov x21, v16.d[1]", - "lsr x21, x21, #63", - "lsl x21, x21, #1", - "orr x20, x20, x21", + "mov x21, v2.d[0]", + "lsr x22, x21, #63", + "orr x21, x20, x22", + "mov x20, v2.d[1]", + "lsr x22, x20, #63", + "lsl x20, x22, #1", + "orr x22, x21, x20", "not p0.b, p7/z, p6.b", - "compact z0.d, p0, z16.d", - "mov x21, v16.d[0]", - "lsr x21, x21, #63", - "lsl x21, x21, #2", - "orr x20, x20, x21", + "compact z0.d, p0, z2.d", + "mov x20, v2.d[0]", + "lsr x21, x20, #63", + "lsl x20, x21, #2", + "orr x21, x22, x20", "not p0.b, p7/z, p6.b", - "compact z0.d, p0, z16.d", - "mov x21, v16.d[1]", - "lsr x21, x21, #63", - "lsl x21, x21, #3", - "orr x20, x20, x21", - "mov w4, w20" + "compact z0.d, p0, z2.d", + "mov x20, v2.d[1]", + "lsr x22, x20, #63", + "lsl x20, x22, #3", + "orr x22, x21, x20", + "mov w20, w22", + "mov x4, x20" ] }, "vsqrtps xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b00 0x51 128-bit" ], "ExpectedArm64ASM": [ - "fsqrt v16.4s, v17.4s" + "mov z2.d, p7/m, z17.d", + "fsqrt v3.4s, v2.4s", + "mov z16.d, p7/m, z3.d" ] }, "vsqrtps ymm0, ymm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b00 0x51 256-bit" ], "ExpectedArm64ASM": [ - "fsqrt z16.s, p7/m, z17.s" + "mov z2.d, p7/m, z17.d", + "fsqrt z3.s, p7/m, z2.s", + "mov z16.d, p7/m, z3.d" ] }, "vsqrtpd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b01 0x51 128-bit" ], "ExpectedArm64ASM": [ - "fsqrt v16.2d, v17.2d" + "mov z2.d, p7/m, z17.d", + "fsqrt v3.2d, v2.2d", + "mov z16.d, p7/m, z3.d" ] }, "vsqrtpd ymm0, ymm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b01 0x51 256-bit" ], "ExpectedArm64ASM": [ - "fsqrt z16.d, p7/m, z17.d" + "mov z2.d, p7/m, z17.d", + "fsqrt z3.d, p7/m, z2.d", + "mov z16.d, p7/m, z3.d" ] }, "vsqrtss xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b10 0x51 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fsqrt s0, s18", - "mov v16.s[0], v0.s[0]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fsqrt s0, s3", + "mov v4.s[0], v0.s[0]", + "mov z16.d, p7/m, z4.d" ] }, "vsqrtsd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b11 0x51 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fsqrt d0, d18", - "mov v16.d[0], v0.d[0]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fsqrt d0, d3", + "mov v4.d[0], v0.d[0]", + "mov z16.d, p7/m, z4.d" ] }, "vrsqrtps xmm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b00 0x52 128-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z17.d", "fmov v0.4s, #0x70 (1.0000)", - "fsqrt v1.4s, v17.4s", - "fdiv v16.4s, v0.4s, v1.4s" + "fsqrt v1.4s, v2.4s", + "fdiv v3.4s, v0.4s, v1.4s", + "mov z16.d, p7/m, z3.d" ] }, "vrsqrtps ymm0, ymm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b00 0x52 256-bit" ], "ExpectedArm64ASM": [ - "fsqrt z0.s, p7/m, z17.s", - "fmov z16.s, #0x70 (1.0000)", - "fdiv z16.s, p7/m, z16.s, z0.s" + "mov z2.d, p7/m, z17.d", + "fsqrt z0.s, p7/m, z2.s", + "fmov z3.s, #0x70 (1.0000)", + "fdiv z3.s, p7/m, z3.s, z0.s", + "mov z16.d, p7/m, z3.d" ] }, "vrsqrtss xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "Map 1 0b10 0x52 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", "fmov s0, #0x70 (1.0000)", - "fsqrt s1, s18", + "fsqrt s1, s3", "fdiv s0, s0, s1", - "mov v16.s[0], v0.s[0]" + "mov v4.s[0], v0.s[0]", + "mov z16.d, p7/m, z4.d" ] }, "vrcpps xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b00 0x53 128-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z17.d", "fmov v0.4s, #0x70 (1.0000)", - "fdiv v16.4s, v0.4s, v17.4s" + "fdiv v3.4s, v0.4s, v2.4s", + "mov z16.d, p7/m, z3.d" ] }, "vrcpps ymm0, ymm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b00 0x53 256-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z17.d", "fmov z0.s, #0x70 (1.0000)", - "fdiv z0.s, p7/m, z0.s, z17.s", - "mov z16.d, z0.d" + "fdiv z0.s, p7/m, z0.s, z2.s", + "mov z3.d, z0.d", + "mov z16.d, p7/m, z3.d" ] }, "vrcpss xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b10 0x53 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", "fmov s0, #0x70 (1.0000)", - "fdiv s0, s0, s18", - "mov v16.s[0], v0.s[0]" + "fdiv s0, s0, s3", + "mov v4.s[0], v0.s[0]", + "mov z16.d, p7/m, z4.d" ] }, "vandps xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b00 0x54 128-bit" ], "ExpectedArm64ASM": [ - "and v16.16b, v16.16b, v17.16b" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "and v4.16b, v2.16b, v3.16b", + "mov z16.d, p7/m, z4.d" ] }, "vandps ymm0, ymm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b00 0x54 256-bit" ], "ExpectedArm64ASM": [ - "and z16.d, z16.d, z17.d" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "and z4.d, z2.d, z3.d", + "mov z16.d, p7/m, z4.d" ] }, "vandpd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0x54 128-bit" ], "ExpectedArm64ASM": [ - "and v16.16b, v16.16b, v17.16b" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "and v4.16b, v2.16b, v3.16b", + "mov z16.d, p7/m, z4.d" ] }, "vandpd ymm0, ymm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0x54 256-bit" ], "ExpectedArm64ASM": [ - "and z16.d, z16.d, z17.d" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "and z4.d, z2.d, z3.d", + "mov z16.d, p7/m, z4.d" ] }, "vandnps xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b00 0x55 128-bit" ], "ExpectedArm64ASM": [ - "bic v16.16b, v17.16b, v16.16b" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "bic v4.16b, v3.16b, v2.16b", + "mov z16.d, p7/m, z4.d" ] }, "vandnps ymm0, ymm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b00 0x55 256-bit" ], "ExpectedArm64ASM": [ - "bic z16.d, z17.d, z16.d" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "bic z4.d, z3.d, z2.d", + "mov z16.d, p7/m, z4.d" ] }, "vandnpd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0x55 128-bit" ], "ExpectedArm64ASM": [ - "bic v16.16b, v17.16b, v16.16b" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "bic v4.16b, v3.16b, v2.16b", + "mov z16.d, p7/m, z4.d" ] }, "vandnpd ymm0, ymm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0x55 256-bit" ], "ExpectedArm64ASM": [ - "bic z16.d, z17.d, z16.d" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "bic z4.d, z3.d, z2.d", + "mov z16.d, p7/m, z4.d" ] }, "vorps xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b00 0x56 128-bit" ], "ExpectedArm64ASM": [ - "orr v16.16b, v16.16b, v17.16b" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "orr v4.16b, v2.16b, v3.16b", + "mov z16.d, p7/m, z4.d" ] }, "vorps ymm0, ymm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b00 0x56 256-bit" ], "ExpectedArm64ASM": [ - "orr z16.d, z16.d, z17.d" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "orr z4.d, z2.d, z3.d", + "mov z16.d, p7/m, z4.d" ] }, "vorpd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0x56 128-bit" ], "ExpectedArm64ASM": [ - "orr v16.16b, v16.16b, v17.16b" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "orr v4.16b, v2.16b, v3.16b", + "mov z16.d, p7/m, z4.d" ] }, "vorpd ymm0, ymm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0x56 256-bit" ], "ExpectedArm64ASM": [ - "orr z16.d, z16.d, z17.d" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "orr z4.d, z2.d, z3.d", + "mov z16.d, p7/m, z4.d" ] }, "vxorps xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b00 0x57 128-bit" ], "ExpectedArm64ASM": [ - "eor v16.16b, v16.16b, v17.16b" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "eor v4.16b, v2.16b, v3.16b", + "mov z16.d, p7/m, z4.d" ] }, "vxorps ymm0, ymm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b00 0x57 256-bit" ], "ExpectedArm64ASM": [ - "eor z16.d, z16.d, z17.d" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "eor z4.d, z2.d, z3.d", + "mov z16.d, p7/m, z4.d" ] }, "vxorpd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0x57 128-bit" ], "ExpectedArm64ASM": [ - "eor v16.16b, v16.16b, v17.16b" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "eor v4.16b, v2.16b, v3.16b", + "mov z16.d, p7/m, z4.d" ] }, "vxorpd ymm0, ymm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0x57 256-bit" ], "ExpectedArm64ASM": [ - "eor z16.d, z16.d, z17.d" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "eor z4.d, z2.d, z3.d", + "mov z16.d, p7/m, z4.d" ] }, "vpunpcklbw xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0x60 128-bit" ], "ExpectedArm64ASM": [ - "zip1 v16.16b, v17.16b, v18.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "zip1 v4.16b, v2.16b, v3.16b", + "mov z16.d, p7/m, z4.d" ] }, "vpunpcklbw ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": [ "Map 1 0b01 0x60 256-bit" ], "ExpectedArm64ASM": [ - "zip1 z2.b, z17.b, z18.b", - "zip2 z3.b, z17.b, z18.b", - "mov z1.q, q3", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "zip1 z4.b, z2.b, z3.b", + "zip2 z5.b, z2.b, z3.b", + "mov z1.q, q5", + "mov z2.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z2.b, p0/m, z1.b", + "mov z16.d, p7/m, z2.d" ] }, "vpunpcklwd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0x61 128-bit" ], "ExpectedArm64ASM": [ - "zip1 v16.8h, v17.8h, v18.8h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "zip1 v4.8h, v2.8h, v3.8h", + "mov z16.d, p7/m, z4.d" ] }, "vpunpcklwd ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": [ "Map 1 0b01 0x61 256-bit" ], "ExpectedArm64ASM": [ - "zip1 z2.h, z17.h, z18.h", - "zip2 z3.h, z17.h, z18.h", - "mov z1.q, q3", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "zip1 z4.h, z2.h, z3.h", + "zip2 z5.h, z2.h, z3.h", + "mov z1.q, q5", + "mov z2.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z2.b, p0/m, z1.b", + "mov z16.d, p7/m, z2.d" ] }, "vpunpckldq xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0x62 128-bit" ], "ExpectedArm64ASM": [ - "zip1 v16.4s, v17.4s, v18.4s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "zip1 v4.4s, v2.4s, v3.4s", + "mov z16.d, p7/m, z4.d" ] }, "vpunpckldq ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": [ "Map 1 0b01 0x62 256-bit" ], "ExpectedArm64ASM": [ - "zip1 z2.s, z17.s, z18.s", - "zip2 z3.s, z17.s, z18.s", - "mov z1.q, q3", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "zip1 z4.s, z2.s, z3.s", + "zip2 z5.s, z2.s, z3.s", + "mov z1.q, q5", + "mov z2.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z2.b, p0/m, z1.b", + "mov z16.d, p7/m, z2.d" ] }, "vpacksswb xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b01 0x63 128-bit" ], "ExpectedArm64ASM": [ - "sqxtn v16.8b, v17.8h", - "sqxtn2 v16.16b, v18.8h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "sqxtn v4.8b, v2.8h", + "sqxtn2 v4.16b, v3.8h", + "mov z16.d, p7/m, z4.d" ] }, "vpacksswb ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 22, "Comment": [ "Map 1 0b01 0x63 256-bit" ], "ExpectedArm64ASM": [ - "sqxtnb z1.b, z18.h", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "sqxtnb z1.b, z3.h", "uzp1 z1.b, z1.b, z1.b", - "sqxtnb z2.b, z17.h", - "uzp1 z2.b, z2.b, z2.b", - "splice z2.b, p6, z2.b, z1.b", - "mov z1.d, z2.d[1]", - "mov z3.d, z2.d", + "sqxtnb z4.b, z2.h", + "uzp1 z4.b, z4.b, z4.b", + "splice z4.b, p6, z4.b, z1.b", + "mov z1.d, z4.d[1]", + "mov z2.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z3.d, p0/m, z1.d", + "mov z2.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z2.d[2]", - "mov z16.d, z3.d", + "mov z1.d, z4.d[2]", + "mov z3.d, z2.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z3.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vpcmpgtb xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0x64 128-bit" ], "ExpectedArm64ASM": [ - "cmgt v16.16b, v17.16b, v18.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "cmgt v4.16b, v2.16b, v3.16b", + "mov z16.d, p7/m, z4.d" ] }, "vpcmpgtb ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": [ "Map 1 0b01 0x64 256-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", "mrs x0, nzcv", - "cmpgt p0.b, p7/z, z17.b, z18.b", - "not z0.b, p0/m, z17.b", - "movprfx z16.b, p0/z, z17.b", - "orr z16.b, p0/m, z16.b, z0.b", - "msr nzcv, x0" + "cmpgt p0.b, p7/z, z2.b, z3.b", + "not z0.b, p0/m, z2.b", + "movprfx z4.b, p0/z, z2.b", + "orr z4.b, p0/m, z4.b, z0.b", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vpcmpgtw xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0x65 128-bit" ], "ExpectedArm64ASM": [ - "cmgt v16.8h, v17.8h, v18.8h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "cmgt v4.8h, v2.8h, v3.8h", + "mov z16.d, p7/m, z4.d" ] }, "vpcmpgtw ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": [ "Map 1 0b01 0x65 256-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", "mrs x0, nzcv", - "cmpgt p0.h, p7/z, z17.h, z18.h", - "not z0.h, p0/m, z17.h", - "movprfx z16.h, p0/z, z17.h", - "orr z16.h, p0/m, z16.h, z0.h", - "msr nzcv, x0" + "cmpgt p0.h, p7/z, z2.h, z3.h", + "not z0.h, p0/m, z2.h", + "movprfx z4.h, p0/z, z2.h", + "orr z4.h, p0/m, z4.h, z0.h", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vpcmpgtd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0x66 128-bit" ], "ExpectedArm64ASM": [ - "cmgt v16.4s, v17.4s, v18.4s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "cmgt v4.4s, v2.4s, v3.4s", + "mov z16.d, p7/m, z4.d" ] }, "vpcmpgtd ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": [ "Map 1 0b01 0x66 256-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", "mrs x0, nzcv", - "cmpgt p0.s, p7/z, z17.s, z18.s", - "not z0.s, p0/m, z17.s", - "movprfx z16.s, p0/z, z17.s", - "orr z16.s, p0/m, z16.s, z0.s", - "msr nzcv, x0" + "cmpgt p0.s, p7/z, z2.s, z3.s", + "not z0.s, p0/m, z2.s", + "movprfx z4.s, p0/z, z2.s", + "orr z4.s, p0/m, z4.s, z0.s", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vpackuswb xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b01 0x67 128-bit" ], "ExpectedArm64ASM": [ - "sqxtun v16.8b, v17.8h", - "sqxtun2 v16.16b, v18.8h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "sqxtun v4.8b, v2.8h", + "sqxtun2 v4.16b, v3.8h", + "mov z16.d, p7/m, z4.d" ] }, "vpackuswb ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 22, "Comment": [ "Map 1 0b01 0x67 256-bit" ], "ExpectedArm64ASM": [ - "sqxtunb z1.b, z18.h", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "sqxtunb z1.b, z3.h", "uzp1 z1.b, z1.b, z1.b", - "sqxtunb z2.b, z17.h", - "uzp1 z2.b, z2.b, z2.b", - "splice z2.b, p6, z2.b, z1.b", - "mov z1.d, z2.d[1]", - "mov z3.d, z2.d", + "sqxtunb z4.b, z2.h", + "uzp1 z4.b, z4.b, z4.b", + "splice z4.b, p6, z4.b, z1.b", + "mov z1.d, z4.d[1]", + "mov z2.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z3.d, p0/m, z1.d", + "mov z2.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z2.d[2]", - "mov z16.d, z3.d", + "mov z1.d, z4.d[2]", + "mov z3.d, z2.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z3.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vpshufd xmm0, xmm1, 00b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": [ "Map 1 0b01 0x70 128-bit" ], "ExpectedArm64ASM": [ - "mov v2.16b, v17.16b", - "mov v2.s[0], v17.s[0]", - "mov v2.s[1], v17.s[0]", - "mov v2.s[2], v17.s[0]", - "mov v16.16b, v2.16b", - "mov v16.s[3], v17.s[0]" + "mov z2.d, p7/m, z17.d", + "mov v3.16b, v2.16b", + "mov v3.s[0], v2.s[0]", + "mov v4.16b, v3.16b", + "mov v4.s[1], v2.s[0]", + "mov v3.16b, v4.16b", + "mov v3.s[2], v2.s[0]", + "mov v4.16b, v3.16b", + "mov v4.s[3], v2.s[0]", + "mov z16.d, p7/m, z4.d" ] }, "vpshufd xmm0, xmm1, 01b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": [ "Map 1 0b01 0x70 128-bit" ], "ExpectedArm64ASM": [ - "mov v2.16b, v17.16b", - "mov v2.s[0], v17.s[1]", - "mov v2.s[1], v17.s[0]", - "mov v2.s[2], v17.s[0]", - "mov v16.16b, v2.16b", - "mov v16.s[3], v17.s[0]" + "mov z2.d, p7/m, z17.d", + "mov v3.16b, v2.16b", + "mov v3.s[0], v2.s[1]", + "mov v4.16b, v3.16b", + "mov v4.s[1], v2.s[0]", + "mov v3.16b, v4.16b", + "mov v3.s[2], v2.s[0]", + "mov v4.16b, v3.16b", + "mov v4.s[3], v2.s[0]", + "mov z16.d, p7/m, z4.d" ] }, "vpshufd xmm0, xmm1, 10b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": [ "Map 1 0b01 0x70 128-bit" ], "ExpectedArm64ASM": [ - "mov v2.16b, v17.16b", - "mov v2.s[0], v17.s[2]", - "mov v2.s[1], v17.s[0]", - "mov v2.s[2], v17.s[0]", - "mov v16.16b, v2.16b", - "mov v16.s[3], v17.s[0]" + "mov z2.d, p7/m, z17.d", + "mov v3.16b, v2.16b", + "mov v3.s[0], v2.s[2]", + "mov v4.16b, v3.16b", + "mov v4.s[1], v2.s[0]", + "mov v3.16b, v4.16b", + "mov v3.s[2], v2.s[0]", + "mov v4.16b, v3.16b", + "mov v4.s[3], v2.s[0]", + "mov z16.d, p7/m, z4.d" ] }, "vpshufd xmm0, xmm1, 11b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": [ "Map 1 0b01 0x70 128-bit" ], "ExpectedArm64ASM": [ - "mov v2.16b, v17.16b", - "mov v2.s[0], v17.s[3]", - "mov v2.s[1], v17.s[0]", - "mov v2.s[2], v17.s[0]", - "mov v16.16b, v2.16b", - "mov v16.s[3], v17.s[0]" + "mov z2.d, p7/m, z17.d", + "mov v3.16b, v2.16b", + "mov v3.s[0], v2.s[3]", + "mov v4.16b, v3.16b", + "mov v4.s[1], v2.s[0]", + "mov v3.16b, v4.16b", + "mov v3.s[2], v2.s[0]", + "mov v4.16b, v3.16b", + "mov v4.s[3], v2.s[0]", + "mov z16.d, p7/m, z4.d" ] }, "vpshufd ymm0, ymm1, 00b": { - "ExpectedInstructionCount": 50, + "ExpectedInstructionCount": 58, "Comment": [ "Map 1 0b01 0x70 256-bit" ], "ExpectedArm64ASM": [ - "mov z1.s, s17", - "mov z2.d, z17.d", + "mov z2.d, p7/m, z17.d", + "mov z1.s, s2", + "mov z3.d, z2.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-4", - "mov z2.s, p0/m, z1.s", + "mov z3.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[4]", + "mov z1.s, z2.s[4]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #0", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s17", + "mov z1.s, s2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-3", - "mov z2.s, p0/m, z1.s", + "mov z3.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[4]", + "mov z1.s, z2.s[4]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #1", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s17", + "mov z1.s, s2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-2", - "mov z2.s, p0/m, z1.s", + "mov z3.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[4]", + "mov z1.s, z2.s[4]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #2", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s17", + "mov z1.s, s2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-1", - "mov z2.s, p0/m, z1.s", + "mov z3.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[4]", - "mov z16.d, z2.d", + "mov z1.s, z2.s[4]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #3", - "mov z16.s, p0/m, z1.s", - "msr nzcv, x0" + "mov z4.s, p0/m, z1.s", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vpshufd ymm0, ymm1, 01b": { - "ExpectedInstructionCount": 50, + "ExpectedInstructionCount": 58, "Comment": [ "Map 1 0b01 0x70 256-bit" ], "ExpectedArm64ASM": [ - "mov z1.s, z17.s[1]", - "mov z2.d, z17.d", + "mov z2.d, p7/m, z17.d", + "mov z1.s, z2.s[1]", + "mov z3.d, z2.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-4", - "mov z2.s, p0/m, z1.s", + "mov z3.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[5]", + "mov z1.s, z2.s[5]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #0", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s17", + "mov z1.s, s2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-3", - "mov z2.s, p0/m, z1.s", + "mov z3.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[4]", + "mov z1.s, z2.s[4]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #1", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s17", + "mov z1.s, s2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-2", - "mov z2.s, p0/m, z1.s", + "mov z3.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[4]", + "mov z1.s, z2.s[4]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #2", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s17", + "mov z1.s, s2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-1", - "mov z2.s, p0/m, z1.s", + "mov z3.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[4]", - "mov z16.d, z2.d", + "mov z1.s, z2.s[4]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #3", - "mov z16.s, p0/m, z1.s", - "msr nzcv, x0" + "mov z4.s, p0/m, z1.s", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vpshufd ymm0, ymm1, 10b": { - "ExpectedInstructionCount": 50, + "ExpectedInstructionCount": 58, "Comment": [ "Map 1 0b01 0x70 256-bit" ], "ExpectedArm64ASM": [ - "mov z1.s, z17.s[2]", - "mov z2.d, z17.d", + "mov z2.d, p7/m, z17.d", + "mov z1.s, z2.s[2]", + "mov z3.d, z2.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-4", - "mov z2.s, p0/m, z1.s", + "mov z3.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[6]", + "mov z1.s, z2.s[6]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #0", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s17", + "mov z1.s, s2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-3", - "mov z2.s, p0/m, z1.s", + "mov z3.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[4]", + "mov z1.s, z2.s[4]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #1", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s17", + "mov z1.s, s2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-2", - "mov z2.s, p0/m, z1.s", + "mov z3.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[4]", + "mov z1.s, z2.s[4]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #2", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s17", + "mov z1.s, s2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-1", - "mov z2.s, p0/m, z1.s", + "mov z3.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[4]", - "mov z16.d, z2.d", + "mov z1.s, z2.s[4]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #3", - "mov z16.s, p0/m, z1.s", - "msr nzcv, x0" + "mov z4.s, p0/m, z1.s", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vpshufd ymm0, ymm1, 11b": { - "ExpectedInstructionCount": 50, + "ExpectedInstructionCount": 58, "Comment": [ "Map 1 0b01 0x70 256-bit" ], "ExpectedArm64ASM": [ - "mov z1.s, z17.s[3]", - "mov z2.d, z17.d", + "mov z2.d, p7/m, z17.d", + "mov z1.s, z2.s[3]", + "mov z3.d, z2.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-4", - "mov z2.s, p0/m, z1.s", + "mov z3.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[7]", + "mov z1.s, z2.s[7]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #0", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s17", + "mov z1.s, s2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-3", - "mov z2.s, p0/m, z1.s", + "mov z3.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[4]", + "mov z1.s, z2.s[4]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #1", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s17", + "mov z1.s, s2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-2", - "mov z2.s, p0/m, z1.s", + "mov z3.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[4]", + "mov z1.s, z2.s[4]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #2", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s17", + "mov z1.s, s2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-1", - "mov z2.s, p0/m, z1.s", + "mov z3.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[4]", - "mov z16.d, z2.d", + "mov z1.s, z2.s[4]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #3", - "mov z16.s, p0/m, z1.s", - "msr nzcv, x0" + "mov z4.s, p0/m, z1.s", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vpshufhw xmm0, xmm1, 00b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": [ "Map 1 0b10 0x70 128-bit" ], "ExpectedArm64ASM": [ - "mov v2.16b, v17.16b", - "mov v2.h[4], v17.h[4]", - "mov v2.h[5], v17.h[4]", - "mov v2.h[6], v17.h[4]", - "mov v16.16b, v2.16b", - "mov v16.h[7], v17.h[4]" + "mov z2.d, p7/m, z17.d", + "mov v3.16b, v2.16b", + "mov v3.h[4], v2.h[4]", + "mov v4.16b, v3.16b", + "mov v4.h[5], v2.h[4]", + "mov v3.16b, v4.16b", + "mov v3.h[6], v2.h[4]", + "mov v4.16b, v3.16b", + "mov v4.h[7], v2.h[4]", + "mov z16.d, p7/m, z4.d" ] }, "vpshufhw xmm0, xmm1, 01b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": [ "Map 1 0b10 0x70 128-bit" ], "ExpectedArm64ASM": [ - "mov v2.16b, v17.16b", - "mov v2.h[4], v17.h[5]", - "mov v2.h[5], v17.h[4]", - "mov v2.h[6], v17.h[4]", - "mov v16.16b, v2.16b", - "mov v16.h[7], v17.h[4]" + "mov z2.d, p7/m, z17.d", + "mov v3.16b, v2.16b", + "mov v3.h[4], v2.h[5]", + "mov v4.16b, v3.16b", + "mov v4.h[5], v2.h[4]", + "mov v3.16b, v4.16b", + "mov v3.h[6], v2.h[4]", + "mov v4.16b, v3.16b", + "mov v4.h[7], v2.h[4]", + "mov z16.d, p7/m, z4.d" ] }, "vpshufhw xmm0, xmm1, 10b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": [ "Map 1 0b10 0x70 128-bit" ], "ExpectedArm64ASM": [ - "mov v2.16b, v17.16b", - "mov v2.h[4], v17.h[6]", - "mov v2.h[5], v17.h[4]", - "mov v2.h[6], v17.h[4]", - "mov v16.16b, v2.16b", - "mov v16.h[7], v17.h[4]" + "mov z2.d, p7/m, z17.d", + "mov v3.16b, v2.16b", + "mov v3.h[4], v2.h[6]", + "mov v4.16b, v3.16b", + "mov v4.h[5], v2.h[4]", + "mov v3.16b, v4.16b", + "mov v3.h[6], v2.h[4]", + "mov v4.16b, v3.16b", + "mov v4.h[7], v2.h[4]", + "mov z16.d, p7/m, z4.d" ] }, "vpshufhw xmm0, xmm1, 11b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": [ "Map 1 0b10 0x70 128-bit" ], "ExpectedArm64ASM": [ - "mov v2.16b, v17.16b", - "mov v2.h[4], v17.h[7]", - "mov v2.h[5], v17.h[4]", - "mov v2.h[6], v17.h[4]", - "mov v16.16b, v2.16b", - "mov v16.h[7], v17.h[4]" + "mov z2.d, p7/m, z17.d", + "mov v3.16b, v2.16b", + "mov v3.h[4], v2.h[7]", + "mov v4.16b, v3.16b", + "mov v4.h[5], v2.h[4]", + "mov v3.16b, v4.16b", + "mov v3.h[6], v2.h[4]", + "mov v4.16b, v3.16b", + "mov v4.h[7], v2.h[4]", + "mov z16.d, p7/m, z4.d" ] }, "vpshufhw ymm0, ymm1, 00b": { - "ExpectedInstructionCount": 50, + "ExpectedInstructionCount": 58, "Comment": [ "Map 1 0b10 0x70 256-bit" ], "ExpectedArm64ASM": [ - "mov z1.h, z17.h[4]", - "mov z2.d, z17.d", + "mov z2.d, p7/m, z17.d", + "mov z1.h, z2.h[4]", + "mov z3.d, z2.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-4", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[12]", + "mov z1.h, z2.h[12]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #4", - "mov z2.h, p0/m, z1.h", + "mov z4.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[4]", + "mov z1.h, z2.h[4]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-3", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[12]", + "mov z1.h, z2.h[12]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #5", - "mov z2.h, p0/m, z1.h", + "mov z4.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[4]", + "mov z1.h, z2.h[4]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-2", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[12]", + "mov z1.h, z2.h[12]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #6", - "mov z2.h, p0/m, z1.h", + "mov z4.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[4]", + "mov z1.h, z2.h[4]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-1", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[12]", - "mov z16.d, z2.d", + "mov z1.h, z2.h[12]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #7", - "mov z16.h, p0/m, z1.h", - "msr nzcv, x0" + "mov z4.h, p0/m, z1.h", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vpshufhw ymm0, ymm1, 01b": { - "ExpectedInstructionCount": 50, + "ExpectedInstructionCount": 58, "Comment": [ "Map 1 0b10 0x70 256-bit" ], "ExpectedArm64ASM": [ - "mov z1.h, z17.h[5]", - "mov z2.d, z17.d", + "mov z2.d, p7/m, z17.d", + "mov z1.h, z2.h[5]", + "mov z3.d, z2.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-4", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[13]", + "mov z1.h, z2.h[13]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #4", - "mov z2.h, p0/m, z1.h", + "mov z4.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[4]", + "mov z1.h, z2.h[4]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-3", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[12]", + "mov z1.h, z2.h[12]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #5", - "mov z2.h, p0/m, z1.h", + "mov z4.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[4]", + "mov z1.h, z2.h[4]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-2", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[12]", + "mov z1.h, z2.h[12]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #6", - "mov z2.h, p0/m, z1.h", + "mov z4.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[4]", + "mov z1.h, z2.h[4]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-1", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[12]", - "mov z16.d, z2.d", + "mov z1.h, z2.h[12]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #7", - "mov z16.h, p0/m, z1.h", - "msr nzcv, x0" + "mov z4.h, p0/m, z1.h", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vpshufhw ymm0, ymm1, 10b": { - "ExpectedInstructionCount": 50, + "ExpectedInstructionCount": 58, "Comment": [ "Map 1 0b10 0x70 256-bit" ], "ExpectedArm64ASM": [ - "mov z1.h, z17.h[6]", - "mov z2.d, z17.d", + "mov z2.d, p7/m, z17.d", + "mov z1.h, z2.h[6]", + "mov z3.d, z2.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-4", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[14]", + "mov z1.h, z2.h[14]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #4", - "mov z2.h, p0/m, z1.h", + "mov z4.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[4]", + "mov z1.h, z2.h[4]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-3", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[12]", + "mov z1.h, z2.h[12]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #5", - "mov z2.h, p0/m, z1.h", + "mov z4.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[4]", + "mov z1.h, z2.h[4]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-2", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[12]", + "mov z1.h, z2.h[12]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #6", - "mov z2.h, p0/m, z1.h", + "mov z4.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[4]", + "mov z1.h, z2.h[4]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-1", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[12]", - "mov z16.d, z2.d", + "mov z1.h, z2.h[12]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #7", - "mov z16.h, p0/m, z1.h", - "msr nzcv, x0" + "mov z4.h, p0/m, z1.h", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vpshufhw ymm0, ymm1, 11b": { - "ExpectedInstructionCount": 50, + "ExpectedInstructionCount": 58, "Comment": [ "Map 1 0b10 0x70 256-bit" ], "ExpectedArm64ASM": [ - "mov z1.h, z17.h[7]", - "mov z2.d, z17.d", + "mov z2.d, p7/m, z17.d", + "mov z1.h, z2.h[7]", + "mov z3.d, z2.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-4", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[15]", + "mov z1.h, z2.h[15]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #4", - "mov z2.h, p0/m, z1.h", + "mov z4.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[4]", + "mov z1.h, z2.h[4]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-3", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[12]", + "mov z1.h, z2.h[12]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #5", - "mov z2.h, p0/m, z1.h", + "mov z4.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[4]", + "mov z1.h, z2.h[4]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-2", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[12]", + "mov z1.h, z2.h[12]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #6", - "mov z2.h, p0/m, z1.h", + "mov z4.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[4]", + "mov z1.h, z2.h[4]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-1", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[12]", - "mov z16.d, z2.d", + "mov z1.h, z2.h[12]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #7", - "mov z16.h, p0/m, z1.h", - "msr nzcv, x0" + "mov z4.h, p0/m, z1.h", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vpshuflw xmm0, xmm1, 00b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": [ "Map 1 0b11 0x70 128-bit" ], "ExpectedArm64ASM": [ - "mov v2.16b, v17.16b", - "mov v2.h[0], v17.h[0]", - "mov v2.h[1], v17.h[0]", - "mov v2.h[2], v17.h[0]", - "mov v16.16b, v2.16b", - "mov v16.h[3], v17.h[0]" + "mov z2.d, p7/m, z17.d", + "mov v3.16b, v2.16b", + "mov v3.h[0], v2.h[0]", + "mov v4.16b, v3.16b", + "mov v4.h[1], v2.h[0]", + "mov v3.16b, v4.16b", + "mov v3.h[2], v2.h[0]", + "mov v4.16b, v3.16b", + "mov v4.h[3], v2.h[0]", + "mov z16.d, p7/m, z4.d" ] }, "vpshuflw xmm0, xmm1, 01b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": [ "Map 1 0b11 0x70 128-bit" ], "ExpectedArm64ASM": [ - "mov v2.16b, v17.16b", - "mov v2.h[0], v17.h[1]", - "mov v2.h[1], v17.h[0]", - "mov v2.h[2], v17.h[0]", - "mov v16.16b, v2.16b", - "mov v16.h[3], v17.h[0]" + "mov z2.d, p7/m, z17.d", + "mov v3.16b, v2.16b", + "mov v3.h[0], v2.h[1]", + "mov v4.16b, v3.16b", + "mov v4.h[1], v2.h[0]", + "mov v3.16b, v4.16b", + "mov v3.h[2], v2.h[0]", + "mov v4.16b, v3.16b", + "mov v4.h[3], v2.h[0]", + "mov z16.d, p7/m, z4.d" ] }, "vpshuflw xmm0, xmm1, 10b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": [ "Map 1 0b11 0x70 128-bit" ], "ExpectedArm64ASM": [ - "mov v2.16b, v17.16b", - "mov v2.h[0], v17.h[2]", - "mov v2.h[1], v17.h[0]", - "mov v2.h[2], v17.h[0]", - "mov v16.16b, v2.16b", - "mov v16.h[3], v17.h[0]" + "mov z2.d, p7/m, z17.d", + "mov v3.16b, v2.16b", + "mov v3.h[0], v2.h[2]", + "mov v4.16b, v3.16b", + "mov v4.h[1], v2.h[0]", + "mov v3.16b, v4.16b", + "mov v3.h[2], v2.h[0]", + "mov v4.16b, v3.16b", + "mov v4.h[3], v2.h[0]", + "mov z16.d, p7/m, z4.d" ] }, "vpshuflw xmm0, xmm1, 11b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 10, "Comment": [ "Map 1 0b11 0x70 128-bit" ], "ExpectedArm64ASM": [ - "mov v2.16b, v17.16b", - "mov v2.h[0], v17.h[3]", - "mov v2.h[1], v17.h[0]", - "mov v2.h[2], v17.h[0]", - "mov v16.16b, v2.16b", - "mov v16.h[3], v17.h[0]" + "mov z2.d, p7/m, z17.d", + "mov v3.16b, v2.16b", + "mov v3.h[0], v2.h[3]", + "mov v4.16b, v3.16b", + "mov v4.h[1], v2.h[0]", + "mov v3.16b, v4.16b", + "mov v3.h[2], v2.h[0]", + "mov v4.16b, v3.16b", + "mov v4.h[3], v2.h[0]", + "mov z16.d, p7/m, z4.d" ] }, "vpshuflw ymm0, ymm1, 00b": { - "ExpectedInstructionCount": 50, + "ExpectedInstructionCount": 58, "Comment": [ "Map 1 0b11 0x70 256-bit" ], "ExpectedArm64ASM": [ - "mov z1.h, h17", - "mov z2.d, z17.d", + "mov z2.d, p7/m, z17.d", + "mov z1.h, h2", + "mov z3.d, z2.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-8", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[8]", + "mov z1.h, z2.h[8]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #0", - "mov z2.h, p0/m, z1.h", + "mov z4.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, h17", + "mov z1.h, h2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-7", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[8]", + "mov z1.h, z2.h[8]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #1", - "mov z2.h, p0/m, z1.h", + "mov z4.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, h17", + "mov z1.h, h2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-6", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[8]", + "mov z1.h, z2.h[8]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #2", - "mov z2.h, p0/m, z1.h", + "mov z4.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, h17", + "mov z1.h, h2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-5", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[8]", - "mov z16.d, z2.d", + "mov z1.h, z2.h[8]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #3", - "mov z16.h, p0/m, z1.h", - "msr nzcv, x0" + "mov z4.h, p0/m, z1.h", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vpshuflw ymm0, ymm1, 01b": { - "ExpectedInstructionCount": 50, + "ExpectedInstructionCount": 58, "Comment": [ "Map 1 0b11 0x70 256-bit" ], "ExpectedArm64ASM": [ - "mov z1.h, z17.h[1]", - "mov z2.d, z17.d", + "mov z2.d, p7/m, z17.d", + "mov z1.h, z2.h[1]", + "mov z3.d, z2.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-8", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[9]", + "mov z1.h, z2.h[9]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #0", - "mov z2.h, p0/m, z1.h", + "mov z4.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, h17", + "mov z1.h, h2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-7", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[8]", + "mov z1.h, z2.h[8]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #1", - "mov z2.h, p0/m, z1.h", + "mov z4.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, h17", + "mov z1.h, h2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-6", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[8]", + "mov z1.h, z2.h[8]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #2", - "mov z2.h, p0/m, z1.h", + "mov z4.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, h17", + "mov z1.h, h2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-5", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[8]", - "mov z16.d, z2.d", + "mov z1.h, z2.h[8]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #3", - "mov z16.h, p0/m, z1.h", - "msr nzcv, x0" + "mov z4.h, p0/m, z1.h", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vpshuflw ymm0, ymm1, 10b": { - "ExpectedInstructionCount": 50, + "ExpectedInstructionCount": 58, "Comment": [ "Map 1 0b11 0x70 256-bit" ], "ExpectedArm64ASM": [ - "mov z1.h, z17.h[2]", - "mov z2.d, z17.d", + "mov z2.d, p7/m, z17.d", + "mov z1.h, z2.h[2]", + "mov z3.d, z2.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-8", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[10]", + "mov z1.h, z2.h[10]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #0", - "mov z2.h, p0/m, z1.h", + "mov z4.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, h17", + "mov z1.h, h2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-7", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[8]", + "mov z1.h, z2.h[8]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #1", - "mov z2.h, p0/m, z1.h", + "mov z4.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, h17", + "mov z1.h, h2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-6", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[8]", + "mov z1.h, z2.h[8]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #2", - "mov z2.h, p0/m, z1.h", + "mov z4.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, h17", + "mov z1.h, h2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-5", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[8]", - "mov z16.d, z2.d", + "mov z1.h, z2.h[8]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #3", - "mov z16.h, p0/m, z1.h", - "msr nzcv, x0" + "mov z4.h, p0/m, z1.h", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vpshuflw ymm0, ymm1, 11b": { - "ExpectedInstructionCount": 50, + "ExpectedInstructionCount": 58, "Comment": [ "Map 1 0b11 0x70 256-bit" ], "ExpectedArm64ASM": [ - "mov z1.h, z17.h[3]", - "mov z2.d, z17.d", + "mov z2.d, p7/m, z17.d", + "mov z1.h, z2.h[3]", + "mov z3.d, z2.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-8", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[11]", + "mov z1.h, z2.h[11]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #0", - "mov z2.h, p0/m, z1.h", + "mov z4.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, h17", + "mov z1.h, h2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-7", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[8]", + "mov z1.h, z2.h[8]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #1", - "mov z2.h, p0/m, z1.h", + "mov z4.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, h17", + "mov z1.h, h2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-6", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[8]", + "mov z1.h, z2.h[8]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #2", - "mov z2.h, p0/m, z1.h", + "mov z4.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, h17", + "mov z1.h, h2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-5", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[8]", - "mov z16.d, z2.d", + "mov z1.h, z2.h[8]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #3", - "mov z16.h, p0/m, z1.h", - "msr nzcv, x0" + "mov z4.h, p0/m, z1.h", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vpcmpeqb xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0x74 128-bit" ], "ExpectedArm64ASM": [ - "cmeq v16.16b, v17.16b, v18.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "cmeq v4.16b, v2.16b, v3.16b", + "mov z16.d, p7/m, z4.d" ] }, "vpcmpeqb ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": [ "Map 1 0b01 0x74 256-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", "mrs x0, nzcv", - "cmpeq p0.b, p7/z, z17.b, z18.b", - "not z0.b, p0/m, z17.b", - "movprfx z16.b, p0/z, z17.b", - "orr z16.b, p0/m, z16.b, z0.b", - "msr nzcv, x0" + "cmpeq p0.b, p7/z, z2.b, z3.b", + "not z0.b, p0/m, z2.b", + "movprfx z4.b, p0/z, z2.b", + "orr z4.b, p0/m, z4.b, z0.b", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vpcmpeqw xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0x75 128-bit" ], "ExpectedArm64ASM": [ - "cmeq v16.8h, v17.8h, v18.8h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "cmeq v4.8h, v2.8h, v3.8h", + "mov z16.d, p7/m, z4.d" ] }, "vpcmpeqw ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": [ "Map 1 0b01 0x75 256-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", "mrs x0, nzcv", - "cmpeq p0.h, p7/z, z17.h, z18.h", - "not z0.h, p0/m, z17.h", - "movprfx z16.h, p0/z, z17.h", - "orr z16.h, p0/m, z16.h, z0.h", - "msr nzcv, x0" + "cmpeq p0.h, p7/z, z2.h, z3.h", + "not z0.h, p0/m, z2.h", + "movprfx z4.h, p0/z, z2.h", + "orr z4.h, p0/m, z4.h, z0.h", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vpcmpeqd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0x76 128-bit" ], "ExpectedArm64ASM": [ - "cmeq v16.4s, v17.4s, v18.4s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "cmeq v4.4s, v2.4s, v3.4s", + "mov z16.d, p7/m, z4.d" ] }, "vpcmpeqd ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": [ "Map 1 0b01 0x76 256-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", "mrs x0, nzcv", - "cmpeq p0.s, p7/z, z17.s, z18.s", - "not z0.s, p0/m, z17.s", - "movprfx z16.s, p0/z, z17.s", - "orr z16.s, p0/m, z16.s, z0.s", - "msr nzcv, x0" + "cmpeq p0.s, p7/z, z2.s, z3.s", + "not z0.s, p0/m, z2.s", + "movprfx z4.s, p0/z, z2.s", + "orr z4.s, p0/m, z4.s, z0.s", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vzeroupper": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 48, "Comment": [ "Might need to revisit this if move renaming ends up slower than some other clearing", "Map 1 0b01 0x77 L=0" ], "ExpectedArm64ASM": [ - "mov v16.16b, v16.16b", - "mov v17.16b, v17.16b", - "mov v18.16b, v18.16b", - "mov v19.16b, v19.16b", - "mov v20.16b, v20.16b", - "mov v21.16b, v21.16b", - "mov v22.16b, v22.16b", - "mov v23.16b, v23.16b", - "mov v24.16b, v24.16b", - "mov v25.16b, v25.16b", - "mov v26.16b, v26.16b", - "mov v27.16b, v27.16b", - "mov v28.16b, v28.16b", - "mov v29.16b, v29.16b", - "mov v30.16b, v30.16b", - "mov v31.16b, v31.16b" + "mov z2.d, p7/m, z16.d", + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d", + "mov z2.d, p7/m, z17.d", + "mov v3.16b, v2.16b", + "mov z17.d, p7/m, z3.d", + "mov z2.d, p7/m, z18.d", + "mov v3.16b, v2.16b", + "mov z18.d, p7/m, z3.d", + "mov z2.d, p7/m, z19.d", + "mov v3.16b, v2.16b", + "mov z19.d, p7/m, z3.d", + "mov z2.d, p7/m, z20.d", + "mov v3.16b, v2.16b", + "mov z20.d, p7/m, z3.d", + "mov z2.d, p7/m, z21.d", + "mov v3.16b, v2.16b", + "mov z21.d, p7/m, z3.d", + "mov z2.d, p7/m, z22.d", + "mov v3.16b, v2.16b", + "mov z22.d, p7/m, z3.d", + "mov z2.d, p7/m, z23.d", + "mov v3.16b, v2.16b", + "mov z23.d, p7/m, z3.d", + "mov z2.d, p7/m, z24.d", + "mov v3.16b, v2.16b", + "mov z24.d, p7/m, z3.d", + "mov z2.d, p7/m, z25.d", + "mov v3.16b, v2.16b", + "mov z25.d, p7/m, z3.d", + "mov z2.d, p7/m, z26.d", + "mov v3.16b, v2.16b", + "mov z26.d, p7/m, z3.d", + "mov z2.d, p7/m, z27.d", + "mov v3.16b, v2.16b", + "mov z27.d, p7/m, z3.d", + "mov z2.d, p7/m, z28.d", + "mov v3.16b, v2.16b", + "mov z28.d, p7/m, z3.d", + "mov z2.d, p7/m, z29.d", + "mov v3.16b, v2.16b", + "mov z29.d, p7/m, z3.d", + "mov z2.d, p7/m, z30.d", + "mov v3.16b, v2.16b", + "mov z30.d, p7/m, z3.d", + "mov z2.d, p7/m, z31.d", + "mov v3.16b, v2.16b", + "mov z31.d, p7/m, z3.d" ] }, "vzeroall": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 32, "Comment": [ "Map 1 0b01 0x77 L=1" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0", - "movi v17.2d, #0x0", - "movi v18.2d, #0x0", - "movi v19.2d, #0x0", - "movi v20.2d, #0x0", - "movi v21.2d, #0x0", - "movi v22.2d, #0x0", - "movi v23.2d, #0x0", - "movi v24.2d, #0x0", - "movi v25.2d, #0x0", - "movi v26.2d, #0x0", - "movi v27.2d, #0x0", - "movi v28.2d, #0x0", - "movi v29.2d, #0x0", - "movi v30.2d, #0x0", - "movi v31.2d, #0x0" + "movi v2.2d, #0x0", + "mov z16.d, p7/m, z2.d", + "movi v2.2d, #0x0", + "mov z17.d, p7/m, z2.d", + "movi v2.2d, #0x0", + "mov z18.d, p7/m, z2.d", + "movi v2.2d, #0x0", + "mov z19.d, p7/m, z2.d", + "movi v2.2d, #0x0", + "mov z20.d, p7/m, z2.d", + "movi v2.2d, #0x0", + "mov z21.d, p7/m, z2.d", + "movi v2.2d, #0x0", + "mov z22.d, p7/m, z2.d", + "movi v2.2d, #0x0", + "mov z23.d, p7/m, z2.d", + "movi v2.2d, #0x0", + "mov z24.d, p7/m, z2.d", + "movi v2.2d, #0x0", + "mov z25.d, p7/m, z2.d", + "movi v2.2d, #0x0", + "mov z26.d, p7/m, z2.d", + "movi v2.2d, #0x0", + "mov z27.d, p7/m, z2.d", + "movi v2.2d, #0x0", + "mov z28.d, p7/m, z2.d", + "movi v2.2d, #0x0", + "mov z29.d, p7/m, z2.d", + "movi v2.2d, #0x0", + "mov z30.d, p7/m, z2.d", + "movi v2.2d, #0x0", + "mov z31.d, p7/m, z2.d" ] }, "vcmpps xmm0, xmm1, xmm2, 0x00": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b00 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "fcmeq v16.4s, v17.4s, v18.4s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmeq v4.4s, v2.4s, v3.4s", + "mov z16.d, p7/m, z4.d" ] }, "vcmpps ymm0, ymm1, ymm2, 0x00": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b00 0xC2 256-bit" ], "ExpectedArm64ASM": [ - "fcmeq p0.s, p7/z, z17.s, z18.s", - "not z0.s, p0/m, z17.s", - "movprfx z16.s, p0/z, z17.s", - "orr z16.s, p0/m, z16.s, z0.s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmeq p0.s, p7/z, z2.s, z3.s", + "not z0.s, p0/m, z2.s", + "movprfx z4.s, p0/z, z2.s", + "orr z4.s, p0/m, z4.s, z0.s", + "mov z16.d, p7/m, z4.d" ] }, "vcmpps xmm0, xmm1, xmm2, 0x01": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b00 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "fcmgt v16.4s, v18.4s, v17.4s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmgt v4.4s, v3.4s, v2.4s", + "mov z16.d, p7/m, z4.d" ] }, "vcmpps ymm0, ymm1, ymm2, 0x01": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b00 0xC2 256-bit" ], "ExpectedArm64ASM": [ - "fcmgt p0.s, p7/z, z18.s, z17.s", - "not z0.s, p0/m, z18.s", - "movprfx z16.s, p0/z, z18.s", - "orr z16.s, p0/m, z16.s, z0.s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmgt p0.s, p7/z, z3.s, z2.s", + "not z0.s, p0/m, z3.s", + "movprfx z4.s, p0/z, z3.s", + "orr z4.s, p0/m, z4.s, z0.s", + "mov z16.d, p7/m, z4.d" ] }, "vcmpps xmm0, xmm1, xmm2, 0x02": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b00 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "fcmge v16.4s, v18.4s, v17.4s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmge v4.4s, v3.4s, v2.4s", + "mov z16.d, p7/m, z4.d" ] }, "vcmpps ymm0, ymm1, ymm2, 0x02": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b00 0xC2 256-bit" ], "ExpectedArm64ASM": [ - "fcmge p0.s, p7/z, z18.s, z17.s", - "not z0.s, p0/m, z18.s", - "movprfx z16.s, p0/z, z18.s", - "orr z16.s, p0/m, z16.s, z0.s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmge p0.s, p7/z, z3.s, z2.s", + "not z0.s, p0/m, z3.s", + "movprfx z4.s, p0/z, z3.s", + "orr z4.s, p0/m, z4.s, z0.s", + "mov z16.d, p7/m, z4.d" ] }, "vcmpps xmm0, xmm1, xmm2, 0x03": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b00 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "fcmge v0.4s, v17.4s, v18.4s", - "fcmgt v1.4s, v18.4s, v17.4s", - "orr v16.16b, v0.16b, v1.16b", - "mvn v16.16b, v16.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmge v0.4s, v2.4s, v3.4s", + "fcmgt v1.4s, v3.4s, v2.4s", + "orr v4.16b, v0.16b, v1.16b", + "mvn v4.16b, v4.16b", + "mov z16.d, p7/m, z4.d" ] }, "vcmpps ymm0, ymm1, ymm2, 0x03": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b00 0xC2 256-bit" ], "ExpectedArm64ASM": [ - "fcmuo p0.s, p7/z, z17.s, z18.s", - "not z0.s, p0/m, z17.s", - "movprfx z16.s, p0/z, z17.s", - "orr z16.s, p0/m, z16.s, z0.s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmuo p0.s, p7/z, z2.s, z3.s", + "not z0.s, p0/m, z2.s", + "movprfx z4.s, p0/z, z2.s", + "orr z4.s, p0/m, z4.s, z0.s", + "mov z16.d, p7/m, z4.d" ] }, "vcmpps xmm0, xmm1, xmm2, 0x04": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b00 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "fcmeq v16.4s, v17.4s, v18.4s", - "mvn v16.16b, v16.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmeq v4.4s, v2.4s, v3.4s", + "mvn v4.16b, v4.16b", + "mov z16.d, p7/m, z4.d" ] }, "vcmpps ymm0, ymm1, ymm2, 0x04": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b00 0xC2 256-bit" ], "ExpectedArm64ASM": [ - "fcmne p0.s, p7/z, z17.s, z18.s", - "not z0.s, p0/m, z17.s", - "movprfx z16.s, p0/z, z17.s", - "orr z16.s, p0/m, z16.s, z0.s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmne p0.s, p7/z, z2.s, z3.s", + "not z0.s, p0/m, z2.s", + "movprfx z4.s, p0/z, z2.s", + "orr z4.s, p0/m, z4.s, z0.s", + "mov z16.d, p7/m, z4.d" ] }, "vcmpps xmm0, xmm1, xmm2, 0x05": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b00 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "fcmgt v2.4s, v18.4s, v17.4s", - "mvn v16.16b, v2.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmgt v4.4s, v3.4s, v2.4s", + "mvn v2.16b, v4.16b", + "mov z16.d, p7/m, z2.d" ] }, "vcmpps ymm0, ymm1, ymm2, 0x05": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "Map 1 0b00 0xC2 256-bit" ], "ExpectedArm64ASM": [ - "fcmgt p0.s, p7/z, z18.s, z17.s", - "not z0.s, p0/m, z18.s", - "movprfx z2.s, p0/z, z18.s", - "orr z2.s, p0/m, z2.s, z0.s", - "not z16.b, p7/m, z2.b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmgt p0.s, p7/z, z3.s, z2.s", + "not z0.s, p0/m, z3.s", + "movprfx z4.s, p0/z, z3.s", + "orr z4.s, p0/m, z4.s, z0.s", + "not z2.b, p7/m, z4.b", + "mov z16.d, p7/m, z2.d" ] }, "vcmpps xmm0, xmm1, xmm2, 0x06": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b00 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "fcmge v2.4s, v18.4s, v17.4s", - "mvn v16.16b, v2.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmge v4.4s, v3.4s, v2.4s", + "mvn v2.16b, v4.16b", + "mov z16.d, p7/m, z2.d" ] }, "vcmpps ymm0, ymm1, ymm2, 0x06": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "Map 1 0b00 0xC2 256-bit" ], "ExpectedArm64ASM": [ - "fcmge p0.s, p7/z, z18.s, z17.s", - "not z0.s, p0/m, z18.s", - "movprfx z2.s, p0/z, z18.s", - "orr z2.s, p0/m, z2.s, z0.s", - "not z16.b, p7/m, z2.b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmge p0.s, p7/z, z3.s, z2.s", + "not z0.s, p0/m, z3.s", + "movprfx z4.s, p0/z, z3.s", + "orr z4.s, p0/m, z4.s, z0.s", + "not z2.b, p7/m, z4.b", + "mov z16.d, p7/m, z2.d" ] }, "vcmpps xmm0, xmm1, xmm2, 0x07": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b00 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "fcmge v0.4s, v17.4s, v18.4s", - "fcmgt v1.4s, v18.4s, v17.4s", - "orr v16.16b, v0.16b, v1.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmge v0.4s, v2.4s, v3.4s", + "fcmgt v1.4s, v3.4s, v2.4s", + "orr v4.16b, v0.16b, v1.16b", + "mov z16.d, p7/m, z4.d" ] }, "vcmpps ymm0, ymm1, ymm2, 0x07": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "Map 1 0b00 0xC2 256-bit" ], "ExpectedArm64ASM": [ - "fcmuo p0.s, p7/z, z17.s, z18.s", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmuo p0.s, p7/z, z2.s, z3.s", "not p0.b, p7/z, p0.b", - "not z0.s, p0/m, z17.s", - "movprfx z16.s, p0/z, z17.s", - "orr z16.s, p0/m, z16.s, z0.s" + "not z0.s, p0/m, z2.s", + "movprfx z4.s, p0/z, z2.s", + "orr z4.s, p0/m, z4.s, z0.s", + "mov z16.d, p7/m, z4.d" ] }, "vcmppd xmm0, xmm1, xmm2, 0x00": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "fcmeq v16.2d, v17.2d, v18.2d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmeq v4.2d, v2.2d, v3.2d", + "mov z16.d, p7/m, z4.d" ] }, "vcmppd ymm0, ymm1, ymm2, 0x00": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b01 0xC2 256-bit" ], "ExpectedArm64ASM": [ - "fcmeq p0.d, p7/z, z17.d, z18.d", - "not z0.d, p0/m, z17.d", - "movprfx z16.d, p0/z, z17.d", - "orr z16.d, p0/m, z16.d, z0.d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmeq p0.d, p7/z, z2.d, z3.d", + "not z0.d, p0/m, z2.d", + "movprfx z4.d, p0/z, z2.d", + "orr z4.d, p0/m, z4.d, z0.d", + "mov z16.d, p7/m, z4.d" ] }, "vcmppd xmm0, xmm1, xmm2, 0x01": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "fcmgt v16.2d, v18.2d, v17.2d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmgt v4.2d, v3.2d, v2.2d", + "mov z16.d, p7/m, z4.d" ] }, "vcmppd ymm0, ymm1, ymm2, 0x01": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b01 0xC2 256-bit" ], "ExpectedArm64ASM": [ - "fcmgt p0.d, p7/z, z18.d, z17.d", - "not z0.d, p0/m, z18.d", - "movprfx z16.d, p0/z, z18.d", - "orr z16.d, p0/m, z16.d, z0.d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmgt p0.d, p7/z, z3.d, z2.d", + "not z0.d, p0/m, z3.d", + "movprfx z4.d, p0/z, z3.d", + "orr z4.d, p0/m, z4.d, z0.d", + "mov z16.d, p7/m, z4.d" ] }, "vcmppd xmm0, xmm1, xmm2, 0x02": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "fcmge v16.2d, v18.2d, v17.2d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmge v4.2d, v3.2d, v2.2d", + "mov z16.d, p7/m, z4.d" ] }, "vcmppd ymm0, ymm1, ymm2, 0x02": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b01 0xC2 256-bit" ], "ExpectedArm64ASM": [ - "fcmge p0.d, p7/z, z18.d, z17.d", - "not z0.d, p0/m, z18.d", - "movprfx z16.d, p0/z, z18.d", - "orr z16.d, p0/m, z16.d, z0.d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmge p0.d, p7/z, z3.d, z2.d", + "not z0.d, p0/m, z3.d", + "movprfx z4.d, p0/z, z3.d", + "orr z4.d, p0/m, z4.d, z0.d", + "mov z16.d, p7/m, z4.d" ] }, "vcmppd xmm0, xmm1, xmm2, 0x03": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b01 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "fcmge v0.2d, v17.2d, v18.2d", - "fcmgt v1.2d, v18.2d, v17.2d", - "orr v16.16b, v0.16b, v1.16b", - "mvn v16.16b, v16.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmge v0.2d, v2.2d, v3.2d", + "fcmgt v1.2d, v3.2d, v2.2d", + "orr v4.16b, v0.16b, v1.16b", + "mvn v4.16b, v4.16b", + "mov z16.d, p7/m, z4.d" ] }, "vcmppd ymm0, ymm1, ymm2, 0x03": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b01 0xC2 256-bit" ], "ExpectedArm64ASM": [ - "fcmuo p0.d, p7/z, z17.d, z18.d", - "not z0.d, p0/m, z17.d", - "movprfx z16.d, p0/z, z17.d", - "orr z16.d, p0/m, z16.d, z0.d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmuo p0.d, p7/z, z2.d, z3.d", + "not z0.d, p0/m, z2.d", + "movprfx z4.d, p0/z, z2.d", + "orr z4.d, p0/m, z4.d, z0.d", + "mov z16.d, p7/m, z4.d" ] }, "vcmppd xmm0, xmm1, xmm2, 0x04": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b01 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "fcmeq v16.2d, v17.2d, v18.2d", - "mvn v16.16b, v16.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmeq v4.2d, v2.2d, v3.2d", + "mvn v4.16b, v4.16b", + "mov z16.d, p7/m, z4.d" ] }, "vcmppd ymm0, ymm1, ymm2, 0x04": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b01 0xC2 256-bit" ], "ExpectedArm64ASM": [ - "fcmne p0.d, p7/z, z17.d, z18.d", - "not z0.d, p0/m, z17.d", - "movprfx z16.d, p0/z, z17.d", - "orr z16.d, p0/m, z16.d, z0.d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmne p0.d, p7/z, z2.d, z3.d", + "not z0.d, p0/m, z2.d", + "movprfx z4.d, p0/z, z2.d", + "orr z4.d, p0/m, z4.d, z0.d", + "mov z16.d, p7/m, z4.d" ] }, "vcmppd xmm0, xmm1, xmm2, 0x05": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b01 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "fcmgt v2.2d, v18.2d, v17.2d", - "mvn v16.16b, v2.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmgt v4.2d, v3.2d, v2.2d", + "mvn v2.16b, v4.16b", + "mov z16.d, p7/m, z2.d" ] }, "vcmppd ymm0, ymm1, ymm2, 0x05": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "Map 1 0b01 0xC2 256-bit" ], "ExpectedArm64ASM": [ - "fcmgt p0.d, p7/z, z18.d, z17.d", - "not z0.d, p0/m, z18.d", - "movprfx z2.d, p0/z, z18.d", - "orr z2.d, p0/m, z2.d, z0.d", - "not z16.b, p7/m, z2.b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmgt p0.d, p7/z, z3.d, z2.d", + "not z0.d, p0/m, z3.d", + "movprfx z4.d, p0/z, z3.d", + "orr z4.d, p0/m, z4.d, z0.d", + "not z2.b, p7/m, z4.b", + "mov z16.d, p7/m, z2.d" ] }, "vcmppd xmm0, xmm1, xmm2, 0x06": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b01 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "fcmge v2.2d, v18.2d, v17.2d", - "mvn v16.16b, v2.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmge v4.2d, v3.2d, v2.2d", + "mvn v2.16b, v4.16b", + "mov z16.d, p7/m, z2.d" ] }, "vcmppd ymm0, ymm1, ymm2, 0x06": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "Map 1 0b01 0xC2 256-bit" ], "ExpectedArm64ASM": [ - "fcmge p0.d, p7/z, z18.d, z17.d", - "not z0.d, p0/m, z18.d", - "movprfx z2.d, p0/z, z18.d", - "orr z2.d, p0/m, z2.d, z0.d", - "not z16.b, p7/m, z2.b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmge p0.d, p7/z, z3.d, z2.d", + "not z0.d, p0/m, z3.d", + "movprfx z4.d, p0/z, z3.d", + "orr z4.d, p0/m, z4.d, z0.d", + "not z2.b, p7/m, z4.b", + "mov z16.d, p7/m, z2.d" ] }, "vcmppd xmm0, xmm1, xmm2, 0x07": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b01 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "fcmge v0.2d, v17.2d, v18.2d", - "fcmgt v1.2d, v18.2d, v17.2d", - "orr v16.16b, v0.16b, v1.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmge v0.2d, v2.2d, v3.2d", + "fcmgt v1.2d, v3.2d, v2.2d", + "orr v4.16b, v0.16b, v1.16b", + "mov z16.d, p7/m, z4.d" ] }, "vcmppd ymm0, ymm1, ymm2, 0x07": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "Map 1 0b01 0xC2 256-bit" ], "ExpectedArm64ASM": [ - "fcmuo p0.d, p7/z, z17.d, z18.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmuo p0.d, p7/z, z2.d, z3.d", "not p0.b, p7/z, p0.b", - "not z0.d, p0/m, z17.d", - "movprfx z16.d, p0/z, z17.d", - "orr z16.d, p0/m, z16.d, z0.d" + "not z0.d, p0/m, z2.d", + "movprfx z4.d, p0/z, z2.d", + "orr z4.d, p0/m, z4.d, z0.d", + "mov z16.d, p7/m, z4.d" ] }, "vcmpss xmm0, xmm1, xmm2, 0x00": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b10 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fcmeq s0, s17, s18", - "mov v16.s[0], v0.s[0]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fcmeq s0, s2, s3", + "mov v4.s[0], v0.s[0]", + "mov z16.d, p7/m, z4.d" ] }, "vcmpss xmm0, xmm1, xmm2, 0x01": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b10 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fcmgt s0, s18, s17", - "mov v16.s[0], v0.s[0]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fcmgt s0, s3, s2", + "mov v4.s[0], v0.s[0]", + "mov z16.d, p7/m, z4.d" ] }, "vcmpss xmm0, xmm1, xmm2, 0x02": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b10 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fcmge s0, s18, s17", - "mov v16.s[0], v0.s[0]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fcmge s0, s3, s2", + "mov v4.s[0], v0.s[0]", + "mov z16.d, p7/m, z4.d" ] }, "vcmpss xmm0, xmm1, xmm2, 0x03": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": [ "Map 1 0b10 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fcmge s0, s17, s18", - "fcmgt s1, s18, s17", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fcmge s0, s2, s3", + "fcmgt s1, s3, s2", "orr v0.8b, v0.8b, v1.8b", "mvn v0.8b, v0.8b", - "mov v16.s[0], v0.s[0]" + "mov v4.s[0], v0.s[0]", + "mov z16.d, p7/m, z4.d" ] }, "vcmpss xmm0, xmm1, xmm2, 0x04": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b10 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fcmeq s0, s17, s18", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fcmeq s0, s2, s3", "mvn v0.8b, v0.8b", - "mov v16.s[0], v0.s[0]" + "mov v4.s[0], v0.s[0]", + "mov z16.d, p7/m, z4.d" ] }, "vcmpss xmm0, xmm1, xmm2, 0x05": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b10 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "fcmgt s2, s18, s17", - "mvn v2.16b, v2.16b", - "mov v16.16b, v17.16b", - "mov v16.s[0], v2.s[0]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmgt s4, s3, s2", + "mvn v3.16b, v4.16b", + "mov v4.16b, v2.16b", + "mov v4.s[0], v3.s[0]", + "mov z16.d, p7/m, z4.d" ] }, "vcmpss xmm0, xmm1, xmm2, 0x06": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b10 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "fcmge s2, s18, s17", - "mvn v2.16b, v2.16b", - "mov v16.16b, v17.16b", - "mov v16.s[0], v2.s[0]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmge s4, s3, s2", + "mvn v3.16b, v4.16b", + "mov v4.16b, v2.16b", + "mov v4.s[0], v3.s[0]", + "mov z16.d, p7/m, z4.d" ] }, "vcmpss xmm0, xmm1, xmm2, 0x07": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "Map 1 0b10 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fcmge s0, s17, s18", - "fcmgt s1, s18, s17", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fcmge s0, s2, s3", + "fcmgt s1, s3, s2", "orr v0.8b, v0.8b, v1.8b", - "mov v16.s[0], v0.s[0]" + "mov v4.s[0], v0.s[0]", + "mov z16.d, p7/m, z4.d" ] }, "vcmpsd xmm0, xmm1, xmm2, 0x00": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b11 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fcmeq d0, d17, d18", - "mov v16.d[0], v0.d[0]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fcmeq d0, d2, d3", + "mov v4.d[0], v0.d[0]", + "mov z16.d, p7/m, z4.d" ] }, "vcmpsd xmm0, xmm1, xmm2, 0x01": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b11 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fcmgt d0, d18, d17", - "mov v16.d[0], v0.d[0]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fcmgt d0, d3, d2", + "mov v4.d[0], v0.d[0]", + "mov z16.d, p7/m, z4.d" ] }, "vcmpsd xmm0, xmm1, xmm2, 0x02": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b11 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fcmge d0, d18, d17", - "mov v16.d[0], v0.d[0]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fcmge d0, d3, d2", + "mov v4.d[0], v0.d[0]", + "mov z16.d, p7/m, z4.d" ] }, "vcmpsd xmm0, xmm1, xmm2, 0x03": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": [ "Map 1 0b11 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fcmge d0, d17, d18", - "fcmgt d1, d18, d17", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fcmge d0, d2, d3", + "fcmgt d1, d3, d2", "orr v0.8b, v0.8b, v1.8b", "mvn v0.8b, v0.8b", - "mov v16.d[0], v0.d[0]" + "mov v4.d[0], v0.d[0]", + "mov z16.d, p7/m, z4.d" ] }, "vcmpsd xmm0, xmm1, xmm2, 0x04": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b11 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fcmeq d0, d17, d18", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fcmeq d0, d2, d3", "mvn v0.8b, v0.8b", - "mov v16.d[0], v0.d[0]" + "mov v4.d[0], v0.d[0]", + "mov z16.d, p7/m, z4.d" ] }, "vcmpsd xmm0, xmm1, xmm2, 0x05": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b11 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "fcmgt d2, d18, d17", - "mvn v2.16b, v2.16b", - "mov v16.16b, v17.16b", - "mov v16.d[0], v2.d[0]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmgt d4, d3, d2", + "mvn v3.16b, v4.16b", + "mov v4.16b, v2.16b", + "mov v4.d[0], v3.d[0]", + "mov z16.d, p7/m, z4.d" ] }, "vcmpsd xmm0, xmm1, xmm2, 0x06": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b11 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "fcmge d2, d18, d17", - "mvn v2.16b, v2.16b", - "mov v16.16b, v17.16b", - "mov v16.d[0], v2.d[0]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmge d4, d3, d2", + "mvn v3.16b, v4.16b", + "mov v4.16b, v2.16b", + "mov v4.d[0], v3.d[0]", + "mov z16.d, p7/m, z4.d" ] }, "vcmpsd xmm0, xmm1, xmm2, 0x07": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "Map 1 0b11 0xC2 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fcmge d0, d17, d18", - "fcmgt d1, d18, d17", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fcmge d0, d2, d3", + "fcmgt d1, d3, d2", "orr v0.8b, v0.8b, v1.8b", - "mov v16.d[0], v0.d[0]" + "mov v4.d[0], v0.d[0]", + "mov z16.d, p7/m, z4.d" ] }, "vpinsrw xmm0, xmm0, eax, 000b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b01 0xC4 128-bit" ], "ExpectedArm64ASM": [ - "mov v2.16b, v16.16b", - "mov v2.h[0], w4", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.h[0], w20", + "mov v2.16b, v3.16b", + "mov z16.d, p7/m, z2.d" ] }, "vpinsrw xmm0, xmm1, eax, 000b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b01 0xC4 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "mov v16.h[0], w4" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.h[0], w20", + "mov z16.d, p7/m, z3.d" ] }, "vpinsrw xmm0, xmm1, eax, 001b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b01 0xC4 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "mov v16.h[1], w4" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.h[1], w20", + "mov z16.d, p7/m, z3.d" ] }, "vpinsrw xmm0, xmm1, eax, 111b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b01 0xC4 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "mov v16.h[7], w4" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.h[7], w20", + "mov z16.d, p7/m, z3.d" ] }, "vpextrw eax, xmm0, 000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b01 0xC5 128-bit" ], "ExpectedArm64ASM": [ - "umov w4, v16.h[0]" + "mov z2.d, p7/m, z16.d", + "umov w20, v2.h[0]", + "mov x4, x20" ] }, "vpextrw eax, xmm0, 001b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b01 0xC5 128-bit" ], "ExpectedArm64ASM": [ - "umov w4, v16.h[1]" + "mov z2.d, p7/m, z16.d", + "umov w20, v2.h[1]", + "mov x4, x20" ] }, "vpextrw eax, xmm0, 111b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b01 0xC5 128-bit" ], "ExpectedArm64ASM": [ - "umov w4, v16.h[7]" + "mov z2.d, p7/m, z16.d", + "umov w20, v2.h[7]", + "mov x4, x20" ] }, "vpextrw [rax], xmm0, 000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b01 0xC5 128-bit" ], "ExpectedArm64ASM": [ - "st1 {v16.h}[0], [x4]" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "st1 {v2.h}[0], [x20]" ] }, "vpextrw [rax], xmm0, 001b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b01 0xC5 128-bit" ], "ExpectedArm64ASM": [ - "st1 {v16.h}[1], [x4]" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "st1 {v2.h}[1], [x20]" ] }, "vpextrw [rax], xmm0, 111b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b01 0xC5 128-bit" ], "ExpectedArm64ASM": [ - "st1 {v16.h}[7], [x4]" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "st1 {v2.h}[7], [x20]" ] }, "vshufps xmm0, xmm1, xmm2, 00b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b00 0xC6 128-bit" ], "ExpectedArm64ASM": [ - "dup v2.4s, v17.s[0]", - "dup v3.4s, v18.s[0]", - "zip1 v16.2d, v2.2d, v3.2d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "dup v4.4s, v2.s[0]", + "dup v2.4s, v3.s[0]", + "zip1 v3.2d, v4.2d, v2.2d", + "mov z16.d, p7/m, z3.d" ] }, "vshufps ymm0, ymm1, ymm2, 00b": { - "ExpectedInstructionCount": 50, + "ExpectedInstructionCount": 59, "Comment": [ "Map 1 0b00 0xC6 256-bit" ], "ExpectedArm64ASM": [ - "mov z1.s, s17", - "mov z2.d, z17.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z1.s, s2", + "mov z4.d, z2.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-4", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[4]", + "mov z1.s, z2.s[4]", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #0", - "mov z2.s, p0/m, z1.s", + "mov z5.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s17", + "mov z1.s, s2", + "mov z4.d, z5.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-3", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[4]", + "mov z1.s, z2.s[4]", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #1", - "mov z2.s, p0/m, z1.s", + "mov z5.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s18", + "mov z1.s, s3", + "mov z2.d, z5.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-2", "mov z2.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z18.s[4]", + "mov z1.s, z3.s[4]", + "mov z4.d, z2.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #2", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s18", + "mov z1.s, s3", + "mov z2.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-1", "mov z2.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z18.s[4]", - "mov z16.d, z2.d", + "mov z1.s, z3.s[4]", + "mov z4.d, z2.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #3", - "mov z16.s, p0/m, z1.s", - "msr nzcv, x0" + "mov z4.s, p0/m, z1.s", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vshufps xmm0, xmm1, xmm2, 01b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b00 0xC6 128-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", "ldr x0, [x28, #1768]", - "ldr q2, [x0, #16]", - "tbl v16.16b, {v17.16b, v18.16b}, v2.16b" + "ldr q4, [x0, #16]", + "tbl v5.16b, {v2.16b, v3.16b}, v4.16b", + "mov z16.d, p7/m, z5.d" ] }, "vshufps ymm0, ymm1, ymm2, 01b": { - "ExpectedInstructionCount": 50, + "ExpectedInstructionCount": 59, "Comment": [ "Map 1 0b00 0xC6 256-bit" ], "ExpectedArm64ASM": [ - "mov z1.s, z17.s[1]", - "mov z2.d, z17.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z1.s, z2.s[1]", + "mov z4.d, z2.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-4", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[5]", + "mov z1.s, z2.s[5]", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #0", - "mov z2.s, p0/m, z1.s", + "mov z5.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s17", + "mov z1.s, s2", + "mov z4.d, z5.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-3", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[4]", + "mov z1.s, z2.s[4]", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #1", - "mov z2.s, p0/m, z1.s", + "mov z5.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s18", + "mov z1.s, s3", + "mov z2.d, z5.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-2", "mov z2.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z18.s[4]", + "mov z1.s, z3.s[4]", + "mov z4.d, z2.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #2", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s18", + "mov z1.s, s3", + "mov z2.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-1", "mov z2.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z18.s[4]", - "mov z16.d, z2.d", + "mov z1.s, z3.s[4]", + "mov z4.d, z2.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #3", - "mov z16.s, p0/m, z1.s", - "msr nzcv, x0" + "mov z4.s, p0/m, z1.s", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vshufps xmm0, xmm1, xmm2, 10b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b00 0xC6 128-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", "ldr x0, [x28, #1768]", - "ldr q2, [x0, #32]", - "tbl v16.16b, {v17.16b, v18.16b}, v2.16b" + "ldr q4, [x0, #32]", + "tbl v5.16b, {v2.16b, v3.16b}, v4.16b", + "mov z16.d, p7/m, z5.d" ] }, "vshufps ymm0, ymm1, ymm2, 10b": { - "ExpectedInstructionCount": 50, + "ExpectedInstructionCount": 59, "Comment": [ "Map 1 0b00 0xC6 256-bit" ], "ExpectedArm64ASM": [ - "mov z1.s, z17.s[2]", - "mov z2.d, z17.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z1.s, z2.s[2]", + "mov z4.d, z2.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-4", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[6]", + "mov z1.s, z2.s[6]", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #0", - "mov z2.s, p0/m, z1.s", + "mov z5.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s17", + "mov z1.s, s2", + "mov z4.d, z5.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-3", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[4]", + "mov z1.s, z2.s[4]", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #1", - "mov z2.s, p0/m, z1.s", + "mov z5.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s18", + "mov z1.s, s3", + "mov z2.d, z5.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-2", "mov z2.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z18.s[4]", + "mov z1.s, z3.s[4]", + "mov z4.d, z2.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #2", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s18", + "mov z1.s, s3", + "mov z2.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-1", "mov z2.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z18.s[4]", - "mov z16.d, z2.d", + "mov z1.s, z3.s[4]", + "mov z4.d, z2.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #3", - "mov z16.s, p0/m, z1.s", - "msr nzcv, x0" + "mov z4.s, p0/m, z1.s", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vshufps xmm0, xmm1, xmm2, 11b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b00 0xC6 128-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", "ldr x0, [x28, #1768]", - "ldr q2, [x0, #48]", - "tbl v16.16b, {v17.16b, v18.16b}, v2.16b" + "ldr q4, [x0, #48]", + "tbl v5.16b, {v2.16b, v3.16b}, v4.16b", + "mov z16.d, p7/m, z5.d" ] }, "vshufps ymm0, ymm1, ymm2, 11b": { - "ExpectedInstructionCount": 50, + "ExpectedInstructionCount": 59, "Comment": [ "Map 1 0b00 0xC6 256-bit" ], "ExpectedArm64ASM": [ - "mov z1.s, z17.s[3]", - "mov z2.d, z17.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z1.s, z2.s[3]", + "mov z4.d, z2.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-4", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[7]", + "mov z1.s, z2.s[7]", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #0", - "mov z2.s, p0/m, z1.s", + "mov z5.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s17", + "mov z1.s, s2", + "mov z4.d, z5.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-3", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[4]", + "mov z1.s, z2.s[4]", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #1", - "mov z2.s, p0/m, z1.s", + "mov z5.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s18", + "mov z1.s, s3", + "mov z2.d, z5.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-2", "mov z2.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z18.s[4]", + "mov z1.s, z3.s[4]", + "mov z4.d, z2.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #2", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s18", + "mov z1.s, s3", + "mov z2.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-1", "mov z2.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z18.s[4]", - "mov z16.d, z2.d", + "mov z1.s, z3.s[4]", + "mov z4.d, z2.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #3", - "mov z16.s, p0/m, z1.s", - "msr nzcv, x0" + "mov z4.s, p0/m, z1.s", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vshufpd xmm0, xmm1, xmm2, 0b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xC6 128-bit" ], "ExpectedArm64ASM": [ - "zip1 v16.2d, v17.2d, v18.2d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "zip1 v4.2d, v2.2d, v3.2d", + "mov z16.d, p7/m, z4.d" ] }, "vshufpd ymm0, ymm1, ymm2, 0b": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 31, "Comment": [ "Map 1 0b01 0xC6 256-bit" ], "ExpectedArm64ASM": [ - "mov z1.d, d17", - "mov z2.d, z17.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z1.d, d2", + "mov z4.d, z2.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-2", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[2]", + "mov z1.d, z2.d[2]", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z2.d, p0/m, z1.d", + "mov z5.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, d18", + "mov z1.d, d3", + "mov z2.d, z5.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", "mov z2.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z18.d[2]", - "mov z16.d, z2.d", + "mov z1.d, z3.d[2]", + "mov z4.d, z2.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z4.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vshufpd xmm0, xmm1, xmm2, 1b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xC6 128-bit" ], "ExpectedArm64ASM": [ - "ext v16.16b, v17.16b, v18.16b, #8" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "ext v4.16b, v2.16b, v3.16b, #8", + "mov z16.d, p7/m, z4.d" ] }, "vshufpd ymm0, ymm1, ymm2, 1b": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 31, "Comment": [ "Map 1 0b01 0xC6 256-bit" ], "ExpectedArm64ASM": [ - "mov z1.d, z17.d[1]", - "mov z2.d, z17.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z1.d, z2.d[1]", + "mov z4.d, z2.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-2", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[2]", + "mov z1.d, z2.d[2]", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z2.d, p0/m, z1.d", + "mov z5.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, d18", + "mov z1.d, d3", + "mov z2.d, z5.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", "mov z2.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z18.d[2]", - "mov z16.d, z2.d", + "mov z1.d, z3.d[2]", + "mov z4.d, z2.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z4.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vmovaps xmm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b00 0x28 128-bit" ], "ExpectedArm64ASM": [ - "ldr q16, [x4]" + "mov x20, x4", + "ldr q2, [x20]", + "mov z16.d, p7/m, z2.d" ] }, "vmovaps ymm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b00 0x28 256-bit" ], "ExpectedArm64ASM": [ - "ld1b {z16.b}, p7/z, [x4]" + "mov x20, x4", + "ld1b {z2.b}, p7/z, [x20]", + "mov z16.d, p7/m, z2.d" ] }, "vmovaps xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b00 0x29 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b" + "mov z2.d, p7/m, z17.d", + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vmovaps ymm0, ymm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map 1 0b00 0x29 256-bit" ], "ExpectedArm64ASM": [ - "mov z16.d, p7/m, z17.d" + "mov z2.d, p7/m, z17.d", + "mov z16.d, p7/m, z2.d" ] }, "vmovapd xmm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b01 0x28 128-bit" ], "ExpectedArm64ASM": [ - "ldr q16, [x4]" + "mov x20, x4", + "ldr q2, [x20]", + "mov z16.d, p7/m, z2.d" ] }, "vmovapd ymm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b01 0x28 256-bit" ], "ExpectedArm64ASM": [ - "ld1b {z16.b}, p7/z, [x4]" + "mov x20, x4", + "ld1b {z2.b}, p7/z, [x20]", + "mov z16.d, p7/m, z2.d" ] }, "vmovapd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b01 0x29 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b" + "mov z2.d, p7/m, z17.d", + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vmovapd ymm0, ymm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map 1 0b01 0x29 256-bit" ], "ExpectedArm64ASM": [ - "mov z16.d, p7/m, z17.d" + "mov z2.d, p7/m, z17.d", + "mov z16.d, p7/m, z2.d" ] }, "vmovaps [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b00 0x29 128-bit" ], "ExpectedArm64ASM": [ - "str q16, [x4]" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "str q2, [x20]" ] }, "vmovaps [rax], ymm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b00 0x29 256-bit" ], "ExpectedArm64ASM": [ - "st1b {z16.b}, p7, [x4]" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "st1b {z2.b}, p7, [x20]" ] }, "vmovapd [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b01 0x29 128-bit" ], "ExpectedArm64ASM": [ - "str q16, [x4]" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "str q2, [x20]" ] }, "vmovapd [rax], ymm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b01 0x29 256-bit" ], "ExpectedArm64ASM": [ - "st1b {z16.b}, p7, [x4]" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "st1b {z2.b}, p7, [x20]" ] }, "vcvtsi2ss xmm0, xmm1, eax": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b10 0x2A 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "scvtf s0, w4", - "mov v16.s[0], v0.s[0]" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "mov v3.16b, v2.16b", + "scvtf s0, w20", + "mov v3.s[0], v0.s[0]", + "mov z16.d, p7/m, z3.d" ] }, "vcvtsi2ss xmm0, xmm1, rax": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b10 0x2A 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "scvtf s0, x4", - "mov v16.s[0], v0.s[0]" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "mov v3.16b, v2.16b", + "scvtf s0, x20", + "mov v3.s[0], v0.s[0]", + "mov z16.d, p7/m, z3.d" ] }, "vcvtsi2sd xmm0, xmm1, eax": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b11 0x2A 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "scvtf d0, w4", - "mov v16.d[0], v0.d[0]" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "mov v3.16b, v2.16b", + "scvtf d0, w20", + "mov v3.d[0], v0.d[0]", + "mov z16.d, p7/m, z3.d" ] }, "vcvtsi2sd xmm0, xmm1, rax": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b11 0x2A 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "scvtf d0, x4", - "mov v16.d[0], v0.d[0]" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "mov v3.16b, v2.16b", + "scvtf d0, x20", + "mov v3.d[0], v0.d[0]", + "mov z16.d, p7/m, z3.d" ] }, "vmovntps [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b00 0x2B 128-bit" ], "ExpectedArm64ASM": [ - "str q16, [x4]" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "str q2, [x20]" ] }, "vmovntps [rax], ymm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b00 0x2B 256-bit" ], "ExpectedArm64ASM": [ - "st1b {z16.b}, p7, [x4]" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "st1b {z2.b}, p7, [x20]" ] }, "vmovntpd [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b01 0x2B 128-bit" ], "ExpectedArm64ASM": [ - "str q16, [x4]" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "str q2, [x20]" ] }, "vmovntpd [rax], ymm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b01 0x2B 256-bit" ], "ExpectedArm64ASM": [ - "st1b {z16.b}, p7, [x4]" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "st1b {z2.b}, p7, [x20]" ] }, "vcvttss2si eax, xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b10 0x2c 128-bit" ], "ExpectedArm64ASM": [ - "fcvtzs w4, s16" + "mov z2.d, p7/m, z16.d", + "fcvtzs w20, s2", + "mov x4, x20" ] }, "vcvttss2si rax, xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b10 0x2c 128-bit" ], "ExpectedArm64ASM": [ - "fcvtzs x4, s16" + "mov z2.d, p7/m, z16.d", + "fcvtzs x20, s2", + "mov x4, x20" ] }, "vcvttsd2si eax, xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b11 0x2c 128-bit" ], "ExpectedArm64ASM": [ - "fcvtzs w4, d16" + "mov z2.d, p7/m, z16.d", + "fcvtzs w20, d2", + "mov x4, x20" ] }, "vcvttsd2si rax, xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b11 0x2c 128-bit" ], "ExpectedArm64ASM": [ - "fcvtzs x4, d16" + "mov z2.d, p7/m, z16.d", + "fcvtzs x20, d2", + "mov x4, x20" ] }, "vcvtss2si eax, xmm0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b10 0x2d 128-bit" ], "ExpectedArm64ASM": [ - "frinti s0, s16", - "fcvtzs w4, s0" + "mov z2.d, p7/m, z16.d", + "frinti s0, s2", + "fcvtzs w20, s0", + "mov x4, x20" ] }, "vcvtss2si rax, xmm0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b10 0x2d 128-bit" ], "ExpectedArm64ASM": [ - "frinti s0, s16", - "fcvtzs x4, s0" + "mov z2.d, p7/m, z16.d", + "frinti s0, s2", + "fcvtzs x20, s0", + "mov x4, x20" ] }, "vcvtsd2si eax, xmm0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b11 0x2d 128-bit" ], "ExpectedArm64ASM": [ - "frinti d0, d16", - "fcvtzs x4, d0" + "mov z2.d, p7/m, z16.d", + "frinti d0, d2", + "fcvtzs x20, d0", + "mov x4, x20" ] }, "vcvtsd2si rax, xmm0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b11 0x2d 128-bit" ], "ExpectedArm64ASM": [ - "frinti d0, d16", - "fcvtzs x4, d0" + "mov z2.d, p7/m, z16.d", + "frinti d0, d2", + "fcvtzs x20, d0", + "mov x4, x20" ] }, "vucomiss xmm0, xmm1": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 15, "Comment": [ "Map 1 0b00 0x2e 128-bit" ], "ExpectedArm64ASM": [ - "fcmp s16, s17", - "mov w27, #0x0", - "cset w20, eq", - "cset w21, lo", - "cset w22, vs", - "orr w21, w21, w22", - "lsl x21, x21, #29", - "orr w20, w20, w22", - "orr w20, w21, w20, lsl #30", - "eor w26, w22, #0x1", - "msr nzcv, x20" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "fcmp s2, s3", + "mov w20, #0x0", + "cset w21, eq", + "cset w22, lo", + "cset w23, vs", + "orr w24, w22, w23", + "lsl x22, x24, #29", + "orr w24, w21, w23", + "orr w21, w22, w24, lsl #30", + "eor w22, w23, #0x1", + "mov x26, x22", + "mov x27, x20", + "msr nzcv, x21" ] }, "vucomisd xmm0, xmm1": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 15, "Comment": [ "Map 1 0b01 0x2e 128-bit" ], "ExpectedArm64ASM": [ - "fcmp d16, d17", - "mov w27, #0x0", - "cset w20, eq", - "cset w21, lo", - "cset w22, vs", - "orr w21, w21, w22", - "lsl x21, x21, #29", - "orr w20, w20, w22", - "orr w20, w21, w20, lsl #30", - "eor w26, w22, #0x1", - "msr nzcv, x20" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "fcmp d2, d3", + "mov w20, #0x0", + "cset w21, eq", + "cset w22, lo", + "cset w23, vs", + "orr w24, w22, w23", + "lsl x22, x24, #29", + "orr w24, w21, w23", + "orr w21, w22, w24, lsl #30", + "eor w22, w23, #0x1", + "mov x26, x22", + "mov x27, x20", + "msr nzcv, x21" ] }, "vcomiss xmm0, xmm1": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 15, "Comment": [ "Map 1 0b00 0x2f 128-bit" ], "ExpectedArm64ASM": [ - "fcmp s16, s17", - "mov w27, #0x0", - "cset w20, eq", - "cset w21, lo", - "cset w22, vs", - "orr w21, w21, w22", - "lsl x21, x21, #29", - "orr w20, w20, w22", - "orr w20, w21, w20, lsl #30", - "eor w26, w22, #0x1", - "msr nzcv, x20" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "fcmp s2, s3", + "mov w20, #0x0", + "cset w21, eq", + "cset w22, lo", + "cset w23, vs", + "orr w24, w22, w23", + "lsl x22, x24, #29", + "orr w24, w21, w23", + "orr w21, w22, w24, lsl #30", + "eor w22, w23, #0x1", + "mov x26, x22", + "mov x27, x20", + "msr nzcv, x21" ] }, "vcomisd xmm0, xmm1": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 15, "Comment": [ "Map 1 0b01 0x2f 128-bit" ], "ExpectedArm64ASM": [ - "fcmp d16, d17", - "mov w27, #0x0", - "cset w20, eq", - "cset w21, lo", - "cset w22, vs", - "orr w21, w21, w22", - "lsl x21, x21, #29", - "orr w20, w20, w22", - "orr w20, w21, w20, lsl #30", - "eor w26, w22, #0x1", - "msr nzcv, x20" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "fcmp d2, d3", + "mov w20, #0x0", + "cset w21, eq", + "cset w22, lo", + "cset w23, vs", + "orr w24, w22, w23", + "lsl x22, x24, #29", + "orr w24, w21, w23", + "orr w21, w22, w24, lsl #30", + "eor w22, w23, #0x1", + "mov x26, x22", + "mov x27, x20", + "msr nzcv, x21" ] }, "vaddps xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b00 0x58 128-bit" ], "ExpectedArm64ASM": [ - "fadd v16.4s, v17.4s, v18.4s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fadd v4.4s, v2.4s, v3.4s", + "mov z16.d, p7/m, z4.d" ] }, "vaddps ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b00 0x58 256-bit" ], "ExpectedArm64ASM": [ - "fadd z16.s, z17.s, z18.s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fadd z4.s, z2.s, z3.s", + "mov z16.d, p7/m, z4.d" ] }, "vaddpd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0x58 128-bit" ], "ExpectedArm64ASM": [ - "fadd v16.2d, v17.2d, v18.2d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fadd v4.2d, v2.2d, v3.2d", + "mov z16.d, p7/m, z4.d" ] }, "vaddpd ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0x58 256-bit" ], "ExpectedArm64ASM": [ - "fadd z16.d, z17.d, z18.d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fadd z4.d, z2.d, z3.d", + "mov z16.d, p7/m, z4.d" ] }, "vaddss xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b10 0x58 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fadd s0, s17, s18", - "mov v16.s[0], v0.s[0]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fadd s0, s2, s3", + "mov v4.s[0], v0.s[0]", + "mov z16.d, p7/m, z4.d" ] }, "vaddsd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b11 0x58 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fadd d0, d17, d18", - "mov v16.d[0], v0.d[0]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fadd d0, d2, d3", + "mov v4.d[0], v0.d[0]", + "mov z16.d, p7/m, z4.d" ] }, "vmulps xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b00 0x59 128-bit" ], "ExpectedArm64ASM": [ - "fmul v16.4s, v17.4s, v18.4s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fmul v4.4s, v2.4s, v3.4s", + "mov z16.d, p7/m, z4.d" ] }, "vmulps ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b00 0x59 256-bit" ], "ExpectedArm64ASM": [ - "fmul z16.s, z17.s, z18.s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fmul z4.s, z2.s, z3.s", + "mov z16.d, p7/m, z4.d" ] }, "vmulpd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0x59 128-bit" ], "ExpectedArm64ASM": [ - "fmul v16.2d, v17.2d, v18.2d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fmul v4.2d, v2.2d, v3.2d", + "mov z16.d, p7/m, z4.d" ] }, "vmulpd ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0x59 256-bit" ], "ExpectedArm64ASM": [ - "fmul z16.d, z17.d, z18.d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fmul z4.d, z2.d, z3.d", + "mov z16.d, p7/m, z4.d" ] }, "vmulss xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b10 0x59 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fmul s0, s17, s18", - "mov v16.s[0], v0.s[0]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fmul s0, s2, s3", + "mov v4.s[0], v0.s[0]", + "mov z16.d, p7/m, z4.d" ] }, "vmulsd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b11 0x59 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fmul d0, d17, d18", - "mov v16.d[0], v0.d[0]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fmul d0, d2, d3", + "mov v4.d[0], v0.d[0]", + "mov z16.d, p7/m, z4.d" ] }, "vcvtps2pd xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b00 0x5a 128-bit" ], "ExpectedArm64ASM": [ - "fcvtl v2.2d, v17.2s", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z17.d", + "fcvtl v3.2d, v2.2s", + "mov v2.16b, v3.16b", + "mov z16.d, p7/m, z2.d" ] }, "vcvtpd2ps xmm0, [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0x5a 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x4]", - "fcvtn v16.2s, v2.2d" + "mov x20, x4", + "ldr q2, [x20]", + "fcvtn v3.2s, v2.2d", + "mov z16.d, p7/m, z3.d" ] }, "vcvtpd2ps xmm0, yword [rax]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b01 0x5a 128-bit" ], "ExpectedArm64ASM": [ - "ld1b {z2.b}, p7/z, [x4]", - "fcvtnt z2.s, p7/m, z2.d", - "uzp2 z2.s, z2.s, z2.s", - "mov v16.16b, v2.16b" + "mov x20, x4", + "ld1b {z2.b}, p7/z, [x20]", + "fcvtnt z3.s, p7/m, z2.d", + "uzp2 z3.s, z3.s, z3.s", + "mov v2.16b, v3.16b", + "mov z16.d, p7/m, z2.d" ] }, "vcvtpd2ps xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b01 0x5a 128-bit" ], "ExpectedArm64ASM": [ - "fcvtn v16.2s, v17.2d" + "mov z2.d, p7/m, z17.d", + "fcvtn v3.2s, v2.2d", + "mov z16.d, p7/m, z3.d" ] }, "vcvtss2sd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b10 0x5a 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fcvt d0, s18", - "mov v16.d[0], v0.d[0]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fcvt d0, s3", + "mov v4.d[0], v0.d[0]", + "mov z16.d, p7/m, z4.d" ] }, "vcvtsd2ss xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b11 0x5a 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fcvt s0, d18", - "mov v16.s[0], v0.s[0]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fcvt s0, d3", + "mov v4.s[0], v0.s[0]", + "mov z16.d, p7/m, z4.d" ] }, "vcvtdq2ps xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b00 0x5b 128-bit" ], "ExpectedArm64ASM": [ - "scvtf v16.4s, v17.4s" + "mov z2.d, p7/m, z17.d", + "scvtf v3.4s, v2.4s", + "mov z16.d, p7/m, z3.d" ] }, "vcvtdq2ps ymm0, ymm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b00 0x5b 256-bit" ], "ExpectedArm64ASM": [ - "scvtf z16.s, p7/m, z17.s" + "mov z2.d, p7/m, z17.d", + "scvtf z3.s, p7/m, z2.s", + "mov z16.d, p7/m, z3.d" ] }, "vcvtps2dq xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0x5b 128-bit" ], "ExpectedArm64ASM": [ - "frinti v16.4s, v17.4s", - "fcvtzs v16.4s, v16.4s" + "mov z2.d, p7/m, z17.d", + "frinti v3.4s, v2.4s", + "fcvtzs v3.4s, v3.4s", + "mov z16.d, p7/m, z3.d" ] }, "vcvtps2dq ymm0, ymm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0x5b 256-bit" ], "ExpectedArm64ASM": [ - "frinti z16.s, p7/m, z17.s", - "fcvtzs z16.s, p7/m, z16.s" + "mov z2.d, p7/m, z17.d", + "frinti z3.s, p7/m, z2.s", + "fcvtzs z3.s, p7/m, z3.s", + "mov z16.d, p7/m, z3.d" ] }, "vcvttps2dq xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b10 0x5b 128-bit" ], "ExpectedArm64ASM": [ - "fcvtzs v16.4s, v17.4s" + "mov z2.d, p7/m, z17.d", + "fcvtzs v3.4s, v2.4s", + "mov z16.d, p7/m, z3.d" ] }, "vcvttps2dq ymm0, ymm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b10 0x5b 256-bit" ], "ExpectedArm64ASM": [ - "fcvtzs z16.s, p7/m, z17.s" + "mov z2.d, p7/m, z17.d", + "fcvtzs z3.s, p7/m, z2.s", + "mov z16.d, p7/m, z3.d" ] }, "vsubps xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b00 0x5c 128-bit" ], "ExpectedArm64ASM": [ - "fsub v16.4s, v17.4s, v18.4s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fsub v4.4s, v2.4s, v3.4s", + "mov z16.d, p7/m, z4.d" ] }, "vsubps ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b00 0x5c 256-bit" ], "ExpectedArm64ASM": [ - "fsub z16.s, z17.s, z18.s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fsub z4.s, z2.s, z3.s", + "mov z16.d, p7/m, z4.d" ] }, "vsubpd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0x5c 128-bit" ], "ExpectedArm64ASM": [ - "fsub v16.2d, v17.2d, v18.2d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fsub v4.2d, v2.2d, v3.2d", + "mov z16.d, p7/m, z4.d" ] }, "vsubpd ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0x5c 256-bit" ], "ExpectedArm64ASM": [ - "fsub z16.d, z17.d, z18.d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fsub z4.d, z2.d, z3.d", + "mov z16.d, p7/m, z4.d" ] }, "vsubss xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b10 0x5c 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fsub s0, s17, s18", - "mov v16.s[0], v0.s[0]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fsub s0, s2, s3", + "mov v4.s[0], v0.s[0]", + "mov z16.d, p7/m, z4.d" ] }, "vsubsd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b11 0x5c 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fsub d0, d17, d18", - "mov v16.d[0], v0.d[0]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fsub d0, d2, d3", + "mov v4.d[0], v0.d[0]", + "mov z16.d, p7/m, z4.d" ] }, "vminps xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b00 0x5d 128-bit" ], "ExpectedArm64ASM": [ - "fcmgt v0.4s, v18.4s, v17.4s", - "mov v16.16b, v17.16b", - "bif v16.16b, v18.16b, v0.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmgt v0.4s, v3.4s, v2.4s", + "mov v4.16b, v2.16b", + "bif v4.16b, v3.16b, v0.16b", + "mov z16.d, p7/m, z4.d" ] }, "vminps ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "Map 1 0b00 0x5d 256-bit" ], "ExpectedArm64ASM": [ - "fcmgt p0.s, p7/z, z18.s, z17.s", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmgt p0.s, p7/z, z3.s, z2.s", "not p0.b, p7/z, p0.b", - "mov z0.d, z17.d", - "mov z0.s, p0/m, z18.s", - "mov z16.d, z0.d" + "mov z0.d, z2.d", + "mov z0.s, p0/m, z3.s", + "mov z4.d, z0.d", + "mov z16.d, p7/m, z4.d" ] }, "vminpd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b01 0x5d 128-bit" ], "ExpectedArm64ASM": [ - "fcmgt v0.2d, v18.2d, v17.2d", - "mov v16.16b, v17.16b", - "bif v16.16b, v18.16b, v0.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmgt v0.2d, v3.2d, v2.2d", + "mov v4.16b, v2.16b", + "bif v4.16b, v3.16b, v0.16b", + "mov z16.d, p7/m, z4.d" ] }, "vminpd ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "Map 1 0b01 0x5d 256-bit" ], "ExpectedArm64ASM": [ - "fcmgt p0.d, p7/z, z18.d, z17.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmgt p0.d, p7/z, z3.d, z2.d", "not p0.b, p7/z, p0.b", - "mov z0.d, z17.d", - "mov z0.d, p0/m, z18.d", - "mov z16.d, z0.d" + "mov z0.d, z2.d", + "mov z0.d, p0/m, z3.d", + "mov z4.d, z0.d", + "mov z16.d, p7/m, z4.d" ] }, "vminss xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": [ "Map 1 0b10 0x5d 128-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", "mrs x20, nzcv", - "mov v16.16b, v17.16b", - "fcmp s17, s18", - "fcsel s0, s17, s18, mi", - "mov v16.s[0], v0.s[0]", + "mov v4.16b, v2.16b", + "fcmp s2, s3", + "fcsel s0, s2, s3, mi", + "mov v4.s[0], v0.s[0]", + "mov z16.d, p7/m, z4.d", "msr nzcv, x20" ] }, "vminsd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": [ "Map 1 0b11 0x5d 128-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", "mrs x20, nzcv", - "mov v16.16b, v17.16b", - "fcmp d17, d18", - "fcsel d0, d17, d18, mi", - "mov v16.d[0], v0.d[0]", + "mov v4.16b, v2.16b", + "fcmp d2, d3", + "fcsel d0, d2, d3, mi", + "mov v4.d[0], v0.d[0]", + "mov z16.d, p7/m, z4.d", "msr nzcv, x20" ] }, "vdivps xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b00 0x5e 128-bit" ], "ExpectedArm64ASM": [ - "fdiv v16.4s, v17.4s, v18.4s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fdiv v4.4s, v2.4s, v3.4s", + "mov z16.d, p7/m, z4.d" ] }, "vdivps ymm0, ymm0, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "Aliasing source and destination", "Map 1 0b00 0x5e 256-bit" ], "ExpectedArm64ASM": [ - "fdiv z16.s, p7/m, z16.s, z18.s" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "fdiv z4.s, p7/m, z4.s, z3.s", + "mov z16.d, p7/m, z4.d" ] }, "vdivps ymm0, ymm1, ymm0": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "Aliasing source and destination", "Map 1 0b00 0x5e 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z0, z17", - "fdiv z0.s, p7/m, z0.s, z16.s", - "mov z16.d, z0.d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z16.d", + "movprfx z4, z2", + "fdiv z4.s, p7/m, z4.s, z3.s", + "mov z16.d, p7/m, z4.d" ] }, "vdivps ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b00 0x5e 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z16, z17", - "fdiv z16.s, p7/m, z16.s, z18.s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "fdiv z4.s, p7/m, z4.s, z3.s", + "mov z16.d, p7/m, z4.d" ] }, "vdivpd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0x5e 128-bit" ], "ExpectedArm64ASM": [ - "fdiv v16.2d, v17.2d, v18.2d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fdiv v4.2d, v2.2d, v3.2d", + "mov z16.d, p7/m, z4.d" ] }, "vdivpd ymm0, ymm1, ymm0": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "Aliasing source and destination", "Map 1 0b01 0x5e 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z0, z17", - "fdiv z0.d, p7/m, z0.d, z16.d", - "mov z16.d, z0.d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z16.d", + "movprfx z4, z2", + "fdiv z4.d, p7/m, z4.d, z3.d", + "mov z16.d, p7/m, z4.d" ] }, "vdivpd ymm0, ymm0, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "Aliasing source and destination", "Map 1 0b01 0x5e 256-bit" ], "ExpectedArm64ASM": [ - "fdiv z16.d, p7/m, z16.d, z18.d" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "fdiv z4.d, p7/m, z4.d, z3.d", + "mov z16.d, p7/m, z4.d" ] }, "vdivpd ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b01 0x5e 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z16, z17", - "fdiv z16.d, p7/m, z16.d, z18.d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "fdiv z4.d, p7/m, z4.d, z3.d", + "mov z16.d, p7/m, z4.d" ] }, "vdivss xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b10 0x5e 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fdiv s0, s17, s18", - "mov v16.s[0], v0.s[0]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fdiv s0, s2, s3", + "mov v4.s[0], v0.s[0]", + "mov z16.d, p7/m, z4.d" ] }, "vdivsd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b11 0x5e 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "fdiv d0, d17, d18", - "mov v16.d[0], v0.d[0]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "fdiv d0, d2, d3", + "mov v4.d[0], v0.d[0]", + "mov z16.d, p7/m, z4.d" ] }, "vmaxps xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b00 0x5f 128-bit" ], "ExpectedArm64ASM": [ - "fcmgt v0.4s, v18.4s, v17.4s", - "mov v16.16b, v17.16b", - "bit v16.16b, v18.16b, v0.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmgt v0.4s, v3.4s, v2.4s", + "mov v4.16b, v2.16b", + "bit v4.16b, v3.16b, v0.16b", + "mov z16.d, p7/m, z4.d" ] }, "vmaxps ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b00 0x5f 256-bit" ], "ExpectedArm64ASM": [ - "fcmgt p0.s, p7/z, z18.s, z17.s", - "mov z0.d, z17.d", - "mov z0.s, p0/m, z18.s", - "mov z16.d, z0.d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmgt p0.s, p7/z, z3.s, z2.s", + "mov z0.d, z2.d", + "mov z0.s, p0/m, z3.s", + "mov z4.d, z0.d", + "mov z16.d, p7/m, z4.d" ] }, "vmaxpd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b01 0x5f 128-bit" ], "ExpectedArm64ASM": [ - "fcmgt v0.2d, v18.2d, v17.2d", - "mov v16.16b, v17.16b", - "bit v16.16b, v18.16b, v0.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmgt v0.2d, v3.2d, v2.2d", + "mov v4.16b, v2.16b", + "bit v4.16b, v3.16b, v0.16b", + "mov z16.d, p7/m, z4.d" ] }, "vmaxpd ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b01 0x5f 256-bit" ], "ExpectedArm64ASM": [ - "fcmgt p0.d, p7/z, z18.d, z17.d", - "mov z0.d, z17.d", - "mov z0.d, p0/m, z18.d", - "mov z16.d, z0.d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fcmgt p0.d, p7/z, z3.d, z2.d", + "mov z0.d, z2.d", + "mov z0.d, p0/m, z3.d", + "mov z4.d, z0.d", + "mov z16.d, p7/m, z4.d" ] }, "vmaxss xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": [ "Map 1 0b10 0x5f 128-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", "mrs x20, nzcv", - "mov v16.16b, v17.16b", - "fcmp s17, s18", - "fcsel s0, s18, s17, mi", - "mov v16.s[0], v0.s[0]", + "mov v4.16b, v2.16b", + "fcmp s2, s3", + "fcsel s0, s3, s2, mi", + "mov v4.s[0], v0.s[0]", + "mov z16.d, p7/m, z4.d", "msr nzcv, x20" ] }, "vmaxsd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": [ "Map 1 0b11 0x5f 128-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", "mrs x20, nzcv", - "mov v16.16b, v17.16b", - "fcmp d17, d18", - "fcsel d0, d18, d17, mi", - "mov v16.d[0], v0.d[0]", + "mov v4.16b, v2.16b", + "fcmp d2, d3", + "fcsel d0, d3, d2, mi", + "mov v4.d[0], v0.d[0]", + "mov z16.d, p7/m, z4.d", "msr nzcv, x20" ] }, "vpunpckhbw xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0x68 128-bit" ], "ExpectedArm64ASM": [ - "zip2 v16.16b, v17.16b, v18.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "zip2 v4.16b, v2.16b, v3.16b", + "mov z16.d, p7/m, z4.d" ] }, "vpunpckhbw ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "Map 1 0b01 0x68 256-bit" ], "ExpectedArm64ASM": [ - "zip1 z2.b, z17.b, z18.b", - "zip2 z3.b, z17.b, z18.b", - "mov z1.q, z2.q[1]", - "mov z16.d, z3.d", - "mov z16.b, p6/m, z1.b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "zip1 z4.b, z2.b, z3.b", + "zip2 z5.b, z2.b, z3.b", + "mov z1.q, z4.q[1]", + "mov z2.d, z5.d", + "mov z2.b, p6/m, z1.b", + "mov z16.d, p7/m, z2.d" ] }, "vpunpckhwd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0x69 128-bit" ], "ExpectedArm64ASM": [ - "zip2 v16.8h, v17.8h, v18.8h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "zip2 v4.8h, v2.8h, v3.8h", + "mov z16.d, p7/m, z4.d" ] }, "vpunpckhwd ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "Map 1 0b01 0x69 256-bit" ], "ExpectedArm64ASM": [ - "zip1 z2.h, z17.h, z18.h", - "zip2 z3.h, z17.h, z18.h", - "mov z1.q, z2.q[1]", - "mov z16.d, z3.d", - "mov z16.b, p6/m, z1.b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "zip1 z4.h, z2.h, z3.h", + "zip2 z5.h, z2.h, z3.h", + "mov z1.q, z4.q[1]", + "mov z2.d, z5.d", + "mov z2.b, p6/m, z1.b", + "mov z16.d, p7/m, z2.d" ] }, "vpunpckhdq xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0x6a 128-bit" ], "ExpectedArm64ASM": [ - "zip2 v16.4s, v17.4s, v18.4s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "zip2 v4.4s, v2.4s, v3.4s", + "mov z16.d, p7/m, z4.d" ] }, "vpunpckhdq ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "Map 1 0b01 0x6a 256-bit" ], "ExpectedArm64ASM": [ - "zip1 z2.s, z17.s, z18.s", - "zip2 z3.s, z17.s, z18.s", - "mov z1.q, z2.q[1]", - "mov z16.d, z3.d", - "mov z16.b, p6/m, z1.b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "zip1 z4.s, z2.s, z3.s", + "zip2 z5.s, z2.s, z3.s", + "mov z1.q, z4.q[1]", + "mov z2.d, z5.d", + "mov z2.b, p6/m, z1.b", + "mov z16.d, p7/m, z2.d" ] }, "vpackssdw xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b01 0x6b 128-bit" ], "ExpectedArm64ASM": [ - "sqxtn v16.4h, v17.4s", - "sqxtn2 v16.8h, v18.4s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "sqxtn v4.4h, v2.4s", + "sqxtn2 v4.8h, v3.4s", + "mov z16.d, p7/m, z4.d" ] }, "vpackssdw ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 22, "Comment": [ "Map 1 0b01 0x6b 256-bit" ], "ExpectedArm64ASM": [ - "sqxtnb z1.h, z18.s", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "sqxtnb z1.h, z3.s", "uzp1 z1.h, z1.h, z1.h", - "sqxtnb z2.h, z17.s", - "uzp1 z2.h, z2.h, z2.h", - "splice z2.h, p6, z2.h, z1.h", - "mov z1.d, z2.d[1]", - "mov z3.d, z2.d", + "sqxtnb z4.h, z2.s", + "uzp1 z4.h, z4.h, z4.h", + "splice z4.h, p6, z4.h, z1.h", + "mov z1.d, z4.d[1]", + "mov z2.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z3.d, p0/m, z1.d", + "mov z2.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z2.d[2]", - "mov z16.d, z3.d", + "mov z1.d, z4.d[2]", + "mov z3.d, z2.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z3.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vpunpcklqdq xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0x6c 128-bit" ], "ExpectedArm64ASM": [ - "zip1 v16.2d, v17.2d, v18.2d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "zip1 v4.2d, v2.2d, v3.2d", + "mov z16.d, p7/m, z4.d" ] }, "vpunpcklqdq ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": [ "Map 1 0b01 0x6c 256-bit" ], "ExpectedArm64ASM": [ - "zip1 z2.d, z17.d, z18.d", - "zip2 z3.d, z17.d, z18.d", - "mov z1.q, q3", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "zip1 z4.d, z2.d, z3.d", + "zip2 z5.d, z2.d, z3.d", + "mov z1.q, q5", + "mov z2.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z2.b, p0/m, z1.b", + "mov z16.d, p7/m, z2.d" ] }, "vpunpckhqdq xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0x6d 128-bit" ], "ExpectedArm64ASM": [ - "zip2 v16.2d, v17.2d, v18.2d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "zip2 v4.2d, v2.2d, v3.2d", + "mov z16.d, p7/m, z4.d" ] }, "vpunpckhqdq ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "Map 1 0b01 0x6d 256-bit" ], "ExpectedArm64ASM": [ - "zip1 z2.d, z17.d, z18.d", - "zip2 z3.d, z17.d, z18.d", - "mov z1.q, z2.q[1]", - "mov z16.d, z3.d", - "mov z16.b, p6/m, z1.b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "zip1 z4.d, z2.d, z3.d", + "zip2 z5.d, z2.d, z3.d", + "mov z1.q, z4.q[1]", + "mov z2.d, z5.d", + "mov z2.b, p6/m, z1.b", + "mov z16.d, p7/m, z2.d" ] }, "vmovd xmm0, dword [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b01 0x6e 128-bit" ], "ExpectedArm64ASM": [ - "ldr s16, [x4]" + "mov x20, x4", + "ldr s2, [x20]", + "mov z16.d, p7/m, z2.d" ] }, "vmovq xmm0, qword [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b01 0x6e 128-bit" ], "ExpectedArm64ASM": [ - "ldr d16, [x4]" + "mov x20, x4", + "ldr d2, [x20]", + "mov z16.d, p7/m, z2.d" ] }, "vmovdqa xmm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b01 0x6f 128-bit" ], "ExpectedArm64ASM": [ - "ldr q16, [x4]" + "mov x20, x4", + "ldr q2, [x20]", + "mov z16.d, p7/m, z2.d" ] }, "vmovdqa [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b01 0x6f 128-bit" ], "ExpectedArm64ASM": [ - "str q16, [x4]" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "str q2, [x20]" ] }, "vmovdqu xmm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b10 0x6f 128-bit" ], "ExpectedArm64ASM": [ - "ldr q16, [x4]" + "mov x20, x4", + "ldr q2, [x20]", + "mov z16.d, p7/m, z2.d" ] }, "vmovdqu [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b10 0x6f 128-bit" ], "ExpectedArm64ASM": [ - "str q16, [x4]" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "str q2, [x20]" ] }, "vhaddpd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0x7c 128-bit" ], "ExpectedArm64ASM": [ - "faddp v16.2d, v17.2d, v18.2d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "faddp v4.2d, v2.2d, v3.2d", + "mov z16.d, p7/m, z4.d" ] }, "vhaddpd ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 22, "Comment": [ "Map 1 0b01 0x7c 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z0, z17", - "faddp z0.d, p7/m, z0.d, z18.d", - "uzp1 z2.d, z0.d, z0.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movprfx z0, z2", + "faddp z0.d, p7/m, z0.d, z3.d", + "uzp1 z4.d, z0.d, z0.d", "uzp2 z1.d, z0.d, z0.d", - "splice z2.d, p6, z2.d, z1.d", - "mov z1.d, z2.d[2]", - "mov z3.d, z2.d", + "splice z4.d, p6, z4.d, z1.d", + "mov z1.d, z4.d[2]", + "mov z2.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z3.d, p0/m, z1.d", + "mov z2.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z2.d[1]", - "mov z16.d, z3.d", + "mov z1.d, z4.d[1]", + "mov z3.d, z2.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z3.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vhaddps xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b11 0x7c 128-bit" ], "ExpectedArm64ASM": [ - "faddp v16.4s, v17.4s, v18.4s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "faddp v4.4s, v2.4s, v3.4s", + "mov z16.d, p7/m, z4.d" ] }, "vhaddps ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 22, "Comment": [ "Map 1 0b11 0x7c 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z0, z17", - "faddp z0.s, p7/m, z0.s, z18.s", - "uzp1 z2.s, z0.s, z0.s", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movprfx z0, z2", + "faddp z0.s, p7/m, z0.s, z3.s", + "uzp1 z4.s, z0.s, z0.s", "uzp2 z1.s, z0.s, z0.s", - "splice z2.d, p6, z2.d, z1.d", - "mov z1.d, z2.d[2]", - "mov z3.d, z2.d", + "splice z4.d, p6, z4.d, z1.d", + "mov z1.d, z4.d[2]", + "mov z2.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z3.d, p0/m, z1.d", + "mov z2.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z2.d[1]", - "mov z16.d, z3.d", + "mov z1.d, z4.d[1]", + "mov z3.d, z2.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z3.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vhsubpd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b01 0x7d 128-bit" ], "ExpectedArm64ASM": [ - "uzp1 v2.2d, v17.2d, v18.2d", - "uzp2 v3.2d, v17.2d, v18.2d", - "fsub v16.2d, v2.2d, v3.2d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "uzp1 v4.2d, v2.2d, v3.2d", + "uzp2 v5.2d, v2.2d, v3.2d", + "fsub v2.2d, v4.2d, v5.2d", + "mov z16.d, p7/m, z2.d" ] }, "vhsubpd ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 20, "Comment": [ "Map 1 0b01 0x7d 256-bit" ], "ExpectedArm64ASM": [ - "uzp1 z2.d, z17.d, z18.d", - "uzp2 z3.d, z17.d, z18.d", - "fsub z2.d, z2.d, z3.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "uzp1 z4.d, z2.d, z3.d", + "uzp2 z5.d, z2.d, z3.d", + "fsub z2.d, z4.d, z5.d", "mov z1.d, z2.d[2]", "mov z3.d, z2.d", "mrs x0, nzcv", @@ -4231,34 +5187,40 @@ "mov z3.d, p0/m, z1.d", "msr nzcv, x0", "mov z1.d, z2.d[1]", - "mov z16.d, z3.d", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z4.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vhsubps xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b11 0x7d 128-bit" ], "ExpectedArm64ASM": [ - "uzp1 v2.4s, v17.4s, v18.4s", - "uzp2 v3.4s, v17.4s, v18.4s", - "fsub v16.4s, v2.4s, v3.4s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "uzp1 v4.4s, v2.4s, v3.4s", + "uzp2 v5.4s, v2.4s, v3.4s", + "fsub v2.4s, v4.4s, v5.4s", + "mov z16.d, p7/m, z2.d" ] }, "vhsubps ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 20, "Comment": [ "Map 1 0b11 0x7d 256-bit" ], "ExpectedArm64ASM": [ - "uzp1 z2.s, z17.s, z18.s", - "uzp2 z3.s, z17.s, z18.s", - "fsub z2.s, z2.s, z3.s", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "uzp1 z4.s, z2.s, z3.s", + "uzp2 z5.s, z2.s, z3.s", + "fsub z2.s, z4.s, z5.s", "mov z1.d, z2.d[2]", "mov z3.d, z2.d", "mrs x0, nzcv", @@ -4267,1076 +5229,1373 @@ "mov z3.d, p0/m, z1.d", "msr nzcv, x0", "mov z1.d, z2.d[1]", - "mov z16.d, z3.d", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z4.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vmovd dword [rax], xmm0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b01 0x7e 128-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z16.d", "movi v0.2d, #0x0", - "mov v0.s[0], v16.s[0]", - "mov v2.16b, v0.16b", - "str s2, [x4]" + "mov v0.s[0], v2.s[0]", + "mov v3.16b, v0.16b", + "mov x20, x4", + "str s3, [x20]" ] }, "vmovq qword [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b01 0x7e 128-bit" ], "ExpectedArm64ASM": [ - "str d16, [x4]" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "str d2, [x20]" ] }, "vmovdqa ymm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b01 0x7f 128-bit" ], "ExpectedArm64ASM": [ - "ld1b {z16.b}, p7/z, [x4]" + "mov x20, x4", + "ld1b {z2.b}, p7/z, [x20]", + "mov z16.d, p7/m, z2.d" ] }, "vmovdqa [rax], ymm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b01 0x7f 128-bit" ], "ExpectedArm64ASM": [ - "st1b {z16.b}, p7, [x4]" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "st1b {z2.b}, p7, [x20]" ] }, "vmovdqu ymm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b10 0x7f 128-bit" ], "ExpectedArm64ASM": [ - "ld1b {z16.b}, p7/z, [x4]" + "mov x20, x4", + "ld1b {z2.b}, p7/z, [x20]", + "mov z16.d, p7/m, z2.d" ] }, "vmovdqu [rax], ymm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b10 0x7f 128-bit" ], "ExpectedArm64ASM": [ - "st1b {z16.b}, p7, [x4]" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "st1b {z2.b}, p7, [x20]" ] }, "vaddsubpd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b01 0xd0 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2112]", - "eor v2.16b, v18.16b, v2.16b", - "fadd v16.2d, v17.2d, v2.2d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "ldr q4, [x28, #2112]", + "eor v5.16b, v3.16b, v4.16b", + "fadd v3.2d, v2.2d, v5.2d", + "mov z16.d, p7/m, z3.d" ] }, "vaddsubpd ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b01 0xd0 256-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", "ldr x0, [x28, #1600]", - "ld1b {z2.b}, p7/z, [x0]", - "eor z2.d, z18.d, z2.d", - "fadd z16.d, z17.d, z2.d" + "ld1b {z4.b}, p7/z, [x0]", + "eor z5.d, z3.d, z4.d", + "fadd z3.d, z2.d, z5.d", + "mov z16.d, p7/m, z3.d" ] }, "vaddsubps xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b11 0xd0 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2080]", - "eor v2.16b, v18.16b, v2.16b", - "fadd v16.4s, v17.4s, v2.4s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "ldr q4, [x28, #2080]", + "eor v5.16b, v3.16b, v4.16b", + "fadd v3.4s, v2.4s, v5.4s", + "mov z16.d, p7/m, z3.d" ] }, "vaddsubps ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b11 0xd0 256-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", "ldr x0, [x28, #1584]", - "ld1b {z2.b}, p7/z, [x0]", - "eor z2.d, z18.d, z2.d", - "fadd z16.s, z17.s, z2.s" + "ld1b {z4.b}, p7/z, [x0]", + "eor z5.d, z3.d, z4.d", + "fadd z3.s, z2.s, z5.s", + "mov z16.d, p7/m, z3.d" ] }, "vpsrlw xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b01 0xd1 128-bit" ], "ExpectedArm64ASM": [ - "mov z0.d, d18", - "movprfx z2, z17", - "lsr z2.h, p6/m, z2.h, z0.d", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z0.d, d3", + "movprfx z4, z2", + "lsr z4.h, p6/m, z4.h, z0.d", + "mov v2.16b, v4.16b", + "mov z16.d, p7/m, z2.d" ] }, "vpsrlw ymm0, ymm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b01 0xd1 256-bit" ], "ExpectedArm64ASM": [ - "mov z0.d, d18", - "movprfx z16, z17", - "lsr z16.h, p7/m, z16.h, z0.d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z0.d, d3", + "movprfx z4, z2", + "lsr z4.h, p7/m, z4.h, z0.d", + "mov z16.d, p7/m, z4.d" ] }, "vpsrld xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b01 0xd2 128-bit" ], "ExpectedArm64ASM": [ - "mov z0.d, d18", - "movprfx z2, z17", - "lsr z2.s, p6/m, z2.s, z0.d", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z0.d, d3", + "movprfx z4, z2", + "lsr z4.s, p6/m, z4.s, z0.d", + "mov v2.16b, v4.16b", + "mov z16.d, p7/m, z2.d" ] }, "vpsrld ymm0, ymm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b01 0xd2 256-bit" ], "ExpectedArm64ASM": [ - "mov z0.d, d18", - "movprfx z16, z17", - "lsr z16.s, p7/m, z16.s, z0.d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z0.d, d3", + "movprfx z4, z2", + "lsr z4.s, p7/m, z4.s, z0.d", + "mov z16.d, p7/m, z4.d" ] }, "vpsrlq xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b01 0xd3 128-bit" ], "ExpectedArm64ASM": [ - "mov z0.d, d18", - "movprfx z2, z17", - "lsr z2.d, p6/m, z2.d, z0.d", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z0.d, d3", + "movprfx z4, z2", + "lsr z4.d, p6/m, z4.d, z0.d", + "mov v2.16b, v4.16b", + "mov z16.d, p7/m, z2.d" ] }, "vpsrlq ymm0, ymm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b01 0xd3 256-bit" ], "ExpectedArm64ASM": [ - "mov z0.d, d18", - "movprfx z16, z17", - "lsr z16.d, p7/m, z16.d, z0.d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z0.d, d3", + "movprfx z4, z2", + "lsr z4.d, p7/m, z4.d, z0.d", + "mov z16.d, p7/m, z4.d" ] }, "vpaddq xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xd4 128-bit" ], "ExpectedArm64ASM": [ - "add v16.2d, v17.2d, v18.2d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "add v4.2d, v2.2d, v3.2d", + "mov z16.d, p7/m, z4.d" ] }, "vpaddq ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xd4 256-bit" ], "ExpectedArm64ASM": [ - "add z16.d, z17.d, z18.d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "add z4.d, z2.d, z3.d", + "mov z16.d, p7/m, z4.d" ] }, "vpmullw xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xd5 128-bit" ], "ExpectedArm64ASM": [ - "mul v16.8h, v17.8h, v18.8h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mul v4.8h, v2.8h, v3.8h", + "mov z16.d, p7/m, z4.d" ] }, "vpmullw ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xd4 256-bit" ], "ExpectedArm64ASM": [ - "mul z16.h, z17.h, z18.h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mul z4.h, z2.h, z3.h", + "mov z16.d, p7/m, z4.d" ] }, "vmovq [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b01 0xd6 256-bit" ], "ExpectedArm64ASM": [ - "str d16, [x4]" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "str d2, [x20]" ] }, "vpmovmskb rax, xmm0": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": [ "Map 1 0b01 0xd7 256-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2272]", - "cmlt v3.16b, v16.16b, #0", - "and v2.16b, v3.16b, v2.16b", - "addp v2.16b, v2.16b, v2.16b", - "addp v2.8b, v2.8b, v2.8b", - "addp v2.8b, v2.8b, v2.8b", - "umov w4, v2.h[0]" + "mov z2.d, p7/m, z16.d", + "ldr q3, [x28, #2272]", + "cmlt v4.16b, v2.16b, #0", + "and v2.16b, v4.16b, v3.16b", + "addp v3.16b, v2.16b, v2.16b", + "addp v2.8b, v3.8b, v3.8b", + "addp v3.8b, v2.8b, v2.8b", + "umov w20, v3.h[0]", + "mov x4, x20" ] }, "vpmovmskb rax, ymm0": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "Map 1 0b01 0xd7 256-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z16.d", "ldr x0, [x28, #1680]", - "ld1b {z2.b}, p7/z, [x0]", + "ld1b {z3.b}, p7/z, [x0]", "mrs x0, nzcv", "mov z0.d, #0", - "cmplt p0.b, p7/z, z16.b, #0", - "not z0.b, p0/m, z16.b", - "orr z0.b, p0/m, z0.b, z16.b", - "mov z3.d, z0.d", + "cmplt p0.b, p7/z, z2.b, #0", + "not z0.b, p0/m, z2.b", + "orr z0.b, p0/m, z0.b, z2.b", + "mov z4.d, z0.d", "msr nzcv, x0", - "and z2.d, z3.d, z2.d", + "and z2.d, z4.d, z3.d", "movprfx z0, z2", "addp z0.b, p7/m, z0.b, z2.b", - "uzp1 z2.b, z0.b, z0.b", + "uzp1 z3.b, z0.b, z0.b", "uzp2 z1.b, z0.b, z0.b", - "splice z2.d, p6, z2.d, z1.d", - "addp v2.16b, v2.16b, v2.16b", - "addp v2.8b, v2.8b, v2.8b", - "mov w4, v2.s[0]" + "splice z3.d, p6, z3.d, z1.d", + "addp v2.16b, v3.16b, v3.16b", + "addp v3.8b, v2.8b, v2.8b", + "mov w20, v3.s[0]", + "mov x4, x20" ] }, "vpsubusb xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xd8 128-bit" ], "ExpectedArm64ASM": [ - "uqsub v16.16b, v17.16b, v18.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "uqsub v4.16b, v2.16b, v3.16b", + "mov z16.d, p7/m, z4.d" ] }, "vpsubusb ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xd8 256-bit" ], "ExpectedArm64ASM": [ - "uqsub z16.b, z17.b, z18.b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "uqsub z4.b, z2.b, z3.b", + "mov z16.d, p7/m, z4.d" ] }, "vpsubusw xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xd9 128-bit" ], "ExpectedArm64ASM": [ - "uqsub v16.8h, v17.8h, v18.8h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "uqsub v4.8h, v2.8h, v3.8h", + "mov z16.d, p7/m, z4.d" ] }, "vpsubusw ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xd9 256-bit" ], "ExpectedArm64ASM": [ - "uqsub z16.h, z17.h, z18.h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "uqsub z4.h, z2.h, z3.h", + "mov z16.d, p7/m, z4.d" ] }, "vpminub xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xda 128-bit" ], "ExpectedArm64ASM": [ - "umin v16.16b, v17.16b, v18.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "umin v4.16b, v2.16b, v3.16b", + "mov z16.d, p7/m, z4.d" ] }, "vpminub ymm0, ymm1, ymm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "Aliasing source and destination", "Map 1 0b01 0xda 256-bit" ], "ExpectedArm64ASM": [ - "umin z16.b, p7/m, z16.b, z17.b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z16.d", + "movprfx z4, z2", + "umin z4.b, p7/m, z4.b, z3.b", + "mov z16.d, p7/m, z4.d" ] }, "vpminub ymm0, ymm0, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "Aliasing source and destination", "Map 1 0b01 0xda 256-bit" ], "ExpectedArm64ASM": [ - "umin z16.b, p7/m, z16.b, z18.b" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "umin z4.b, p7/m, z4.b, z3.b", + "mov z16.d, p7/m, z4.d" ] }, "vpminub ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b01 0xda 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z16, z17", - "umin z16.b, p7/m, z16.b, z18.b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "umin z4.b, p7/m, z4.b, z3.b", + "mov z16.d, p7/m, z4.d" ] }, "vpand xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xdb 128-bit" ], "ExpectedArm64ASM": [ - "and v16.16b, v17.16b, v18.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "and v4.16b, v2.16b, v3.16b", + "mov z16.d, p7/m, z4.d" ] }, "vpand ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xdb 256-bit" ], "ExpectedArm64ASM": [ - "and z16.d, z17.d, z18.d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "and z4.d, z2.d, z3.d", + "mov z16.d, p7/m, z4.d" ] }, "vpaddusb xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xdc 128-bit" ], "ExpectedArm64ASM": [ - "uqadd v16.16b, v17.16b, v18.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "uqadd v4.16b, v2.16b, v3.16b", + "mov z16.d, p7/m, z4.d" ] }, "vpaddusb ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xdc 256-bit" ], "ExpectedArm64ASM": [ - "uqadd z16.b, z17.b, z18.b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "uqadd z4.b, z2.b, z3.b", + "mov z16.d, p7/m, z4.d" ] }, "vpaddusw xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xdd 128-bit" ], "ExpectedArm64ASM": [ - "uqadd v16.8h, v17.8h, v18.8h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "uqadd v4.8h, v2.8h, v3.8h", + "mov z16.d, p7/m, z4.d" ] }, "vpaddusw ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xdd 256-bit" ], "ExpectedArm64ASM": [ - "uqadd z16.h, z17.h, z18.h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "uqadd z4.h, z2.h, z3.h", + "mov z16.d, p7/m, z4.d" ] }, "vpmaxub xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Aliasing source and destination", "Map 1 0b01 0xdd 128-bit" ], "ExpectedArm64ASM": [ - "umax v16.16b, v17.16b, v18.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "umax v4.16b, v2.16b, v3.16b", + "mov z16.d, p7/m, z4.d" ] }, "vpmaxub ymm0, ymm0, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "Aliasing source and destination", "Map 1 0b01 0xde 256-bit" ], "ExpectedArm64ASM": [ - "umax z16.b, p7/m, z16.b, z18.b" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "umax z4.b, p7/m, z4.b, z3.b", + "mov z16.d, p7/m, z4.d" ] }, "vpmaxub ymm0, ymm1, ymm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "Aliasing source and destination", "Map 1 0b01 0xde 256-bit" ], "ExpectedArm64ASM": [ - "umax z16.b, p7/m, z16.b, z17.b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z16.d", + "movprfx z4, z2", + "umax z4.b, p7/m, z4.b, z3.b", + "mov z16.d, p7/m, z4.d" ] }, "vpmaxub ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b01 0xde 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z16, z17", - "umax z16.b, p7/m, z16.b, z18.b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "umax z4.b, p7/m, z4.b, z3.b", + "mov z16.d, p7/m, z4.d" ] }, "vpandn xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xdf 128-bit" ], "ExpectedArm64ASM": [ - "bic v16.16b, v18.16b, v17.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "bic v4.16b, v3.16b, v2.16b", + "mov z16.d, p7/m, z4.d" ] }, "vpandn ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xdf 256-bit" ], "ExpectedArm64ASM": [ - "bic z16.d, z18.d, z17.d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "bic z4.d, z3.d, z2.d", + "mov z16.d, p7/m, z4.d" ] }, "vpavgb xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xe0 128-bit" ], "ExpectedArm64ASM": [ - "urhadd v16.16b, v17.16b, v18.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "urhadd v4.16b, v2.16b, v3.16b", + "mov z16.d, p7/m, z4.d" ] }, "vpavgb ymm0, ymm1, ymm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "Aliasing source and destination", "Map 1 0b01 0xe0 256-bit" ], "ExpectedArm64ASM": [ - "urhadd z16.b, p7/m, z16.b, z17.b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z16.d", + "movprfx z4, z2", + "urhadd z4.b, p7/m, z4.b, z3.b", + "mov z16.d, p7/m, z4.d" ] }, "vpavgb ymm0, ymm0, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "Aliasing source and destination", "Map 1 0b01 0xe0 256-bit" ], "ExpectedArm64ASM": [ - "urhadd z16.b, p7/m, z16.b, z18.b" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "urhadd z4.b, p7/m, z4.b, z3.b", + "mov z16.d, p7/m, z4.d" ] }, "vpavgb ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b01 0xe0 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z16, z17", - "urhadd z16.b, p7/m, z16.b, z18.b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "urhadd z4.b, p7/m, z4.b, z3.b", + "mov z16.d, p7/m, z4.d" ] }, "vpsraw xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b01 0xe1 128-bit" ], "ExpectedArm64ASM": [ - "mov z0.d, d18", - "movprfx z2, z17", - "asr z2.h, p6/m, z2.h, z0.d", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z0.d, d3", + "movprfx z4, z2", + "asr z4.h, p6/m, z4.h, z0.d", + "mov v2.16b, v4.16b", + "mov z16.d, p7/m, z2.d" ] }, "vpsraw ymm0, ymm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b01 0xe1 256-bit" ], "ExpectedArm64ASM": [ - "mov z0.d, d18", - "movprfx z16, z17", - "asr z16.h, p7/m, z16.h, z0.d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z0.d, d3", + "movprfx z4, z2", + "asr z4.h, p7/m, z4.h, z0.d", + "mov z16.d, p7/m, z4.d" ] }, "vpsrad xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b01 0xe2 128-bit" ], "ExpectedArm64ASM": [ - "mov z0.d, d18", - "movprfx z2, z17", - "asr z2.s, p6/m, z2.s, z0.d", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z0.d, d3", + "movprfx z4, z2", + "asr z4.s, p6/m, z4.s, z0.d", + "mov v2.16b, v4.16b", + "mov z16.d, p7/m, z2.d" ] }, "vpsrad ymm0, ymm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b01 0xe2 256-bit" ], "ExpectedArm64ASM": [ - "mov z0.d, d18", - "movprfx z16, z17", - "asr z16.s, p7/m, z16.s, z0.d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z0.d, d3", + "movprfx z4, z2", + "asr z4.s, p7/m, z4.s, z0.d", + "mov z16.d, p7/m, z4.d" ] }, "vpavgw xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xe3 128-bit" ], "ExpectedArm64ASM": [ - "urhadd v16.8h, v17.8h, v18.8h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "urhadd v4.8h, v2.8h, v3.8h", + "mov z16.d, p7/m, z4.d" ] }, "vpavgw ymm0, ymm1, ymm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "Aliasing source and destination", "Map 1 0b01 0xe3 256-bit" ], "ExpectedArm64ASM": [ - "urhadd z16.h, p7/m, z16.h, z17.h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z16.d", + "movprfx z4, z2", + "urhadd z4.h, p7/m, z4.h, z3.h", + "mov z16.d, p7/m, z4.d" ] }, "vpavgw ymm0, ymm0, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "Aliasing source and destination", "Map 1 0b01 0xe3 256-bit" ], "ExpectedArm64ASM": [ - "urhadd z16.h, p7/m, z16.h, z18.h" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "urhadd z4.h, p7/m, z4.h, z3.h", + "mov z16.d, p7/m, z4.d" ] }, "vpavgw ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b01 0xe3 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z16, z17", - "urhadd z16.h, p7/m, z16.h, z18.h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "urhadd z4.h, p7/m, z4.h, z3.h", + "mov z16.d, p7/m, z4.d" ] }, "vpmulhuw xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b01 0xe4 128-bit" ], "ExpectedArm64ASM": [ - "movprfx z2, z17", - "umulh z2.h, p6/m, z2.h, z18.h", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "umulh z4.h, p6/m, z4.h, z3.h", + "mov v2.16b, v4.16b", + "mov z16.d, p7/m, z2.d" ] }, "vpmulhuw ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xe4 256-bit" ], "ExpectedArm64ASM": [ - "umulh z16.h, z17.h, z18.h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "umulh z4.h, z2.h, z3.h", + "mov z16.d, p7/m, z4.d" ] }, "vpmulhw xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b01 0xe5 128-bit" ], "ExpectedArm64ASM": [ - "movprfx z2, z17", - "smulh z2.h, p6/m, z2.h, z18.h", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "smulh z4.h, p6/m, z4.h, z3.h", + "mov v2.16b, v4.16b", + "mov z16.d, p7/m, z2.d" ] }, "vpmulhw ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xe5 256-bit" ], "ExpectedArm64ASM": [ - "smulh z16.h, z17.h, z18.h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "smulh z4.h, z2.h, z3.h", + "mov z16.d, p7/m, z4.d" ] }, "vcvttpd2dq xmm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b01 0xe6 128-bit" ], "ExpectedArm64ASM": [ - "fcvtn v2.2s, v17.2d", - "fcvtzs v2.4s, v2.4s", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z17.d", + "fcvtn v3.2s, v2.2d", + "fcvtzs v2.4s, v3.4s", + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vcvttpd2dq xmm0, ymm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b01 0xe6 256-bit" ], "ExpectedArm64ASM": [ - "fcvtnt z2.s, p7/m, z17.d", - "uzp2 z2.s, z2.s, z2.s", - "fcvtzs z2.s, p7/m, z2.s", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z17.d", + "fcvtnt z3.s, p7/m, z2.d", + "uzp2 z3.s, z3.s, z3.s", + "fcvtzs z2.s, p7/m, z3.s", + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vcvtdq2pd xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b10 0xe6 128-bit" ], "ExpectedArm64ASM": [ - "sxtl v2.2d, v17.2s", - "scvtf v16.2d, v2.2d" + "mov z2.d, p7/m, z17.d", + "sxtl v3.2d, v2.2s", + "scvtf v2.2d, v3.2d", + "mov z16.d, p7/m, z2.d" ] }, "vcvtdq2pd ymm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b10 0xe6 256-bit" ], "ExpectedArm64ASM": [ - "sunpklo z2.d, z17.s", - "scvtf z16.d, p7/m, z2.d" + "mov z2.d, p7/m, z17.d", + "sunpklo z3.d, z2.s", + "scvtf z2.d, p7/m, z3.d", + "mov z16.d, p7/m, z2.d" ] }, "vcvtpd2dq xmm0, xmm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b11 0xe6 128-bit" ], "ExpectedArm64ASM": [ - "fcvtn v2.2s, v17.2d", - "frinti v2.4s, v2.4s", + "mov z2.d, p7/m, z17.d", + "fcvtn v3.2s, v2.2d", + "frinti v2.4s, v3.4s", "fcvtzs v2.4s, v2.4s", - "mov v16.16b, v2.16b" + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vcvtpd2dq xmm0, ymm1": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b11 0xe6 256-bit" ], "ExpectedArm64ASM": [ - "fcvtnt z2.s, p7/m, z17.d", - "uzp2 z2.s, z2.s, z2.s", - "frinti z2.s, p7/m, z2.s", + "mov z2.d, p7/m, z17.d", + "fcvtnt z3.s, p7/m, z2.d", + "uzp2 z3.s, z3.s, z3.s", + "frinti z2.s, p7/m, z3.s", "fcvtzs z2.s, p7/m, z2.s", - "mov v16.16b, v2.16b" + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vmovntdq [rax], xmm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b01 0xe7 128-bit" ], "ExpectedArm64ASM": [ - "str q16, [x4]" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "str q2, [x20]" ] }, "vmovntdq [rax], ymm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b01 0xe7 256-bit" ], "ExpectedArm64ASM": [ - "st1b {z16.b}, p7, [x4]" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "st1b {z2.b}, p7, [x20]" ] }, "vpsubsb xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xe8 128-bit" ], "ExpectedArm64ASM": [ - "sqsub v16.16b, v17.16b, v18.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "sqsub v4.16b, v2.16b, v3.16b", + "mov z16.d, p7/m, z4.d" ] }, "vpsubsb ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xe8 256-bit" ], "ExpectedArm64ASM": [ - "sqsub z16.b, z17.b, z18.b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "sqsub z4.b, z2.b, z3.b", + "mov z16.d, p7/m, z4.d" ] }, "vpsubsw xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xe9 128-bit" ], "ExpectedArm64ASM": [ - "sqsub v16.8h, v17.8h, v18.8h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "sqsub v4.8h, v2.8h, v3.8h", + "mov z16.d, p7/m, z4.d" ] }, "vpsubsw ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xe9 256-bit" ], "ExpectedArm64ASM": [ - "sqsub z16.h, z17.h, z18.h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "sqsub z4.h, z2.h, z3.h", + "mov z16.d, p7/m, z4.d" ] }, "vpminsw xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xea 128-bit" ], "ExpectedArm64ASM": [ - "smin v16.8h, v17.8h, v18.8h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "smin v4.8h, v2.8h, v3.8h", + "mov z16.d, p7/m, z4.d" ] }, "vpminsw ymm0, ymm1, ymm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "Aliasing source and destination", "Map 1 0b01 0xea 256-bit" ], "ExpectedArm64ASM": [ - "smin z16.h, p7/m, z16.h, z17.h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z16.d", + "movprfx z4, z2", + "smin z4.h, p7/m, z4.h, z3.h", + "mov z16.d, p7/m, z4.d" ] }, "vpminsw ymm0, ymm0, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "Aliasing source and destination", "Map 1 0b01 0xea 256-bit" ], "ExpectedArm64ASM": [ - "smin z16.h, p7/m, z16.h, z18.h" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "smin z4.h, p7/m, z4.h, z3.h", + "mov z16.d, p7/m, z4.d" ] }, "vpminsw ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b01 0xea 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z16, z17", - "smin z16.h, p7/m, z16.h, z18.h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "smin z4.h, p7/m, z4.h, z3.h", + "mov z16.d, p7/m, z4.d" ] }, "vpor xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xeb 128-bit" ], "ExpectedArm64ASM": [ - "orr v16.16b, v17.16b, v18.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "orr v4.16b, v2.16b, v3.16b", + "mov z16.d, p7/m, z4.d" ] }, "vpor ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xeb 256-bit" ], "ExpectedArm64ASM": [ - "orr z16.d, z17.d, z18.d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "orr z4.d, z2.d, z3.d", + "mov z16.d, p7/m, z4.d" ] }, "vpaddsb xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xec 128-bit" ], "ExpectedArm64ASM": [ - "sqadd v16.16b, v17.16b, v18.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "sqadd v4.16b, v2.16b, v3.16b", + "mov z16.d, p7/m, z4.d" ] }, "vpaddsb ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xec 256-bit" ], "ExpectedArm64ASM": [ - "sqadd z16.b, z17.b, z18.b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "sqadd z4.b, z2.b, z3.b", + "mov z16.d, p7/m, z4.d" ] }, "vpaddsw xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xed 128-bit" ], "ExpectedArm64ASM": [ - "sqadd v16.8h, v17.8h, v18.8h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "sqadd v4.8h, v2.8h, v3.8h", + "mov z16.d, p7/m, z4.d" ] }, "vpaddsw ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xed 256-bit" ], "ExpectedArm64ASM": [ - "sqadd z16.h, z17.h, z18.h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "sqadd z4.h, z2.h, z3.h", + "mov z16.d, p7/m, z4.d" ] }, "vpmaxsw xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xee 128-bit" ], "ExpectedArm64ASM": [ - "smax v16.8h, v17.8h, v18.8h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "smax v4.8h, v2.8h, v3.8h", + "mov z16.d, p7/m, z4.d" ] }, "vpmaxsw ymm0, ymm1, ymm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "Aliasing source and destination", "Map 1 0b01 0xee 256-bit" ], "ExpectedArm64ASM": [ - "smax z16.h, p7/m, z16.h, z17.h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z16.d", + "movprfx z4, z2", + "smax z4.h, p7/m, z4.h, z3.h", + "mov z16.d, p7/m, z4.d" ] }, "vpmaxsw ymm0, ymm0, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "Aliasing source and destination", "Map 1 0b01 0xee 256-bit" ], "ExpectedArm64ASM": [ - "smax z16.h, p7/m, z16.h, z18.h" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "smax z4.h, p7/m, z4.h, z3.h", + "mov z16.d, p7/m, z4.d" ] }, "vpmaxsw ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b01 0xee 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z16, z17", - "smax z16.h, p7/m, z16.h, z18.h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "smax z4.h, p7/m, z4.h, z3.h", + "mov z16.d, p7/m, z4.d" ] }, "vpxor xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xef 128-bit" ], "ExpectedArm64ASM": [ - "eor v16.16b, v17.16b, v18.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "eor v4.16b, v2.16b, v3.16b", + "mov z16.d, p7/m, z4.d" ] }, "vpxor ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xef 256-bit" ], "ExpectedArm64ASM": [ - "eor z16.d, z17.d, z18.d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "eor z4.d, z2.d, z3.d", + "mov z16.d, p7/m, z4.d" ] }, "vlddqu xmm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b11 0xf0 128-bit" ], "ExpectedArm64ASM": [ - "ldr q16, [x4]" + "mov x20, x4", + "ldr q2, [x20]", + "mov z16.d, p7/m, z2.d" ] }, "vlddqu ymm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 1 0b11 0xf0 256-bit" ], "ExpectedArm64ASM": [ - "ld1b {z16.b}, p7/z, [x4]" + "mov x20, x4", + "ld1b {z2.b}, p7/z, [x20]", + "mov z16.d, p7/m, z2.d" ] }, "vpsllw xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b01 0xf1 128-bit" ], "ExpectedArm64ASM": [ - "mov z0.d, d18", - "movprfx z2, z17", - "lsl z2.h, p6/m, z2.h, z0.d", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z0.d, d3", + "movprfx z4, z2", + "lsl z4.h, p6/m, z4.h, z0.d", + "mov v2.16b, v4.16b", + "mov z16.d, p7/m, z2.d" ] }, "vpsllw ymm0, ymm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b01 0xf1 256-bit" ], "ExpectedArm64ASM": [ - "mov z0.d, d18", - "movprfx z16, z17", - "lsl z16.h, p7/m, z16.h, z0.d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z0.d, d3", + "movprfx z4, z2", + "lsl z4.h, p7/m, z4.h, z0.d", + "mov z16.d, p7/m, z4.d" ] }, "vpslld xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b01 0xf2 128-bit" ], "ExpectedArm64ASM": [ - "mov z0.d, d18", - "movprfx z2, z17", - "lsl z2.s, p6/m, z2.s, z0.d", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z0.d, d3", + "movprfx z4, z2", + "lsl z4.s, p6/m, z4.s, z0.d", + "mov v2.16b, v4.16b", + "mov z16.d, p7/m, z2.d" ] }, "vpslld ymm0, ymm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b01 0xf2 256-bit" ], "ExpectedArm64ASM": [ - "mov z0.d, d18", - "movprfx z16, z17", - "lsl z16.s, p7/m, z16.s, z0.d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z0.d, d3", + "movprfx z4, z2", + "lsl z4.s, p7/m, z4.s, z0.d", + "mov z16.d, p7/m, z4.d" ] }, "vpsllq xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b01 0xf3 128-bit" ], "ExpectedArm64ASM": [ - "mov z0.d, d18", - "movprfx z2, z17", - "lsl z2.d, p6/m, z2.d, z0.d", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z0.d, d3", + "movprfx z4, z2", + "lsl z4.d, p6/m, z4.d, z0.d", + "mov v2.16b, v4.16b", + "mov z16.d, p7/m, z2.d" ] }, "vpsllq ymm0, ymm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b01 0xf3 256-bit" ], "ExpectedArm64ASM": [ - "mov z0.d, d18", - "movprfx z16, z17", - "lsl z16.d, p7/m, z16.d, z0.d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z0.d, d3", + "movprfx z4, z2", + "lsl z4.d, p7/m, z4.d, z0.d", + "mov z16.d, p7/m, z4.d" ] }, "vpmuludq xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b01 0xf4 128-bit" ], "ExpectedArm64ASM": [ - "uzp1 v2.4s, v17.4s, v17.4s", - "uzp1 v3.4s, v18.4s, v18.4s", - "umull v16.2d, v2.2s, v3.2s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "uzp1 v4.4s, v2.4s, v2.4s", + "uzp1 v2.4s, v3.4s, v3.4s", + "umull v3.2d, v4.2s, v2.2s", + "mov z16.d, p7/m, z3.d" ] }, "vpmuludq ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "Map 1 0b01 0xf4 256-bit" ], "ExpectedArm64ASM": [ - "uzp1 z2.s, z17.s, z17.s", - "uzp1 z3.s, z18.s, z18.s", - "umullb z0.d, z2.s, z3.s", - "umullt z1.d, z2.s, z3.s", - "zip1 z16.d, z0.d, z1.d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "uzp1 z4.s, z2.s, z2.s", + "uzp1 z2.s, z3.s, z3.s", + "umullb z0.d, z4.s, z2.s", + "umullt z1.d, z4.s, z2.s", + "zip1 z3.d, z0.d, z1.d", + "mov z16.d, p7/m, z3.d" ] }, "vpmaddwd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b01 0xf5 128-bit" ], "ExpectedArm64ASM": [ - "smull v2.4s, v17.4h, v18.4h", - "smull2 v3.4s, v17.8h, v18.8h", - "addp v16.4s, v2.4s, v3.4s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "smull v4.4s, v2.4h, v3.4h", + "smull2 v5.4s, v2.8h, v3.8h", + "addp v2.4s, v4.4s, v5.4s", + "mov z16.d, p7/m, z2.d" ] }, "vpmaddwd ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 14, "Comment": [ "Map 1 0b01 0xf5 256-bit" ], "ExpectedArm64ASM": [ - "smullb z0.s, z17.h, z18.h", - "smullt z1.s, z17.h, z18.h", - "zip1 z2.s, z0.s, z1.s", - "smullb z0.s, z17.h, z18.h", - "smullt z1.s, z17.h, z18.h", - "zip2 z3.s, z0.s, z1.s", - "movprfx z0, z2", - "addp z0.s, p7/m, z0.s, z3.s", - "uzp1 z16.s, z0.s, z0.s", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "smullb z0.s, z2.h, z3.h", + "smullt z1.s, z2.h, z3.h", + "zip1 z4.s, z0.s, z1.s", + "smullb z0.s, z2.h, z3.h", + "smullt z1.s, z2.h, z3.h", + "zip2 z5.s, z0.s, z1.s", + "movprfx z0, z4", + "addp z0.s, p7/m, z0.s, z5.s", + "uzp1 z2.s, z0.s, z0.s", "uzp2 z1.s, z0.s, z0.s", - "splice z16.d, p6, z16.d, z1.d" + "splice z2.d, p6, z2.d, z1.d", + "mov z16.d, p7/m, z2.d" ] }, "vpsadbw xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "Map 1 0b01 0xf6 128-bit" ], "ExpectedArm64ASM": [ - "uabdl v2.8h, v17.8b, v18.8b", - "uabdl2 v3.8h, v17.16b, v18.16b", - "addv h2, v2.8h", - "addv h3, v3.8h", - "zip1 v16.2d, v2.2d, v3.2d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "uabdl v4.8h, v2.8b, v3.8b", + "uabdl2 v5.8h, v2.16b, v3.16b", + "addv h2, v4.8h", + "addv h3, v5.8h", + "zip1 v4.2d, v2.2d, v3.2d", + "mov z16.d, p7/m, z4.d" ] }, "vpsadbw ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 37, + "ExpectedInstructionCount": 41, "Comment": [ "Map 1 0b01 0xf6 256-bit" ], "ExpectedArm64ASM": [ - "uabdlb z0.h, z17.b, z18.b", - "uabdlt z1.h, z17.b, z18.b", - "zip1 z2.h, z0.h, z1.h", - "uabdlb z0.h, z17.b, z18.b", - "uabdlt z1.h, z17.b, z18.b", - "zip2 z3.h, z0.h, z1.h", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "uabdlb z0.h, z2.b, z3.b", + "uabdlt z1.h, z2.b, z3.b", + "zip1 z4.h, z0.h, z1.h", + "uabdlb z0.h, z2.b, z3.b", + "uabdlt z1.h, z2.b, z3.b", + "zip2 z5.h, z0.h, z1.h", + "addv h2, v4.8h", + "addv h3, v5.8h", + "zip1 z6.d, z2.d, z3.d", + "mov z2.q, z4.q[1]", + "mov z3.q, z5.q[1]", "addv h4, v2.8h", - "addv h5, v3.8h", - "zip1 z4.d, z4.d, z5.d", - "mov z2.q, z2.q[1]", - "mov z3.q, z3.q[1]", - "addv h2, v2.8h", - "addv h3, v3.8h", - "mov z1.d, d3", + "addv h2, v3.8h", + "mov z1.d, d2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z2.d, p0/m, z1.d", + "mov z3.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.q, q2", - "mov z2.d, z4.d", + "mov z1.q, q3", + "mov z2.d, z6.d", "not p0.b, p7/z, p6.b", "mov z2.b, p0/m, z1.b", "mov z1.d, z2.d[1]", @@ -5347,150 +6606,197 @@ "mov z3.d, p0/m, z1.d", "msr nzcv, x0", "mov z1.d, z2.d[2]", - "mov z16.d, z3.d", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z4.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vmaskmovdqu xmm0, xmm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": [ "Map 1 0b01 0xf7 128-bit" ], "ExpectedArm64ASM": [ - "cmlt v2.16b, v17.16b, #0", - "ldr q3, [x11]", - "bsl v2.16b, v16.16b, v3.16b", - "str q2, [x11]" + "mov z2.d, p7/m, z17.d", + "cmlt v3.16b, v2.16b, #0", + "mov z2.d, p7/m, z16.d", + "mov x20, x11", + "ldr q4, [x20]", + "mov v5.16b, v3.16b", + "bsl v5.16b, v2.16b, v4.16b", + "str q5, [x20]" ] }, "vpsubb xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xf8 128-bit" ], "ExpectedArm64ASM": [ - "sub v16.16b, v17.16b, v18.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "sub v4.16b, v2.16b, v3.16b", + "mov z16.d, p7/m, z4.d" ] }, "vpsubb ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xf8 256-bit" ], "ExpectedArm64ASM": [ - "sub z16.b, z17.b, z18.b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "sub z4.b, z2.b, z3.b", + "mov z16.d, p7/m, z4.d" ] }, "vpsubw xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xf9 128-bit" ], "ExpectedArm64ASM": [ - "sub v16.8h, v17.8h, v18.8h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "sub v4.8h, v2.8h, v3.8h", + "mov z16.d, p7/m, z4.d" ] }, "vpsubw ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xf9 256-bit" ], "ExpectedArm64ASM": [ - "sub z16.h, z17.h, z18.h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "sub z4.h, z2.h, z3.h", + "mov z16.d, p7/m, z4.d" ] }, "vpsubd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xfa 128-bit" ], "ExpectedArm64ASM": [ - "sub v16.4s, v17.4s, v18.4s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "sub v4.4s, v2.4s, v3.4s", + "mov z16.d, p7/m, z4.d" ] }, "vpsubd ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xfa 256-bit" ], "ExpectedArm64ASM": [ - "sub z16.s, z17.s, z18.s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "sub z4.s, z2.s, z3.s", + "mov z16.d, p7/m, z4.d" ] }, "vpsubq xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xfb 128-bit" ], "ExpectedArm64ASM": [ - "sub v16.2d, v17.2d, v18.2d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "sub v4.2d, v2.2d, v3.2d", + "mov z16.d, p7/m, z4.d" ] }, "vpsubq ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xfb 256-bit" ], "ExpectedArm64ASM": [ - "sub z16.d, z17.d, z18.d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "sub z4.d, z2.d, z3.d", + "mov z16.d, p7/m, z4.d" ] }, "vpaddb xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xfc 128-bit" ], "ExpectedArm64ASM": [ - "add v16.16b, v17.16b, v18.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "add v4.16b, v2.16b, v3.16b", + "mov z16.d, p7/m, z4.d" ] }, "vpaddb ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xfc 256-bit" ], "ExpectedArm64ASM": [ - "add z16.b, z17.b, z18.b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "add z4.b, z2.b, z3.b", + "mov z16.d, p7/m, z4.d" ] }, "vpaddw xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xfd 128-bit" ], "ExpectedArm64ASM": [ - "add v16.8h, v17.8h, v18.8h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "add v4.8h, v2.8h, v3.8h", + "mov z16.d, p7/m, z4.d" ] }, "vpaddw ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xfd 256-bit" ], "ExpectedArm64ASM": [ - "add z16.h, z17.h, z18.h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "add z4.h, z2.h, z3.h", + "mov z16.d, p7/m, z4.d" ] }, "vpaddd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xfe 128-bit" ], "ExpectedArm64ASM": [ - "add v16.4s, v17.4s, v18.4s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "add v4.4s, v2.4s, v3.4s", + "mov z16.d, p7/m, z4.d" ] }, "vpaddd ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 1 0b01 0xfe 256-bit" ], "ExpectedArm64ASM": [ - "add z16.s, z17.s, z18.s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "add z4.s, z2.s, z3.s", + "mov z16.d, p7/m, z4.d" ] } } diff --git a/unittests/InstructionCountCI/VEX_map1_FCMA.json b/unittests/InstructionCountCI/VEX_map1_FCMA.json index b4aca96368..18a5695d06 100644 --- a/unittests/InstructionCountCI/VEX_map1_FCMA.json +++ b/unittests/InstructionCountCI/VEX_map1_FCMA.json @@ -10,94 +10,120 @@ }, "Instructions": { "vaddsubpd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b01 0xd0 128-bit" ], "ExpectedArm64ASM": [ - "ext v2.16b, v18.16b, v18.16b, #8", - "fcadd v16.2d, v17.2d, v2.2d, #90" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "ext v4.16b, v3.16b, v3.16b, #8", + "fcadd v3.2d, v2.2d, v4.2d, #90", + "mov z16.d, p7/m, z3.d" ] }, "vaddsubpd ymm0, ymm0, ymm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "Aliasing source and destination", "Map 1 0b01 0xd0 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z2, z18", - "ext z2.b, z2.b, z18.b, #8", - "fcadd z16.d, p7/m, z16.d, z2.d, #90" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z3", + "ext z4.b, z4.b, z3.b, #8", + "movprfx z3, z2", + "fcadd z3.d, p7/m, z3.d, z4.d, #90", + "mov z16.d, p7/m, z3.d" ] }, "vaddsubpd ymm0, ymm1, ymm0": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Aliasing source and destination", "Map 1 0b01 0xd0 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z2, z16", - "ext z2.b, z2.b, z16.b, #8", - "movprfx z16, z17", - "fcadd z16.d, p7/m, z16.d, z2.d, #90" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z16.d", + "movprfx z4, z3", + "ext z4.b, z4.b, z3.b, #8", + "movprfx z3, z2", + "fcadd z3.d, p7/m, z3.d, z4.d, #90", + "mov z16.d, p7/m, z3.d" ] }, "vaddsubpd ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 1 0b01 0xd0 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z2, z18", - "ext z2.b, z2.b, z18.b, #8", - "movprfx z16, z17", - "fcadd z16.d, p7/m, z16.d, z2.d, #90" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z3", + "ext z4.b, z4.b, z3.b, #8", + "movprfx z3, z2", + "fcadd z3.d, p7/m, z3.d, z4.d, #90", + "mov z16.d, p7/m, z3.d" ] }, "vaddsubps xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 1 0b11 0xd0 128-bit" ], "ExpectedArm64ASM": [ - "rev64 v2.4s, v18.4s", - "fcadd v16.4s, v17.4s, v2.4s, #90" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "rev64 v4.4s, v3.4s", + "fcadd v3.4s, v2.4s, v4.4s, #90", + "mov z16.d, p7/m, z3.d" ] }, "vaddsubps ymm0, ymm1, ymm0": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Aliasing source and destination", "Map 1 0b11 0xd0 256-bit" ], "ExpectedArm64ASM": [ - "revw z2.d, p7/m, z16.d", - "movprfx z16, z17", - "fcadd z16.s, p7/m, z16.s, z2.s, #90" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z16.d", + "revw z4.d, p7/m, z3.d", + "movprfx z3, z2", + "fcadd z3.s, p7/m, z3.s, z4.s, #90", + "mov z16.d, p7/m, z3.d" ] }, "vaddsubps ymm0, ymm0, ymm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "Aliasing source and destination", "Map 1 0b11 0xd0 256-bit" ], "ExpectedArm64ASM": [ - "revw z2.d, p7/m, z18.d", - "fcadd z16.s, p7/m, z16.s, z2.s, #90" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z18.d", + "revw z4.d, p7/m, z3.d", + "movprfx z3, z2", + "fcadd z3.s, p7/m, z3.s, z4.s, #90", + "mov z16.d, p7/m, z3.d" ] }, "vaddsubps ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 1 0b11 0xd0 256-bit" ], "ExpectedArm64ASM": [ - "revw z2.d, p7/m, z18.d", - "movprfx z16, z17", - "fcadd z16.s, p7/m, z16.s, z2.s, #90" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "revw z4.d, p7/m, z3.d", + "movprfx z3, z2", + "fcadd z3.s, p7/m, z3.s, z4.s, #90", + "mov z16.d, p7/m, z3.d" ] } } diff --git a/unittests/InstructionCountCI/VEX_map2.json b/unittests/InstructionCountCI/VEX_map2.json index b2ccc16d47..7cf224d27b 100644 --- a/unittests/InstructionCountCI/VEX_map2.json +++ b/unittests/InstructionCountCI/VEX_map2.json @@ -12,127 +12,150 @@ }, "Instructions": { "vpshufb xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 2 0b01 0x00 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.16b, #0x8f", - "and v2.16b, v18.16b, v2.16b", - "tbl v16.16b, {v17.16b}, v2.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.16b, #0x8f", + "and v5.16b, v3.16b, v4.16b", + "tbl v3.16b, {v2.16b}, v5.16b", + "mov z16.d, p7/m, z3.d" ] }, "vpshufb ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 14, "Comment": [ "Map 2 0b01 0x00 256-bit" ], "ExpectedArm64ASM": [ - "mov z2.b, #-113", - "and z2.d, z18.d, z2.d", - "tbl v3.16b, {v17.16b}, v2.16b", - "mov z1.q, z17.q[1]", - "mov z4.d, z17.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z4.b, #-113", + "and z5.d, z3.d, z4.d", + "tbl v3.16b, {v2.16b}, v5.16b", + "mov z1.q, z2.q[1]", + "mov z4.d, z2.d", "mov z4.b, p6/m, z1.b", - "tbl v2.16b, {v4.16b}, v2.16b", + "tbl v2.16b, {v4.16b}, v5.16b", "mov z1.q, q2", - "mov z16.d, z3.d", + "mov z4.d, z3.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z4.b, p0/m, z1.b", + "mov z16.d, p7/m, z4.d" ] }, "vphaddw xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 2 0b01 0x01 128-bit" ], "ExpectedArm64ASM": [ - "addp v16.8h, v17.8h, v18.8h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "addp v4.8h, v2.8h, v3.8h", + "mov z16.d, p7/m, z4.d" ] }, "vphaddw ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 22, "Comment": [ "Map 2 0b01 0x01 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z0, z17", - "addp z0.h, p7/m, z0.h, z18.h", - "uzp1 z2.h, z0.h, z0.h", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movprfx z0, z2", + "addp z0.h, p7/m, z0.h, z3.h", + "uzp1 z4.h, z0.h, z0.h", "uzp2 z1.h, z0.h, z0.h", - "splice z2.d, p6, z2.d, z1.d", - "mov z1.d, z2.d[2]", - "mov z3.d, z2.d", + "splice z4.d, p6, z4.d, z1.d", + "mov z1.d, z4.d[2]", + "mov z2.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z3.d, p0/m, z1.d", + "mov z2.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z2.d[1]", - "mov z16.d, z3.d", + "mov z1.d, z4.d[1]", + "mov z3.d, z2.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z3.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vphaddd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 2 0b01 0x02 128-bit" ], "ExpectedArm64ASM": [ - "addp v16.4s, v17.4s, v18.4s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "addp v4.4s, v2.4s, v3.4s", + "mov z16.d, p7/m, z4.d" ] }, "vphaddd ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 22, "Comment": [ "Map 2 0b01 0x02 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z0, z17", - "addp z0.s, p7/m, z0.s, z18.s", - "uzp1 z2.s, z0.s, z0.s", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movprfx z0, z2", + "addp z0.s, p7/m, z0.s, z3.s", + "uzp1 z4.s, z0.s, z0.s", "uzp2 z1.s, z0.s, z0.s", - "splice z2.d, p6, z2.d, z1.d", - "mov z1.d, z2.d[2]", - "mov z3.d, z2.d", + "splice z4.d, p6, z4.d, z1.d", + "mov z1.d, z4.d[2]", + "mov z2.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z3.d, p0/m, z1.d", + "mov z2.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z2.d[1]", - "mov z16.d, z3.d", + "mov z1.d, z4.d[1]", + "mov z3.d, z2.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z3.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vphaddsw xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 2 0b01 0x03 128-bit" ], "ExpectedArm64ASM": [ - "uzp1 v2.8h, v17.8h, v18.8h", - "uzp2 v3.8h, v17.8h, v18.8h", - "sqadd v16.8h, v2.8h, v3.8h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "uzp1 v4.8h, v2.8h, v3.8h", + "uzp2 v5.8h, v2.8h, v3.8h", + "sqadd v2.8h, v4.8h, v5.8h", + "mov z16.d, p7/m, z2.d" ] }, "vphaddsw ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 20, "Comment": [ "Map 2 0b01 0x03 256-bit" ], "ExpectedArm64ASM": [ - "uzp1 z2.h, z17.h, z18.h", - "uzp2 z3.h, z17.h, z18.h", - "sqadd z2.h, z2.h, z3.h", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "uzp1 z4.h, z2.h, z3.h", + "uzp2 z5.h, z2.h, z3.h", + "sqadd z2.h, z4.h, z5.h", "mov z1.d, z2.d[2]", "mov z3.d, z2.d", "mrs x0, nzcv", @@ -141,68 +164,80 @@ "mov z3.d, p0/m, z1.d", "msr nzcv, x0", "mov z1.d, z2.d[1]", - "mov z16.d, z3.d", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z4.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vpmaddubsw xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 12, "Comment": [ "Map 2 0b01 0x04 128-bit" ], "ExpectedArm64ASM": [ - "uxtl v2.8h, v17.8b", - "sxtl v3.8h, v18.8b", - "mul v2.8h, v2.8h, v3.8h", - "uxtl2 v3.8h, v17.16b", - "sxtl2 v4.8h, v18.16b", - "mul v3.8h, v3.8h, v4.8h", - "uzp1 v4.8h, v2.8h, v3.8h", - "uzp2 v2.8h, v2.8h, v3.8h", - "sqadd v16.8h, v4.8h, v2.8h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "uxtl v4.8h, v2.8b", + "sxtl v5.8h, v3.8b", + "mul v6.8h, v4.8h, v5.8h", + "uxtl2 v4.8h, v2.16b", + "sxtl2 v2.8h, v3.16b", + "mul v3.8h, v4.8h, v2.8h", + "uzp1 v2.8h, v6.8h, v3.8h", + "uzp2 v4.8h, v6.8h, v3.8h", + "sqadd v3.8h, v2.8h, v4.8h", + "mov z16.d, p7/m, z3.d" ] }, "vpmaddubsw ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 12, "Comment": [ "Map 2 0b01 0x04 256-bit" ], "ExpectedArm64ASM": [ - "uunpklo z2.h, z17.b", - "sunpklo z3.h, z18.b", - "mul z2.h, z2.h, z3.h", - "uunpkhi z3.h, z17.b", - "sunpkhi z4.h, z18.b", - "mul z3.h, z3.h, z4.h", - "uzp1 z4.h, z2.h, z3.h", - "uzp2 z2.h, z2.h, z3.h", - "sqadd z16.h, z4.h, z2.h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "uunpklo z4.h, z2.b", + "sunpklo z5.h, z3.b", + "mul z6.h, z4.h, z5.h", + "uunpkhi z4.h, z2.b", + "sunpkhi z2.h, z3.b", + "mul z3.h, z4.h, z2.h", + "uzp1 z2.h, z6.h, z3.h", + "uzp2 z4.h, z6.h, z3.h", + "sqadd z3.h, z2.h, z4.h", + "mov z16.d, p7/m, z3.d" ] }, "vphsubw xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 2 0b01 0x05 128-bit" ], "ExpectedArm64ASM": [ - "uzp1 v2.8h, v17.8h, v18.8h", - "uzp2 v3.8h, v17.8h, v18.8h", - "sub v16.8h, v2.8h, v3.8h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "uzp1 v4.8h, v2.8h, v3.8h", + "uzp2 v5.8h, v2.8h, v3.8h", + "sub v2.8h, v4.8h, v5.8h", + "mov z16.d, p7/m, z2.d" ] }, "vphsubw ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 20, "Comment": [ "Map 2 0b01 0x05 256-bit" ], "ExpectedArm64ASM": [ - "uzp1 z2.h, z17.h, z18.h", - "uzp2 z3.h, z17.h, z18.h", - "sub z2.h, z2.h, z3.h", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "uzp1 z4.h, z2.h, z3.h", + "uzp2 z5.h, z2.h, z3.h", + "sub z2.h, z4.h, z5.h", "mov z1.d, z2.d[2]", "mov z3.d, z2.d", "mrs x0, nzcv", @@ -211,34 +246,40 @@ "mov z3.d, p0/m, z1.d", "msr nzcv, x0", "mov z1.d, z2.d[1]", - "mov z16.d, z3.d", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z4.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vphsubd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 2 0b01 0x06 128-bit" ], "ExpectedArm64ASM": [ - "uzp1 v2.4s, v17.4s, v18.4s", - "uzp2 v3.4s, v17.4s, v18.4s", - "sub v16.4s, v2.4s, v3.4s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "uzp1 v4.4s, v2.4s, v3.4s", + "uzp2 v5.4s, v2.4s, v3.4s", + "sub v2.4s, v4.4s, v5.4s", + "mov z16.d, p7/m, z2.d" ] }, "vphsubd ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 20, "Comment": [ "Map 2 0b01 0x06 256-bit" ], "ExpectedArm64ASM": [ - "uzp1 z2.s, z17.s, z18.s", - "uzp2 z3.s, z17.s, z18.s", - "sub z2.s, z2.s, z3.s", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "uzp1 z4.s, z2.s, z3.s", + "uzp2 z5.s, z2.s, z3.s", + "sub z2.s, z4.s, z5.s", "mov z1.d, z2.d[2]", "mov z3.d, z2.d", "mrs x0, nzcv", @@ -247,34 +288,40 @@ "mov z3.d, p0/m, z1.d", "msr nzcv, x0", "mov z1.d, z2.d[1]", - "mov z16.d, z3.d", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z4.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vphsubsw xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 2 0b01 0x07 128-bit" ], "ExpectedArm64ASM": [ - "uzp1 v2.8h, v17.8h, v18.8h", - "uzp2 v3.8h, v17.8h, v18.8h", - "sqsub v16.8h, v2.8h, v3.8h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "uzp1 v4.8h, v2.8h, v3.8h", + "uzp2 v5.8h, v2.8h, v3.8h", + "sqsub v2.8h, v4.8h, v5.8h", + "mov z16.d, p7/m, z2.d" ] }, "vphsubsw ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 20, "Comment": [ "Map 2 0b01 0x07 256-bit" ], "ExpectedArm64ASM": [ - "uzp1 z2.h, z17.h, z18.h", - "uzp2 z3.h, z17.h, z18.h", - "sqsub z2.h, z2.h, z3.h", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "uzp1 z4.h, z2.h, z3.h", + "uzp2 z5.h, z2.h, z3.h", + "sqsub z2.h, z4.h, z5.h", "mov z1.d, z2.d[2]", "mov z3.d, z2.d", "mrs x0, nzcv", @@ -283,323 +330,385 @@ "mov z3.d, p0/m, z1.d", "msr nzcv, x0", "mov z1.d, z2.d[1]", - "mov z16.d, z3.d", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z4.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vpsignb xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 2 0b01 0x08 128-bit" ], "ExpectedArm64ASM": [ - "sqshl v2.16b, v18.16b, #7", - "srshr v2.16b, v2.16b, #7", - "mul v16.16b, v17.16b, v2.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "sqshl v4.16b, v3.16b, #7", + "srshr v3.16b, v4.16b, #7", + "mul v4.16b, v2.16b, v3.16b", + "mov z16.d, p7/m, z4.d" ] }, "vpsignb ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": [ "Map 2 0b01 0x08 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z2, z18", - "sqshl z2.b, p7/m, z2.b, #7", - "srshr z2.b, p7/m, z2.b, #7", - "mul z16.b, z17.b, z2.b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z3", + "sqshl z4.b, p7/m, z4.b, #7", + "movprfx z3, z4", + "srshr z3.b, p7/m, z3.b, #7", + "mul z4.b, z2.b, z3.b", + "mov z16.d, p7/m, z4.d" ] }, "vpsignw xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 2 0b01 0x09 128-bit" ], "ExpectedArm64ASM": [ - "sqshl v2.8h, v18.8h, #15", - "srshr v2.8h, v2.8h, #15", - "mul v16.8h, v17.8h, v2.8h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "sqshl v4.8h, v3.8h, #15", + "srshr v3.8h, v4.8h, #15", + "mul v4.8h, v2.8h, v3.8h", + "mov z16.d, p7/m, z4.d" ] }, "vpsignw ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": [ "Map 2 0b01 0x09 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z2, z18", - "sqshl z2.h, p7/m, z2.h, #15", - "srshr z2.h, p7/m, z2.h, #15", - "mul z16.h, z17.h, z2.h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z3", + "sqshl z4.h, p7/m, z4.h, #15", + "movprfx z3, z4", + "srshr z3.h, p7/m, z3.h, #15", + "mul z4.h, z2.h, z3.h", + "mov z16.d, p7/m, z4.d" ] }, "vpsignd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 2 0b01 0x0a 128-bit" ], "ExpectedArm64ASM": [ - "sqshl v2.4s, v18.4s, #31", - "srshr v2.4s, v2.4s, #31", - "mul v16.4s, v17.4s, v2.4s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "sqshl v4.4s, v3.4s, #31", + "srshr v3.4s, v4.4s, #31", + "mul v4.4s, v2.4s, v3.4s", + "mov z16.d, p7/m, z4.d" ] }, "vpsignd ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": [ "Map 2 0b01 0x0a 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z2, z18", - "sqshl z2.s, p7/m, z2.s, #31", - "srshr z2.s, p7/m, z2.s, #31", - "mul z16.s, z17.s, z2.s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z3", + "sqshl z4.s, p7/m, z4.s, #31", + "movprfx z3, z4", + "srshr z3.s, p7/m, z3.s, #31", + "mul z4.s, z2.s, z3.s", + "mov z16.d, p7/m, z4.d" ] }, "vpmulhrsw xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 11, + "ExpectedInstructionCount": 14, "Comment": [ "Map 2 0b01 0x0b 128-bit" ], "ExpectedArm64ASM": [ - "smull v2.4s, v17.4h, v18.4h", - "smull2 v3.4s, v17.8h, v18.8h", - "sshr v2.4s, v2.4s, #14", - "sshr v3.4s, v3.4s, #14", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "smull v4.4s, v2.4h, v3.4h", + "smull2 v5.4s, v2.8h, v3.8h", + "sshr v2.4s, v4.4s, #14", + "sshr v3.4s, v5.4s, #14", "movi v4.4s, #0x1, lsl #0", - "add v2.4s, v2.4s, v4.4s", - "add v3.4s, v3.4s, v4.4s", - "shrn v2.4h, v2.4s, #1", - "mov v0.16b, v2.16b", - "shrn2 v0.8h, v3.4s, #1", - "mov v16.16b, v0.16b" + "add v5.4s, v2.4s, v4.4s", + "add v2.4s, v3.4s, v4.4s", + "shrn v3.4h, v5.4s, #1", + "mov v0.16b, v3.16b", + "shrn2 v0.8h, v2.4s, #1", + "mov v4.16b, v0.16b", + "mov z16.d, p7/m, z4.d" ] }, "vpmulhrsw ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 22, "Comment": [ "Map 2 0b01 0x0b 256-bit" ], "ExpectedArm64ASM": [ - "smullb z0.s, z17.h, z18.h", - "smullt z1.s, z17.h, z18.h", - "zip1 z2.s, z0.s, z1.s", - "smullb z0.s, z17.h, z18.h", - "smullt z1.s, z17.h, z18.h", - "zip2 z3.s, z0.s, z1.s", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "smullb z0.s, z2.h, z3.h", + "smullt z1.s, z2.h, z3.h", + "zip1 z4.s, z0.s, z1.s", + "smullb z0.s, z2.h, z3.h", + "smullt z1.s, z2.h, z3.h", + "zip2 z5.s, z0.s, z1.s", + "movprfx z2, z4", "asr z2.s, p7/m, z2.s, #14", + "movprfx z3, z5", "asr z3.s, p7/m, z3.s, #14", "mov z4.s, #1", - "add z2.s, z2.s, z4.s", - "add z3.s, z3.s, z4.s", - "shrnb z2.h, z2.s, #1", - "uzp1 z2.h, z2.h, z2.h", - "shrnb z1.h, z3.s, #1", + "add z5.s, z2.s, z4.s", + "add z2.s, z3.s, z4.s", + "shrnb z3.h, z5.s, #1", + "uzp1 z3.h, z3.h, z3.h", + "shrnb z1.h, z2.s, #1", "uzp1 z1.h, z1.h, z1.h", - "movprfx z16, z2", - "splice z16.h, p6, z16.h, z1.h" + "movprfx z4, z3", + "splice z4.h, p6, z4.h, z1.h", + "mov z16.d, p7/m, z4.d" ] }, "vpermilps xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 13, "Comment": [ "Map 2 0b01 0x0c 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.4s, #0x3, lsl #0", - "and v2.16b, v18.16b, v2.16b", - "trn1 v2.16b, v2.16b, v2.16b", - "trn1 v2.8h, v2.8h, v2.8h", - "shl v2.16b, v2.16b, #2", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.4s, #0x3, lsl #0", + "and v5.16b, v3.16b, v4.16b", + "trn1 v3.16b, v5.16b, v5.16b", + "trn1 v4.8h, v3.8h, v3.8h", + "shl v3.16b, v4.16b, #2", "mov w20, #0x100", "movk w20, #0x302, lsl #16", - "dup v3.4s, w20", - "add v2.16b, v3.16b, v2.16b", - "tbl v16.16b, {v17.16b}, v2.16b" + "dup v4.4s, w20", + "add v5.16b, v4.16b, v3.16b", + "tbl v3.16b, {v2.16b}, v5.16b", + "mov z16.d, p7/m, z3.d" ] }, "vpermilps ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 21, "Comment": [ "Map 2 0b01 0x0c 256-bit" ], "ExpectedArm64ASM": [ - "mov z2.s, #3", - "and z2.d, z18.d, z2.d", - "trn1 z2.b, z2.b, z2.b", - "trn1 z2.h, z2.h, z2.h", - "lsl z2.b, p7/m, z2.b, #2", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z4.s, #3", + "and z5.d, z3.d, z4.d", + "trn1 z3.b, z5.b, z5.b", + "trn1 z4.h, z3.h, z3.h", + "movprfx z3, z4", + "lsl z3.b, p7/m, z3.b, #2", "mov w20, #0x100", "movk w20, #0x302, lsl #16", - "mov z3.s, w20", - "movi v4.2d, #0x0", - "mov z5.b, #16", - "mov z1.q, q5", + "mov z4.s, w20", + "movi v5.2d, #0x0", + "mov z6.b, #16", + "mov z1.q, q6", + "mov z7.d, z5.d", "not p0.b, p7/z, p6.b", - "mov z4.b, p0/m, z1.b", - "add z3.b, z3.b, z4.b", - "add z2.b, z3.b, z2.b", - "tbl z16.b, {z17.b}, z2.b" + "mov z7.b, p0/m, z1.b", + "add z5.b, z4.b, z7.b", + "add z4.b, z5.b, z3.b", + "tbl z3.b, {z2.b}, z4.b", + "mov z16.d, p7/m, z3.d" ] }, "vpermilpd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 15, + "ExpectedInstructionCount": 18, "Comment": [ "Map 2 0b01 0x0d 128-bit" ], "ExpectedArm64ASM": [ - "ushr v2.2d, v18.2d, #1", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "ushr v4.2d, v3.2d, #1", "mov w0, #0x1", "dup v3.2d, x0", - "and v2.16b, v2.16b, v3.16b", - "trn1 v2.16b, v2.16b, v2.16b", - "trn1 v2.8h, v2.8h, v2.8h", - "trn1 v2.4s, v2.4s, v2.4s", - "shl v2.16b, v2.16b, #3", + "and v5.16b, v4.16b, v3.16b", + "trn1 v3.16b, v5.16b, v5.16b", + "trn1 v4.8h, v3.8h, v3.8h", + "trn1 v3.4s, v4.4s, v4.4s", + "shl v4.16b, v3.16b, #3", "mov x20, #0x100", "movk x20, #0x302, lsl #16", "movk x20, #0x504, lsl #32", "movk x20, #0x706, lsl #48", "dup v3.2d, x20", - "add v2.16b, v3.16b, v2.16b", - "tbl v16.16b, {v17.16b}, v2.16b" + "add v5.16b, v3.16b, v4.16b", + "tbl v3.16b, {v2.16b}, v5.16b", + "mov z16.d, p7/m, z3.d" ] }, "vpermilpd ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 21, + "ExpectedInstructionCount": 26, "Comment": [ "Map 2 0b01 0x0d 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z2, z18", - "lsr z2.d, p7/m, z2.d, #1", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z3", + "lsr z4.d, p7/m, z4.d, #1", "mov z3.d, #1", - "and z2.d, z2.d, z3.d", - "trn1 z2.b, z2.b, z2.b", - "trn1 z2.h, z2.h, z2.h", - "trn1 z2.s, z2.s, z2.s", - "lsl z2.b, p7/m, z2.b, #3", + "and z5.d, z4.d, z3.d", + "trn1 z3.b, z5.b, z5.b", + "trn1 z4.h, z3.h, z3.h", + "trn1 z3.s, z4.s, z4.s", + "movprfx z4, z3", + "lsl z4.b, p7/m, z4.b, #3", "mov x20, #0x100", "movk x20, #0x302, lsl #16", "movk x20, #0x504, lsl #32", "movk x20, #0x706, lsl #48", "mov z3.d, x20", - "movi v4.2d, #0x0", - "mov z5.b, #16", - "mov z1.q, q5", + "movi v5.2d, #0x0", + "mov z6.b, #16", + "mov z1.q, q6", + "mov z7.d, z5.d", "not p0.b, p7/z, p6.b", - "mov z4.b, p0/m, z1.b", - "add z3.b, z3.b, z4.b", - "add z2.b, z3.b, z2.b", - "tbl z16.b, {z17.b}, z2.b" + "mov z7.b, p0/m, z1.b", + "add z5.b, z3.b, z7.b", + "add z3.b, z5.b, z4.b", + "tbl z4.b, {z2.b}, z3.b", + "mov z16.d, p7/m, z4.d" ] }, "vtestps xmm0, xmm1": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 22, "Comment": [ "Map 2 0b01 0x0e 128-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", "mov w20, #0x80000000", - "dup v2.4s, w20", - "and v3.16b, v17.16b, v16.16b", - "bic v4.16b, v17.16b, v16.16b", - "and v3.16b, v3.16b, v2.16b", - "and v2.16b, v4.16b, v2.16b", - "umaxv h3, v3.8h", - "umaxv h2, v2.8h", - "umov w20, v3.h[0]", + "dup v4.4s, w20", + "and v5.16b, v3.16b, v2.16b", + "bic v6.16b, v3.16b, v2.16b", + "and v2.16b, v5.16b, v4.16b", + "and v3.16b, v6.16b, v4.16b", + "umaxv h4, v2.8h", + "umaxv h2, v3.8h", + "umov w20, v4.h[0]", "umov w21, v2.h[0]", - "mov w27, #0x0", - "mov w26, #0x1", + "mov w22, #0x0", + "mov w23, #0x1", "cmp x21, #0x0 (0)", - "cset x21, eq", + "cset x24, eq", "tst w20, w20", "mrs x20, nzcv", - "orr w20, w20, w21, lsl #29", - "msr nzcv, x20" + "orr w21, w20, w24, lsl #29", + "mov x26, x23", + "mov x27, x22", + "msr nzcv, x21" ] }, "vtestps ymm0, ymm1": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 22, "Comment": [ "Map 2 0b01 0x0e 256-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", "mov w20, #0x80000000", - "mov z2.s, w20", - "and z3.d, z17.d, z16.d", - "bic z4.d, z17.d, z16.d", - "and z3.d, z3.d, z2.d", - "and z2.d, z4.d, z2.d", - "umaxv h3, p7, z3.h", - "umaxv h2, p7, z2.h", - "umov w20, v3.h[0]", + "mov z4.s, w20", + "and z5.d, z3.d, z2.d", + "bic z6.d, z3.d, z2.d", + "and z2.d, z5.d, z4.d", + "and z3.d, z6.d, z4.d", + "umaxv h4, p7, z2.h", + "umaxv h2, p7, z3.h", + "umov w20, v4.h[0]", "umov w21, v2.h[0]", - "mov w27, #0x0", - "mov w26, #0x1", + "mov w22, #0x0", + "mov w23, #0x1", "cmp x21, #0x0 (0)", - "cset x21, eq", + "cset x24, eq", "tst w20, w20", "mrs x20, nzcv", - "orr w20, w20, w21, lsl #29", - "msr nzcv, x20" + "orr w21, w20, w24, lsl #29", + "mov x26, x23", + "mov x27, x22", + "msr nzcv, x21" ] }, "vtestpd xmm0, xmm1": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 22, "Comment": [ "Map 2 0b01 0x0f 128-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", "mov x20, #0x8000000000000000", - "dup v2.2d, x20", - "and v3.16b, v17.16b, v16.16b", - "bic v4.16b, v17.16b, v16.16b", - "and v3.16b, v3.16b, v2.16b", - "and v2.16b, v4.16b, v2.16b", - "umaxv h3, v3.8h", - "umaxv h2, v2.8h", - "umov w20, v3.h[0]", + "dup v4.2d, x20", + "and v5.16b, v3.16b, v2.16b", + "bic v6.16b, v3.16b, v2.16b", + "and v2.16b, v5.16b, v4.16b", + "and v3.16b, v6.16b, v4.16b", + "umaxv h4, v2.8h", + "umaxv h2, v3.8h", + "umov w20, v4.h[0]", "umov w21, v2.h[0]", - "mov w27, #0x0", - "mov w26, #0x1", + "mov w22, #0x0", + "mov w23, #0x1", "cmp x21, #0x0 (0)", - "cset x21, eq", + "cset x24, eq", "tst w20, w20", "mrs x20, nzcv", - "orr w20, w20, w21, lsl #29", - "msr nzcv, x20" + "orr w21, w20, w24, lsl #29", + "mov x26, x23", + "mov x27, x22", + "msr nzcv, x21" ] }, "vtestpd ymm0, ymm1": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 22, "Comment": [ "Map 2 0b01 0x0f 256-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", "mov x20, #0x8000000000000000", - "mov z2.d, x20", - "and z3.d, z17.d, z16.d", - "bic z4.d, z17.d, z16.d", - "and z3.d, z3.d, z2.d", - "and z2.d, z4.d, z2.d", - "umaxv h3, p7, z3.h", - "umaxv h2, p7, z2.h", - "umov w20, v3.h[0]", + "mov z4.d, x20", + "and z5.d, z3.d, z2.d", + "bic z6.d, z3.d, z2.d", + "and z2.d, z5.d, z4.d", + "and z3.d, z6.d, z4.d", + "umaxv h4, p7, z2.h", + "umaxv h2, p7, z3.h", + "umov w20, v4.h[0]", "umov w21, v2.h[0]", - "mov w27, #0x0", - "mov w26, #0x1", + "mov w22, #0x0", + "mov w23, #0x1", "cmp x21, #0x0 (0)", - "cset x21, eq", + "cset x24, eq", "tst w20, w20", "mrs x20, nzcv", - "orr w20, w20, w21, lsl #29", - "msr nzcv, x20" + "orr w21, w20, w24, lsl #29", + "mov x26, x23", + "mov x27, x22", + "msr nzcv, x21" ] }, "vcvtph2ps xmm0, xmm1": { @@ -617,1155 +726,1455 @@ ] }, "vpermps ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 14, "Comment": [ "Map 2 0b01 0x16 256-bit" ], "ExpectedArm64ASM": [ - "mov z2.s, #7", - "and z2.d, z17.d, z2.d", - "trn1 z2.b, z2.b, z2.b", - "trn1 z2.h, z2.h, z2.h", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z4.s, #7", + "and z5.d, z2.d, z4.d", + "trn1 z2.b, z5.b, z5.b", + "trn1 z4.h, z2.h, z2.h", + "movprfx z2, z4", "lsl z2.b, p7/m, z2.b, #2", "mov w20, #0x100", "movk w20, #0x302, lsl #16", - "mov z3.s, w20", - "add z2.b, z2.b, z3.b", - "tbl z16.b, {z18.b}, z2.b" + "mov z4.s, w20", + "add z5.b, z2.b, z4.b", + "tbl z2.b, {z3.b}, z5.b", + "mov z16.d, p7/m, z2.d" ] }, "vptest xmm0, xmm1": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 18, "Comment": [ "Map 2 0b01 0x16 128-bit" ], "ExpectedArm64ASM": [ - "and v2.16b, v16.16b, v17.16b", - "bic v3.16b, v17.16b, v16.16b", - "umaxv h2, v2.8h", - "umaxv h3, v3.8h", + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "and v4.16b, v2.16b, v3.16b", + "bic v5.16b, v3.16b, v2.16b", + "umaxv h2, v4.8h", + "umaxv h3, v5.8h", "umov w20, v2.h[0]", "umov w21, v3.h[0]", - "mov w27, #0x0", - "mov w26, #0x1", + "mov w22, #0x0", + "mov w23, #0x1", "cmp x21, #0x0 (0)", - "cset x21, eq", + "cset x24, eq", "tst w20, w20", "mrs x20, nzcv", - "orr w20, w20, w21, lsl #29", - "msr nzcv, x20" + "orr w21, w20, w24, lsl #29", + "mov x26, x23", + "mov x27, x22", + "msr nzcv, x21" ] }, "vptest ymm0, ymm1": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 18, "Comment": [ "Map 2 0b01 0x16 256-bit" ], "ExpectedArm64ASM": [ - "and z2.d, z16.d, z17.d", - "bic z3.d, z17.d, z16.d", - "umaxv h2, p7, z2.h", - "umaxv h3, p7, z3.h", + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "and z4.d, z2.d, z3.d", + "bic z5.d, z3.d, z2.d", + "umaxv h2, p7, z4.h", + "umaxv h3, p7, z5.h", "umov w20, v2.h[0]", "umov w21, v3.h[0]", - "mov w27, #0x0", - "mov w26, #0x1", + "mov w22, #0x0", + "mov w23, #0x1", "cmp x21, #0x0 (0)", - "cset x21, eq", + "cset x24, eq", "tst w20, w20", "mrs x20, nzcv", - "orr w20, w20, w21, lsl #29", - "msr nzcv, x20" + "orr w21, w20, w24, lsl #29", + "mov x26, x23", + "mov x27, x22", + "msr nzcv, x21" ] }, "vbroadcastss xmm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x18 128-bit" ], "ExpectedArm64ASM": [ - "ld1r {v16.4s}, [x4]" + "mov x20, x4", + "ld1r {v2.4s}, [x20]", + "mov z16.d, p7/m, z2.d" ] }, "vbroadcastss ymm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x18 256-bit" ], "ExpectedArm64ASM": [ - "ld1rw {z16.s}, p7/z, [x4]" + "mov x20, x4", + "ld1rw {z2.s}, p7/z, [x20]", + "mov z16.d, p7/m, z2.d" ] }, "vbroadcastsd ymm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x19 256-bit" ], "ExpectedArm64ASM": [ - "ld1rd {z16.d}, p7/z, [x4]" + "mov x20, x4", + "ld1rd {z2.d}, p7/z, [x20]", + "mov z16.d, p7/m, z2.d" ] }, "vbroadcastf128 ymm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x1a 256-bit" ], "ExpectedArm64ASM": [ - "ld1rqb {z16.b}, p7/z, [x4]" + "mov x20, x4", + "ld1rqb {z2.b}, p7/z, [x20]", + "mov z16.d, p7/m, z2.d" ] }, "vpabsb xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x1c 128-bit" ], "ExpectedArm64ASM": [ - "abs v16.16b, v17.16b" + "mov z2.d, p7/m, z17.d", + "abs v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vpabsb ymm0, ymm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x1c 256-bit" ], "ExpectedArm64ASM": [ - "abs z16.b, p7/m, z17.b" + "mov z2.d, p7/m, z17.d", + "abs z3.b, p7/m, z2.b", + "mov z16.d, p7/m, z3.d" ] }, "vpabsw xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x1d 128-bit" ], "ExpectedArm64ASM": [ - "abs v16.8h, v17.8h" + "mov z2.d, p7/m, z17.d", + "abs v3.8h, v2.8h", + "mov z16.d, p7/m, z3.d" ] }, "vpabsw ymm0, ymm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x1d 256-bit" ], "ExpectedArm64ASM": [ - "abs z16.h, p7/m, z17.h" + "mov z2.d, p7/m, z17.d", + "abs z3.h, p7/m, z2.h", + "mov z16.d, p7/m, z3.d" ] }, "vpabsd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x1e 128-bit" ], "ExpectedArm64ASM": [ - "abs v16.4s, v17.4s" + "mov z2.d, p7/m, z17.d", + "abs v3.4s, v2.4s", + "mov z16.d, p7/m, z3.d" ] }, "vpabsd ymm0, ymm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x1e 256-bit" ], "ExpectedArm64ASM": [ - "abs z16.s, p7/m, z17.s" + "mov z2.d, p7/m, z17.d", + "abs z3.s, p7/m, z2.s", + "mov z16.d, p7/m, z3.d" ] }, "vpmovsxbw xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x20 128-bit" ], "ExpectedArm64ASM": [ - "sxtl v16.8h, v17.8b" + "mov z2.d, p7/m, z17.d", + "sxtl v3.8h, v2.8b", + "mov z16.d, p7/m, z3.d" ] }, "vpmovsxbw ymm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x20 256-bit" ], "ExpectedArm64ASM": [ - "sunpklo z16.h, z17.b" + "mov z2.d, p7/m, z17.d", + "sunpklo z3.h, z2.b", + "mov z16.d, p7/m, z3.d" ] }, "vpmovsxbd xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map 2 0b01 0x21 128-bit" ], "ExpectedArm64ASM": [ - "sxtl v2.8h, v17.8b", - "sxtl v16.4s, v2.4h" + "mov z2.d, p7/m, z17.d", + "sxtl v3.8h, v2.8b", + "sxtl v2.4s, v3.4h", + "mov z16.d, p7/m, z2.d" ] }, "vpmovsxbd ymm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map 2 0b01 0x21 256-bit" ], "ExpectedArm64ASM": [ - "sunpklo z2.h, z17.b", - "sunpklo z16.s, z2.h" + "mov z2.d, p7/m, z17.d", + "sunpklo z3.h, z2.b", + "sunpklo z2.s, z3.h", + "mov z16.d, p7/m, z2.d" ] }, "vpmovsxbq xmm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "Map 2 0b01 0x22 128-bit" ], "ExpectedArm64ASM": [ - "sxtl v2.8h, v17.8b", - "sxtl v2.4s, v2.4h", - "sxtl v16.2d, v2.2s" + "mov z2.d, p7/m, z17.d", + "sxtl v3.8h, v2.8b", + "sxtl v2.4s, v3.4h", + "sxtl v3.2d, v2.2s", + "mov z16.d, p7/m, z3.d" ] }, "vpmovsxbq ymm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "Map 2 0b01 0x22 256-bit" ], "ExpectedArm64ASM": [ - "sunpklo z2.h, z17.b", - "sunpklo z2.s, z2.h", - "sunpklo z16.d, z2.s" + "mov z2.d, p7/m, z17.d", + "sunpklo z3.h, z2.b", + "sunpklo z2.s, z3.h", + "sunpklo z3.d, z2.s", + "mov z16.d, p7/m, z3.d" ] }, "vpmovsxwd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x23 128-bit" ], "ExpectedArm64ASM": [ - "sxtl v16.4s, v17.4h" + "mov z2.d, p7/m, z17.d", + "sxtl v3.4s, v2.4h", + "mov z16.d, p7/m, z3.d" ] }, "vpmovsxwd ymm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x23 256-bit" ], "ExpectedArm64ASM": [ - "sunpklo z16.s, z17.h" + "mov z2.d, p7/m, z17.d", + "sunpklo z3.s, z2.h", + "mov z16.d, p7/m, z3.d" ] }, "vpmovsxwq xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map 2 0b01 0x24 128-bit" ], "ExpectedArm64ASM": [ - "sxtl v2.4s, v17.4h", - "sxtl v16.2d, v2.2s" + "mov z2.d, p7/m, z17.d", + "sxtl v3.4s, v2.4h", + "sxtl v2.2d, v3.2s", + "mov z16.d, p7/m, z2.d" ] }, "vpmovsxwq ymm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map 2 0b01 0x24 256-bit" ], "ExpectedArm64ASM": [ - "sunpklo z2.s, z17.h", - "sunpklo z16.d, z2.s" + "mov z2.d, p7/m, z17.d", + "sunpklo z3.s, z2.h", + "sunpklo z2.d, z3.s", + "mov z16.d, p7/m, z2.d" ] }, "vpmovsxdq xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x25 128-bit" ], "ExpectedArm64ASM": [ - "sxtl v16.2d, v17.2s" + "mov z2.d, p7/m, z17.d", + "sxtl v3.2d, v2.2s", + "mov z16.d, p7/m, z3.d" ] }, "vpmovsxdq ymm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x25 256-bit" ], "ExpectedArm64ASM": [ - "sunpklo z16.d, z17.s" + "mov z2.d, p7/m, z17.d", + "sunpklo z3.d, z2.s", + "mov z16.d, p7/m, z3.d" ] }, "vpmuldq xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 2 0b01 0x28 128-bit" ], "ExpectedArm64ASM": [ - "uzp1 v2.4s, v17.4s, v17.4s", - "uzp1 v3.4s, v18.4s, v18.4s", - "smull v16.2d, v2.2s, v3.2s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "uzp1 v4.4s, v2.4s, v2.4s", + "uzp1 v2.4s, v3.4s, v3.4s", + "smull v3.2d, v4.2s, v2.2s", + "mov z16.d, p7/m, z3.d" ] }, "vpmuldq ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "Map 2 0b01 0x28 256-bit" ], "ExpectedArm64ASM": [ - "uzp1 z2.s, z17.s, z17.s", - "uzp1 z3.s, z18.s, z18.s", - "smullb z0.d, z2.s, z3.s", - "smullt z1.d, z2.s, z3.s", - "zip1 z16.d, z0.d, z1.d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "uzp1 z4.s, z2.s, z2.s", + "uzp1 z2.s, z3.s, z3.s", + "smullb z0.d, z4.s, z2.s", + "smullt z1.d, z4.s, z2.s", + "zip1 z3.d, z0.d, z1.d", + "mov z16.d, p7/m, z3.d" ] }, "vpcmpeqq xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 2 0b01 0x29 128-bit" ], "ExpectedArm64ASM": [ - "cmeq v16.2d, v17.2d, v18.2d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "cmeq v4.2d, v2.2d, v3.2d", + "mov z16.d, p7/m, z4.d" ] }, "vpcmpeqq ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": [ "Map 2 0b01 0x29 256-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", "mrs x0, nzcv", - "cmpeq p0.d, p7/z, z17.d, z18.d", - "not z0.d, p0/m, z17.d", - "movprfx z16.d, p0/z, z17.d", - "orr z16.d, p0/m, z16.d, z0.d", - "msr nzcv, x0" + "cmpeq p0.d, p7/z, z2.d, z3.d", + "not z0.d, p0/m, z2.d", + "movprfx z4.d, p0/z, z2.d", + "orr z4.d, p0/m, z4.d, z0.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vmovntdqa xmm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x2a 128-bit" ], "ExpectedArm64ASM": [ - "ldr q16, [x4]" + "mov x20, x4", + "ldr q2, [x20]", + "mov z16.d, p7/m, z2.d" ] }, "vmovntdqa ymm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x2a 256-bit" ], "ExpectedArm64ASM": [ - "ld1b {z16.b}, p7/z, [x4]" + "mov x20, x4", + "ld1b {z2.b}, p7/z, [x20]", + "mov z16.d, p7/m, z2.d" ] }, "vpackusdw xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 2 0b01 0x2b 128-bit" ], "ExpectedArm64ASM": [ - "sqxtun v16.4h, v17.4s", - "sqxtun2 v16.8h, v18.4s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "sqxtun v4.4h, v2.4s", + "sqxtun2 v4.8h, v3.4s", + "mov z16.d, p7/m, z4.d" ] }, "vpackusdw ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 19, + "ExpectedInstructionCount": 22, "Comment": [ "Map 2 0b01 0x2b 256-bit" ], "ExpectedArm64ASM": [ - "sqxtunb z1.h, z18.s", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "sqxtunb z1.h, z3.s", "uzp1 z1.h, z1.h, z1.h", - "sqxtunb z2.h, z17.s", - "uzp1 z2.h, z2.h, z2.h", - "splice z2.h, p6, z2.h, z1.h", - "mov z1.d, z2.d[1]", - "mov z3.d, z2.d", + "sqxtunb z4.h, z2.s", + "uzp1 z4.h, z4.h, z4.h", + "splice z4.h, p6, z4.h, z1.h", + "mov z1.d, z4.d[1]", + "mov z2.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z3.d, p0/m, z1.d", + "mov z2.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z2.d[2]", - "mov z16.d, z3.d", + "mov z1.d, z4.d[2]", + "mov z3.d, z2.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z3.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vmaskmovps xmm0, xmm1, [rax]": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "Map 2 0b01 0x2c 128-bit" ], "ExpectedArm64ASM": [ - "mrs x20, nzcv", - "cmplt p0.s, p6/z, z17.s, #0", - "ld1w {z2.s}, p0/z, [x4]", - "mov v16.16b, v2.16b", - "msr nzcv, x20" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "mrs x21, nzcv", + "cmplt p0.s, p6/z, z2.s, #0", + "ld1w {z3.s}, p0/z, [x20]", + "mov v2.16b, v3.16b", + "mov z16.d, p7/m, z2.d", + "msr nzcv, x21" ] }, "vmaskmovps ymm0, ymm1, [rax]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b01 0x2c 256-bit" ], "ExpectedArm64ASM": [ - "mrs x20, nzcv", - "cmplt p0.s, p7/z, z17.s, #0", - "ld1w {z16.s}, p0/z, [x4]", - "msr nzcv, x20" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "mrs x21, nzcv", + "cmplt p0.s, p7/z, z2.s, #0", + "ld1w {z3.s}, p0/z, [x20]", + "mov z16.d, p7/m, z3.d", + "msr nzcv, x21" ] }, "vmaskmovpd xmm0, xmm1, [rax]": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "Map 2 0b01 0x2d 128-bit" ], "ExpectedArm64ASM": [ - "mrs x20, nzcv", - "cmplt p0.d, p6/z, z17.d, #0", - "ld1d {z2.d}, p0/z, [x4]", - "mov v16.16b, v2.16b", - "msr nzcv, x20" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "mrs x21, nzcv", + "cmplt p0.d, p6/z, z2.d, #0", + "ld1d {z3.d}, p0/z, [x20]", + "mov v2.16b, v3.16b", + "mov z16.d, p7/m, z2.d", + "msr nzcv, x21" ] }, "vmaskmovpd ymm0, ymm1, [rax]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b01 0x2d 256-bit" ], "ExpectedArm64ASM": [ - "mrs x20, nzcv", - "cmplt p0.d, p7/z, z17.d, #0", - "ld1d {z16.d}, p0/z, [x4]", - "msr nzcv, x20" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "mrs x21, nzcv", + "cmplt p0.d, p7/z, z2.d, #0", + "ld1d {z3.d}, p0/z, [x20]", + "mov z16.d, p7/m, z3.d", + "msr nzcv, x21" ] }, "vmaskmovps [rax], xmm0, xmm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b01 0x2e 128-bit" ], "ExpectedArm64ASM": [ - "mrs x20, nzcv", - "cmplt p0.s, p6/z, z16.s, #0", - "st1w {z17.s}, p0, [x4]", - "msr nzcv, x20" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov x20, x4", + "mrs x21, nzcv", + "cmplt p0.s, p6/z, z2.s, #0", + "st1w {z3.s}, p0, [x20]", + "msr nzcv, x21" ] }, "vmaskmovps [rax], ymm0, ymm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b01 0x2e 256-bit" ], "ExpectedArm64ASM": [ - "mrs x20, nzcv", - "cmplt p0.s, p7/z, z16.s, #0", - "st1w {z17.s}, p0, [x4]", - "msr nzcv, x20" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov x20, x4", + "mrs x21, nzcv", + "cmplt p0.s, p7/z, z2.s, #0", + "st1w {z3.s}, p0, [x20]", + "msr nzcv, x21" ] }, "vmaskmovpd [rax], xmm0, xmm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b01 0x2f 128-bit" ], "ExpectedArm64ASM": [ - "mrs x20, nzcv", - "cmplt p0.d, p6/z, z16.d, #0", - "st1d {z17.d}, p0, [x4]", - "msr nzcv, x20" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov x20, x4", + "mrs x21, nzcv", + "cmplt p0.d, p6/z, z2.d, #0", + "st1d {z3.d}, p0, [x20]", + "msr nzcv, x21" ] }, "vmaskmovpd [rax], ymm0, ymm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b01 0x2f 256-bit" ], "ExpectedArm64ASM": [ - "mrs x20, nzcv", - "cmplt p0.d, p7/z, z16.d, #0", - "st1d {z17.d}, p0, [x4]", - "msr nzcv, x20" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov x20, x4", + "mrs x21, nzcv", + "cmplt p0.d, p7/z, z2.d, #0", + "st1d {z3.d}, p0, [x20]", + "msr nzcv, x21" ] }, "vpmovzxbw xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x30 128-bit" ], "ExpectedArm64ASM": [ - "uxtl v16.8h, v17.8b" + "mov z2.d, p7/m, z17.d", + "uxtl v3.8h, v2.8b", + "mov z16.d, p7/m, z3.d" ] }, "vpmovzxbw ymm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x30 256-bit" ], "ExpectedArm64ASM": [ - "uunpklo z16.h, z17.b" + "mov z2.d, p7/m, z17.d", + "uunpklo z3.h, z2.b", + "mov z16.d, p7/m, z3.d" ] }, "vpmovzxbd xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map 2 0b01 0x31 128-bit" ], "ExpectedArm64ASM": [ - "uxtl v2.8h, v17.8b", - "uxtl v16.4s, v2.4h" + "mov z2.d, p7/m, z17.d", + "uxtl v3.8h, v2.8b", + "uxtl v2.4s, v3.4h", + "mov z16.d, p7/m, z2.d" ] }, "vpmovzxbd ymm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map 2 0b01 0x31 256-bit" ], "ExpectedArm64ASM": [ - "uunpklo z2.h, z17.b", - "uunpklo z16.s, z2.h" + "mov z2.d, p7/m, z17.d", + "uunpklo z3.h, z2.b", + "uunpklo z2.s, z3.h", + "mov z16.d, p7/m, z2.d" ] }, "vpmovzxbq xmm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "Map 2 0b01 0x32 128-bit" ], "ExpectedArm64ASM": [ - "uxtl v2.8h, v17.8b", - "uxtl v2.4s, v2.4h", - "uxtl v16.2d, v2.2s" + "mov z2.d, p7/m, z17.d", + "uxtl v3.8h, v2.8b", + "uxtl v2.4s, v3.4h", + "uxtl v3.2d, v2.2s", + "mov z16.d, p7/m, z3.d" ] }, "vpmovzxbq ymm0, xmm1": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "Map 2 0b01 0x32 256-bit" ], "ExpectedArm64ASM": [ - "uunpklo z2.h, z17.b", - "uunpklo z2.s, z2.h", - "uunpklo z16.d, z2.s" + "mov z2.d, p7/m, z17.d", + "uunpklo z3.h, z2.b", + "uunpklo z2.s, z3.h", + "uunpklo z3.d, z2.s", + "mov z16.d, p7/m, z3.d" ] }, "vpmovzxwd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x33 128-bit" ], "ExpectedArm64ASM": [ - "uxtl v16.4s, v17.4h" + "mov z2.d, p7/m, z17.d", + "uxtl v3.4s, v2.4h", + "mov z16.d, p7/m, z3.d" ] }, "vpmovzxwd ymm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x33 256-bit" ], "ExpectedArm64ASM": [ - "uunpklo z16.s, z17.h" + "mov z2.d, p7/m, z17.d", + "uunpklo z3.s, z2.h", + "mov z16.d, p7/m, z3.d" ] }, "vpmovzxwq xmm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map 2 0b01 0x34 128-bit" ], "ExpectedArm64ASM": [ - "uxtl v2.4s, v17.4h", - "uxtl v16.2d, v2.2s" + "mov z2.d, p7/m, z17.d", + "uxtl v3.4s, v2.4h", + "uxtl v2.2d, v3.2s", + "mov z16.d, p7/m, z2.d" ] }, "vpmovzxwq ymm0, xmm1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map 2 0b01 0x34 256-bit" ], "ExpectedArm64ASM": [ - "uunpklo z2.s, z17.h", - "uunpklo z16.d, z2.s" + "mov z2.d, p7/m, z17.d", + "uunpklo z3.s, z2.h", + "uunpklo z2.d, z3.s", + "mov z16.d, p7/m, z2.d" ] }, "vpmovzxdq xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x35 128-bit" ], "ExpectedArm64ASM": [ - "uxtl v16.2d, v17.2s" + "mov z2.d, p7/m, z17.d", + "uxtl v3.2d, v2.2s", + "mov z16.d, p7/m, z3.d" ] }, "vpmovzxdq ymm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x35 256-bit" ], "ExpectedArm64ASM": [ - "uunpklo z16.d, z17.s" + "mov z2.d, p7/m, z17.d", + "uunpklo z3.d, z2.s", + "mov z16.d, p7/m, z3.d" ] }, "vpermd ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 14, "Comment": [ "Map 2 0b01 0x36 256-bit" ], "ExpectedArm64ASM": [ - "mov z2.s, #7", - "and z2.d, z17.d, z2.d", - "trn1 z2.b, z2.b, z2.b", - "trn1 z2.h, z2.h, z2.h", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z4.s, #7", + "and z5.d, z2.d, z4.d", + "trn1 z2.b, z5.b, z5.b", + "trn1 z4.h, z2.h, z2.h", + "movprfx z2, z4", "lsl z2.b, p7/m, z2.b, #2", "mov w20, #0x100", "movk w20, #0x302, lsl #16", - "mov z3.s, w20", - "add z2.b, z2.b, z3.b", - "tbl z16.b, {z18.b}, z2.b" + "mov z4.s, w20", + "add z5.b, z2.b, z4.b", + "tbl z2.b, {z3.b}, z5.b", + "mov z16.d, p7/m, z2.d" ] }, "vpcmpgtq xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 2 0b01 0x37 128-bit" ], "ExpectedArm64ASM": [ - "cmgt v16.2d, v17.2d, v18.2d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "cmgt v4.2d, v2.2d, v3.2d", + "mov z16.d, p7/m, z4.d" ] }, "vpcmpgtq ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": [ "Map 2 0b01 0x37 256-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", "mrs x0, nzcv", - "cmpgt p0.d, p7/z, z17.d, z18.d", - "not z0.d, p0/m, z17.d", - "movprfx z16.d, p0/z, z17.d", - "orr z16.d, p0/m, z16.d, z0.d", - "msr nzcv, x0" + "cmpgt p0.d, p7/z, z2.d, z3.d", + "not z0.d, p0/m, z2.d", + "movprfx z4.d, p0/z, z2.d", + "orr z4.d, p0/m, z4.d, z0.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vpminsb xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 2 0b01 0x38 128-bit" ], "ExpectedArm64ASM": [ - "smin v16.16b, v17.16b, v18.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "smin v4.16b, v2.16b, v3.16b", + "mov z16.d, p7/m, z4.d" ] }, "vpminsb ymm0, ymm1, ymm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "Aliasing source and destination", "Map 2 0b01 0x38 256-bit" ], "ExpectedArm64ASM": [ - "smin z16.b, p7/m, z16.b, z17.b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z16.d", + "movprfx z4, z2", + "smin z4.b, p7/m, z4.b, z3.b", + "mov z16.d, p7/m, z4.d" ] }, "vpminsb ymm0, ymm0, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "Aliasing source and destination", "Map 2 0b01 0x38 256-bit" ], "ExpectedArm64ASM": [ - "smin z16.b, p7/m, z16.b, z18.b" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "smin z4.b, p7/m, z4.b, z3.b", + "mov z16.d, p7/m, z4.d" ] }, "vpminsb ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 2 0b01 0x38 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z16, z17", - "smin z16.b, p7/m, z16.b, z18.b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "smin z4.b, p7/m, z4.b, z3.b", + "mov z16.d, p7/m, z4.d" ] }, "vpminsd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 2 0b01 0x39 128-bit" ], "ExpectedArm64ASM": [ - "smin v16.4s, v17.4s, v18.4s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "smin v4.4s, v2.4s, v3.4s", + "mov z16.d, p7/m, z4.d" ] }, "vpminsd ymm0, ymm1, ymm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "Aliasing source and destination", "Map 2 0b01 0x39 256-bit" ], "ExpectedArm64ASM": [ - "smin z16.s, p7/m, z16.s, z17.s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z16.d", + "movprfx z4, z2", + "smin z4.s, p7/m, z4.s, z3.s", + "mov z16.d, p7/m, z4.d" ] }, "vpminsd ymm0, ymm0, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "Aliasing source and destination", "Map 2 0b01 0x39 256-bit" ], "ExpectedArm64ASM": [ - "smin z16.s, p7/m, z16.s, z18.s" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "smin z4.s, p7/m, z4.s, z3.s", + "mov z16.d, p7/m, z4.d" ] }, "vpminsd ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 2 0b01 0x39 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z16, z17", - "smin z16.s, p7/m, z16.s, z18.s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "smin z4.s, p7/m, z4.s, z3.s", + "mov z16.d, p7/m, z4.d" ] }, "vpminuw xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 2 0b01 0x3a 128-bit" ], "ExpectedArm64ASM": [ - "umin v16.8h, v17.8h, v18.8h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "umin v4.8h, v2.8h, v3.8h", + "mov z16.d, p7/m, z4.d" ] }, "vpminuw ymm0, ymm1, ymm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "Aliasing source and destination", "Map 2 0b01 0x3a 256-bit" ], "ExpectedArm64ASM": [ - "umin z16.h, p7/m, z16.h, z17.h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z16.d", + "movprfx z4, z2", + "umin z4.h, p7/m, z4.h, z3.h", + "mov z16.d, p7/m, z4.d" ] }, "vpminuw ymm0, ymm0, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "Aliasing source and destination", "Map 2 0b01 0x3a 256-bit" ], "ExpectedArm64ASM": [ - "umin z16.h, p7/m, z16.h, z18.h" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "umin z4.h, p7/m, z4.h, z3.h", + "mov z16.d, p7/m, z4.d" ] }, "vpminuw ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 2 0b01 0x3a 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z16, z17", - "umin z16.h, p7/m, z16.h, z18.h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "umin z4.h, p7/m, z4.h, z3.h", + "mov z16.d, p7/m, z4.d" ] }, "vpminud xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 2 0b01 0x3b 128-bit" ], "ExpectedArm64ASM": [ - "umin v16.4s, v17.4s, v18.4s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "umin v4.4s, v2.4s, v3.4s", + "mov z16.d, p7/m, z4.d" ] }, "vpminud ymm0, ymm1, ymm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "Aliasing source and destination", "Map 2 0b01 0x3b 256-bit" ], "ExpectedArm64ASM": [ - "umin z16.s, p7/m, z16.s, z17.s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z16.d", + "movprfx z4, z2", + "umin z4.s, p7/m, z4.s, z3.s", + "mov z16.d, p7/m, z4.d" ] }, "vpminud ymm0, ymm0, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "Aliasing source and destination", "Map 2 0b01 0x3b 256-bit" ], "ExpectedArm64ASM": [ - "umin z16.s, p7/m, z16.s, z18.s" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "umin z4.s, p7/m, z4.s, z3.s", + "mov z16.d, p7/m, z4.d" ] }, "vpminud ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 2 0b01 0x3b 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z16, z17", - "umin z16.s, p7/m, z16.s, z18.s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "umin z4.s, p7/m, z4.s, z3.s", + "mov z16.d, p7/m, z4.d" ] }, "vpmaxsb xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 2 0b01 0x3c 128-bit" ], "ExpectedArm64ASM": [ - "smax v16.16b, v17.16b, v18.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "smax v4.16b, v2.16b, v3.16b", + "mov z16.d, p7/m, z4.d" ] }, "vpmaxsb ymm0, ymm0, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "Aliasing source and destination", "Map 2 0b01 0x3c 256-bit" ], "ExpectedArm64ASM": [ - "smax z16.b, p7/m, z16.b, z18.b" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "smax z4.b, p7/m, z4.b, z3.b", + "mov z16.d, p7/m, z4.d" ] }, "vpmaxsb ymm0, ymm1, ymm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "Map 2 0b01 0x3c 256-bit" ], "ExpectedArm64ASM": [ - "smax z16.b, p7/m, z16.b, z17.b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z16.d", + "movprfx z4, z2", + "smax z4.b, p7/m, z4.b, z3.b", + "mov z16.d, p7/m, z4.d" ] }, "vpmaxsb ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 2 0b01 0x3c 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z16, z17", - "smax z16.b, p7/m, z16.b, z18.b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "smax z4.b, p7/m, z4.b, z3.b", + "mov z16.d, p7/m, z4.d" ] }, "vpmaxsd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 2 0b01 0x3d 128-bit" ], "ExpectedArm64ASM": [ - "smax v16.4s, v17.4s, v18.4s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "smax v4.4s, v2.4s, v3.4s", + "mov z16.d, p7/m, z4.d" ] }, "vpmaxsd ymm0, ymm1, ymm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "Aliasing source and destination", "Map 2 0b01 0x3d 256-bit" ], "ExpectedArm64ASM": [ - "smax z16.s, p7/m, z16.s, z17.s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z16.d", + "movprfx z4, z2", + "smax z4.s, p7/m, z4.s, z3.s", + "mov z16.d, p7/m, z4.d" ] }, "vpmaxsd ymm0, ymm0, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "Aliasing source and destination", "Map 2 0b01 0x3d 256-bit" ], "ExpectedArm64ASM": [ - "smax z16.s, p7/m, z16.s, z18.s" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "smax z4.s, p7/m, z4.s, z3.s", + "mov z16.d, p7/m, z4.d" ] }, "vpmaxsd ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 2 0b01 0x3d 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z16, z17", - "smax z16.s, p7/m, z16.s, z18.s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "smax z4.s, p7/m, z4.s, z3.s", + "mov z16.d, p7/m, z4.d" ] }, "vpmaxuw xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 2 0b01 0x3e 128-bit" ], "ExpectedArm64ASM": [ - "umax v16.8h, v17.8h, v18.8h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "umax v4.8h, v2.8h, v3.8h", + "mov z16.d, p7/m, z4.d" ] }, "vpmaxuw ymm0, ymm1, ymm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "Aliasing source and destination", "Map 2 0b01 0x3e 256-bit" ], "ExpectedArm64ASM": [ - "umax z16.h, p7/m, z16.h, z17.h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z16.d", + "movprfx z4, z2", + "umax z4.h, p7/m, z4.h, z3.h", + "mov z16.d, p7/m, z4.d" ] }, "vpmaxuw ymm0, ymm0, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "Aliasing source and destination", "Map 2 0b01 0x3e 256-bit" ], "ExpectedArm64ASM": [ - "umax z16.h, p7/m, z16.h, z18.h" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "umax z4.h, p7/m, z4.h, z3.h", + "mov z16.d, p7/m, z4.d" ] }, "vpmaxuw ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 2 0b01 0x3e 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z16, z17", - "umax z16.h, p7/m, z16.h, z18.h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "umax z4.h, p7/m, z4.h, z3.h", + "mov z16.d, p7/m, z4.d" ] }, "vpmaxud xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 2 0b01 0x3f 128-bit" ], "ExpectedArm64ASM": [ - "umax v16.4s, v17.4s, v18.4s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "umax v4.4s, v2.4s, v3.4s", + "mov z16.d, p7/m, z4.d" ] }, "vpmaxud ymm0, ymm0, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "Aliasing source and destination", "Map 2 0b01 0x3f 256-bit" ], "ExpectedArm64ASM": [ - "umax z16.s, p7/m, z16.s, z18.s" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "umax z4.s, p7/m, z4.s, z3.s", + "mov z16.d, p7/m, z4.d" ] }, "vpmaxud ymm0, ymm1, ymm0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 5, "Comment": [ "Map 2 0b01 0x3f 256-bit" ], "ExpectedArm64ASM": [ - "umax z16.s, p7/m, z16.s, z17.s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z16.d", + "movprfx z4, z2", + "umax z4.s, p7/m, z4.s, z3.s", + "mov z16.d, p7/m, z4.d" ] }, "vpmaxud ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 2 0b01 0x3f 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z16, z17", - "umax z16.s, p7/m, z16.s, z18.s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movprfx z4, z2", + "umax z4.s, p7/m, z4.s, z3.s", + "mov z16.d, p7/m, z4.d" ] }, "vpmulld xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 2 0b01 0x40 128-bit" ], "ExpectedArm64ASM": [ - "mul v16.4s, v17.4s, v18.4s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mul v4.4s, v2.4s, v3.4s", + "mov z16.d, p7/m, z4.d" ] }, "vpmulld ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 2 0b01 0x40 256-bit" ], "ExpectedArm64ASM": [ - "mul z16.s, z17.s, z18.s" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mul z4.s, z2.s, z3.s", + "mov z16.d, p7/m, z4.d" ] }, "vphminposuw xmm0, xmm1": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 8, "Comment": [ "Map 2 0b01 0x41 256-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2048]", - "zip1 v3.8h, v2.8h, v17.8h", - "zip2 v2.8h, v2.8h, v17.8h", - "umin v2.4s, v3.4s, v2.4s", - "uminv s2, v2.4s", - "rev32 v16.8h, v2.8h" + "mov z2.d, p7/m, z17.d", + "ldr q3, [x28, #2048]", + "zip1 v4.8h, v3.8h, v2.8h", + "zip2 v5.8h, v3.8h, v2.8h", + "umin v2.4s, v4.4s, v5.4s", + "uminv s3, v2.4s", + "rev32 v2.8h, v3.8h", + "mov z16.d, p7/m, z2.d" ] }, "vpsrlvd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b01 0x45 128-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", "movi v0.4s, #0x20, lsl #0", - "umin v0.4s, v0.4s, v18.4s", + "umin v0.4s, v0.4s, v3.4s", "neg v0.4s, v0.4s", - "ushl v16.4s, v17.4s, v0.4s" + "ushl v4.4s, v2.4s, v0.4s", + "mov z16.d, p7/m, z4.d" ] }, "vpsrlvd ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b01 0x45 256-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", "mov z1.s, #32", - "umin z1.s, p7/m, z1.s, z18.s", - "movprfx z16, z17", - "lsr z16.s, p7/m, z16.s, z1.s" + "umin z1.s, p7/m, z1.s, z3.s", + "movprfx z4, z2", + "lsr z4.s, p7/m, z4.s, z1.s", + "mov z16.d, p7/m, z4.d" ] }, "vpsrlvq xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 9, "Comment": [ "Map 2 0b01 0x45 128-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", "mov w0, #0x40", "dup v0.2d, x0", - "cmhi v1.2d, v18.2d, v0.2d", - "bif v0.16b, v18.16b, v1.16b", + "cmhi v1.2d, v3.2d, v0.2d", + "bif v0.16b, v3.16b, v1.16b", "neg v0.2d, v0.2d", - "ushl v16.2d, v17.2d, v0.2d" + "ushl v4.2d, v2.2d, v0.2d", + "mov z16.d, p7/m, z4.d" ] }, "vpsrlvq ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b01 0x45 256-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", "mov z1.d, #64", - "umin z1.d, p7/m, z1.d, z18.d", - "movprfx z16, z17", - "lsr z16.d, p7/m, z16.d, z1.d" + "umin z1.d, p7/m, z1.d, z3.d", + "movprfx z4, z2", + "lsr z4.d, p7/m, z4.d, z1.d", + "mov z16.d, p7/m, z4.d" ] }, "vpsravd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b01 0x46 128-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", "movi v0.4s, #0x1f, lsl #0", - "umin v0.4s, v0.4s, v18.4s", + "umin v0.4s, v0.4s, v3.4s", "neg v0.4s, v0.4s", - "sshl v16.4s, v17.4s, v0.4s" + "sshl v4.4s, v2.4s, v0.4s", + "mov z16.d, p7/m, z4.d" ] }, "vpsravd ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b01 0x46 256-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", "mov z0.s, #31", - "umin z0.s, p7/m, z0.s, z18.s", - "movprfx z16, z17", - "asr z16.s, p7/m, z16.s, z0.s" + "umin z0.s, p7/m, z0.s, z3.s", + "movprfx z4, z2", + "asr z4.s, p7/m, z4.s, z0.s", + "mov z16.d, p7/m, z4.d" ] }, "vpsllvd xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 2 0b01 0x47 128-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", "movi v0.4s, #0x20, lsl #0", - "umin v0.4s, v0.4s, v18.4s", - "ushl v16.4s, v17.4s, v0.4s" + "umin v0.4s, v0.4s, v3.4s", + "ushl v4.4s, v2.4s, v0.4s", + "mov z16.d, p7/m, z4.d" ] }, "vpsllvd ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b01 0x47 256-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", "mov z1.s, #32", - "umin z1.s, p7/m, z1.s, z18.s", - "movprfx z16, z17", - "lsl z16.s, p7/m, z16.s, z1.s" + "umin z1.s, p7/m, z1.s, z3.s", + "movprfx z4, z2", + "lsl z4.s, p7/m, z4.s, z1.s", + "mov z16.d, p7/m, z4.d" ] }, "vpsllvq xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "Map 2 0b01 0x47 128-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", "mov w0, #0x40", "dup v0.2d, x0", - "cmhi v1.2d, v18.2d, v0.2d", - "bif v0.16b, v18.16b, v1.16b", - "ushl v16.2d, v17.2d, v0.2d" + "cmhi v1.2d, v3.2d, v0.2d", + "bif v0.16b, v3.16b, v1.16b", + "ushl v4.2d, v2.2d, v0.2d", + "mov z16.d, p7/m, z4.d" ] }, "vpsllvq ymm0, ymm1, ymm2": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b01 0x47 256-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", "mov z1.d, #64", - "umin z1.d, p7/m, z1.d, z18.d", - "movprfx z16, z17", - "lsl z16.d, p7/m, z16.d, z1.d" + "umin z1.d, p7/m, z1.d, z3.d", + "movprfx z4, z2", + "lsl z4.d, p7/m, z4.d, z1.d", + "mov z16.d, p7/m, z4.d" ] }, "vpbroadcastd xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x58 128-bit" ], "ExpectedArm64ASM": [ - "dup v16.4s, v17.s[0]" + "mov z2.d, p7/m, z17.d", + "dup v3.4s, v2.s[0]", + "mov z16.d, p7/m, z3.d" ] }, "vpbroadcastd xmm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x58 128-bit" ], "ExpectedArm64ASM": [ - "ld1r {v16.4s}, [x4]" + "mov x20, x4", + "ld1r {v2.4s}, [x20]", + "mov z16.d, p7/m, z2.d" ] }, "vpbroadcastd ymm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x58 256-bit" ], "ExpectedArm64ASM": [ - "mov z16.s, s17" + "mov z2.d, p7/m, z17.d", + "mov z3.s, s2", + "mov z16.d, p7/m, z3.d" ] }, "vpbroadcastd ymm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x58 256-bit" ], "ExpectedArm64ASM": [ - "ld1rw {z16.s}, p7/z, [x4]" + "mov x20, x4", + "ld1rw {z2.s}, p7/z, [x20]", + "mov z16.d, p7/m, z2.d" ] }, "vpbroadcastq xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x59 128-bit" ], "ExpectedArm64ASM": [ - "dup v16.2d, v17.d[0]" + "mov z2.d, p7/m, z17.d", + "dup v3.2d, v2.d[0]", + "mov z16.d, p7/m, z3.d" ] }, "vpbroadcastq xmm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x59 128-bit" ], "ExpectedArm64ASM": [ - "ld1r {v16.2d}, [x4]" + "mov x20, x4", + "ld1r {v2.2d}, [x20]", + "mov z16.d, p7/m, z2.d" ] }, "vpbroadcastq ymm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x59 256-bit" ], "ExpectedArm64ASM": [ - "mov z16.d, d17" + "mov z2.d, p7/m, z17.d", + "mov z3.d, d2", + "mov z16.d, p7/m, z3.d" ] }, "vpbroadcastq ymm0, [rax]": { @@ -1773,45 +2182,55 @@ "Comment": [ "Map 2 0b01 0x59 256-bit" ], - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ld1rd {z16.d}, p7/z, [x4]" + "mov x20, x4", + "ld1rd {z2.d}, p7/z, [x20]", + "mov z16.d, p7/m, z2.d" ] }, "vbroadcasti128 ymm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x5a 256-bit" ], "ExpectedArm64ASM": [ - "ld1rqb {z16.b}, p7/z, [x4]" + "mov x20, x4", + "ld1rqb {z2.b}, p7/z, [x20]", + "mov z16.d, p7/m, z2.d" ] }, "vpbroadcastb xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x78 128-bit" ], "ExpectedArm64ASM": [ - "dup v16.16b, v17.b[0]" + "mov z2.d, p7/m, z17.d", + "dup v3.16b, v2.b[0]", + "mov z16.d, p7/m, z3.d" ] }, "vpbroadcastb xmm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x78 128-bit" ], "ExpectedArm64ASM": [ - "ld1r {v16.16b}, [x4]" + "mov x20, x4", + "ld1r {v2.16b}, [x20]", + "mov z16.d, p7/m, z2.d" ] }, "vpbroadcastb ymm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x78 256-bit" ], "ExpectedArm64ASM": [ - "mov z16.b, b17" + "mov z2.d, p7/m, z17.d", + "mov z3.b, b2", + "mov z16.d, p7/m, z3.d" ] }, "vpbroadcastb ymm0, [rax]": { @@ -1819,36 +2238,44 @@ "Comment": [ "Map 2 0b01 0x78 256-bit" ], - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ld1rb {z16.b}, p7/z, [x4]" + "mov x20, x4", + "ld1rb {z2.b}, p7/z, [x20]", + "mov z16.d, p7/m, z2.d" ] }, "vpbroadcastw xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x79 128-bit" ], "ExpectedArm64ASM": [ - "dup v16.8h, v17.h[0]" + "mov z2.d, p7/m, z17.d", + "dup v3.8h, v2.h[0]", + "mov z16.d, p7/m, z3.d" ] }, "vpbroadcastw xmm0, [rax]": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x79 128-bit" ], "ExpectedArm64ASM": [ - "ld1r {v16.8h}, [x4]" + "mov x20, x4", + "ld1r {v2.8h}, [x20]", + "mov z16.d, p7/m, z2.d" ] }, "vpbroadcastw ymm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0x79 256-bit" ], "ExpectedArm64ASM": [ - "mov z16.h, h17" + "mov z2.d, p7/m, z17.d", + "mov z3.h, h2", + "mov z16.d, p7/m, z3.d" ] }, "vpbroadcastw ymm0, [rax]": { @@ -1856,107 +2283,133 @@ "Comment": [ "Map 2 0b01 0x79 256-bit" ], - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "ExpectedArm64ASM": [ - "ld1rh {z16.h}, p7/z, [x4]" + "mov x20, x4", + "ld1rh {z2.h}, p7/z, [x20]", + "mov z16.d, p7/m, z2.d" ] }, "vpmaskmovd xmm0, xmm1, [rax]": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "Map 2 0b01 0x8c 128-bit" ], "ExpectedArm64ASM": [ - "mrs x20, nzcv", - "cmplt p0.s, p6/z, z17.s, #0", - "ld1w {z2.s}, p0/z, [x4]", - "mov v16.16b, v2.16b", - "msr nzcv, x20" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "mrs x21, nzcv", + "cmplt p0.s, p6/z, z2.s, #0", + "ld1w {z3.s}, p0/z, [x20]", + "mov v2.16b, v3.16b", + "mov z16.d, p7/m, z2.d", + "msr nzcv, x21" ] }, "vpmaskmovd ymm0, ymm1, [rax]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b01 0x8c 256-bit" ], "ExpectedArm64ASM": [ - "mrs x20, nzcv", - "cmplt p0.s, p7/z, z17.s, #0", - "ld1w {z16.s}, p0/z, [x4]", - "msr nzcv, x20" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "mrs x21, nzcv", + "cmplt p0.s, p7/z, z2.s, #0", + "ld1w {z3.s}, p0/z, [x20]", + "mov z16.d, p7/m, z3.d", + "msr nzcv, x21" ] }, "vpmaskmovq xmm0, xmm1, [rax]": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "Map 2 0b01 0x8c 128-bit" ], "ExpectedArm64ASM": [ - "mrs x20, nzcv", - "cmplt p0.d, p6/z, z17.d, #0", - "ld1d {z2.d}, p0/z, [x4]", - "mov v16.16b, v2.16b", - "msr nzcv, x20" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "mrs x21, nzcv", + "cmplt p0.d, p6/z, z2.d, #0", + "ld1d {z3.d}, p0/z, [x20]", + "mov v2.16b, v3.16b", + "mov z16.d, p7/m, z2.d", + "msr nzcv, x21" ] }, "vpmaskmovq ymm0, ymm1, [rax]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b01 0x8c 256-bit" ], "ExpectedArm64ASM": [ - "mrs x20, nzcv", - "cmplt p0.d, p7/z, z17.d, #0", - "ld1d {z16.d}, p0/z, [x4]", - "msr nzcv, x20" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "mrs x21, nzcv", + "cmplt p0.d, p7/z, z2.d, #0", + "ld1d {z3.d}, p0/z, [x20]", + "mov z16.d, p7/m, z3.d", + "msr nzcv, x21" ] }, "vpmaskmovd [rax], xmm0, xmm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b01 0x8e 128-bit" ], "ExpectedArm64ASM": [ - "mrs x20, nzcv", - "cmplt p0.s, p6/z, z16.s, #0", - "st1w {z17.s}, p0, [x4]", - "msr nzcv, x20" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov x20, x4", + "mrs x21, nzcv", + "cmplt p0.s, p6/z, z2.s, #0", + "st1w {z3.s}, p0, [x20]", + "msr nzcv, x21" ] }, "vpmaskmovd [rax], ymm0, ymm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b01 0x8e 256-bit" ], "ExpectedArm64ASM": [ - "mrs x20, nzcv", - "cmplt p0.s, p7/z, z16.s, #0", - "st1w {z17.s}, p0, [x4]", - "msr nzcv, x20" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov x20, x4", + "mrs x21, nzcv", + "cmplt p0.s, p7/z, z2.s, #0", + "st1w {z3.s}, p0, [x20]", + "msr nzcv, x21" ] }, "vpmaskmovq [rax], xmm0, xmm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b01 0x8e 128-bit" ], "ExpectedArm64ASM": [ - "mrs x20, nzcv", - "cmplt p0.d, p6/z, z16.d, #0", - "st1d {z17.d}, p0, [x4]", - "msr nzcv, x20" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov x20, x4", + "mrs x21, nzcv", + "cmplt p0.d, p6/z, z2.d, #0", + "st1d {z3.d}, p0, [x20]", + "msr nzcv, x21" ] }, "vpmaskmovq [rax], ymm0, ymm1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b01 0x8e 256-bit" ], "ExpectedArm64ASM": [ - "mrs x20, nzcv", - "cmplt p0.d, p7/z, z16.d, #0", - "st1d {z17.d}, p0, [x4]", - "msr nzcv, x20" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "mov x20, x4", + "mrs x21, nzcv", + "cmplt p0.d, p7/z, z2.d, #0", + "st1d {z3.d}, p0, [x20]", + "msr nzcv, x21" ] }, "vpgatherdd xmm0, [xmm1*1 + rax], xmm2": { @@ -3080,25 +3533,30 @@ ] }, "vaesimc xmm0, xmm1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 2 0b01 0xdb 128-bit" ], "ExpectedArm64ASM": [ - "unimplemented (Unimplemented)" + "mov z2.d, p7/m, z17.d", + "unimplemented (Unimplemented)", + "mov z16.d, p7/m, z3.d" ] }, "vaesenc xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "Map 2 0b01 0xdc 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov v0.16b, v17.16b", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov v0.16b, v2.16b", "unimplemented (Unimplemented)", "unimplemented (Unimplemented)", - "eor v16.16b, v0.16b, v18.16b" + "eor v5.16b, v0.16b, v3.16b", + "mov z16.d, p7/m, z5.d" ] }, "vaesenc ymm0, ymm1, ymm2": { @@ -3109,15 +3567,18 @@ ] }, "vaesenclast xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b01 0xdd 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov v0.16b, v17.16b", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov v0.16b, v2.16b", "unimplemented (Unimplemented)", - "eor v16.16b, v0.16b, v18.16b" + "eor v5.16b, v0.16b, v3.16b", + "mov z16.d, p7/m, z5.d" ] }, "vaesenclast ymm0, ymm1, ymm2": { @@ -3128,16 +3589,19 @@ ] }, "vaesdec xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 8, "Comment": [ "Map 2 0b01 0xde 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov v0.16b, v17.16b", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov v0.16b, v2.16b", "unimplemented (Unimplemented)", "unimplemented (Unimplemented)", - "eor v16.16b, v0.16b, v18.16b" + "eor v5.16b, v0.16b, v3.16b", + "mov z16.d, p7/m, z5.d" ] }, "vaesdec ymm0, ymm1, ymm2": { @@ -3148,15 +3612,18 @@ ] }, "vaesdeclast xmm0, xmm1, xmm2": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b01 0xdf 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov v0.16b, v17.16b", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov v0.16b, v2.16b", "unimplemented (Unimplemented)", - "eor v16.16b, v0.16b, v18.16b" + "eor v5.16b, v0.16b, v3.16b", + "mov z16.d, p7/m, z5.d" ] }, "vaesdeclast ymm0, ymm1, ymm2": { @@ -3167,374 +3634,462 @@ ] }, "andn eax, ebx, ecx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 2 0b00 0xf2 32-bit" ], "ExpectedArm64ASM": [ - "bic w4, w5, w7", - "mov x26, x4", - "tst w4, w4" + "mov x20, x7", + "mov x21, x5", + "bic w22, w21, w20", + "mov x4, x22", + "mov x26, x22", + "tst w22, w22" ] }, "andn rax, rbx, rcx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 2 0b00 0xf2 64-bit" ], "ExpectedArm64ASM": [ - "bic x4, x5, x7", - "mov x26, x4", - "tst x4, x4" + "mov x20, x7", + "mov x21, x5", + "bic x22, x21, x20", + "mov x4, x22", + "mov x26, x22", + "tst x22, x22" ] }, "bzhi eax, ebx, ecx": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 13, "Comment": [ "Map 2 0b00 0xf5 32-bit" ], "ExpectedArm64ASM": [ - "mov w20, #0xffffffff", - "lsl w20, w20, w5", - "bic w20, w7, w20", - "tst x5, #0xe0", - "csel w4, w7, w20, ne", + "mov x20, x7", + "mov x21, x5", + "mov w22, #0xffffffff", + "lsl w23, w22, w21", + "bic w22, w20, w23", + "tst x21, #0xe0", + "csel w21, w20, w22, ne", + "mov x4, x21", "cset w20, ne", - "tst w4, w4", + "tst w21, w21", "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "msr nzcv, x20" + "orr w22, w21, w20, lsl #29", + "msr nzcv, x22" ] }, "bzhi rax, rbx, rcx": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 13, "Comment": [ "Map 2 0b00 0xf5 64-bit" ], "ExpectedArm64ASM": [ - "mov x20, #0xffffffffffffffff", - "lsl x20, x20, x5", - "bic x20, x7, x20", - "tst x5, #0xc0", - "csel x4, x7, x20, ne", + "mov x20, x7", + "mov x21, x5", + "mov x22, #0xffffffffffffffff", + "lsl x23, x22, x21", + "bic x22, x20, x23", + "tst x21, #0xc0", + "csel x21, x20, x22, ne", + "mov x4, x21", "cset w20, ne", - "tst x4, x4", + "tst x21, x21", "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "msr nzcv, x20" + "orr w22, w21, w20, lsl #29", + "msr nzcv, x22" ] }, "pext eax, ebx, ecx": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 15, "Comment": [ "Map 2 0b10 0xf5 32-bit" ], "ExpectedArm64ASM": [ - "cbz w5, #+0x2c", - "mov w0, w5", - "mov w2, w7", - "mov w4, wzr", + "mov x20, x7", + "mov x21, x5", + "cbz w21, #+0x2c", + "mov w0, w21", + "mov w2, w20", + "mov w22, wzr", "cbz w0, #+0x20", "clz w1, w0", "lsl w2, w2, w1", "lsl w0, w0, w1", - "extr w4, w4, w2, #31", + "extr w22, w22, w2, #31", "bfc w0, #31, #1", "b #-0x18", - "mov w4, wzr" + "mov w22, wzr", + "mov x4, x22" ] }, "pext rax, rbx, rcx": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 15, "Comment": [ "Map 2 0b10 0xf5 64-bit" ], "ExpectedArm64ASM": [ - "cbz x5, #+0x2c", - "mov x0, x5", - "mov x2, x7", - "mov x4, xzr", + "mov x20, x7", + "mov x21, x5", + "cbz x21, #+0x2c", + "mov x0, x21", + "mov x2, x20", + "mov x22, xzr", "cbz x0, #+0x20", "clz x1, x0", "lsl x2, x2, x1", "lsl x0, x0, x1", - "extr x4, x4, x2, #63", + "extr x22, x22, x2, #63", "bfc x0, #63, #1", "b #-0x18", - "mov x4, xzr" + "mov x22, xzr", + "mov x4, x22" ] }, "pdep eax, ebx, ecx": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 17, "Comment": [ "Map 2 0b11 0xf5 32-bit" ], "ExpectedArm64ASM": [ - "mov x0, x7", - "mov x1, x5", - "mov w4, #0x0", - "cbz w5, #+0x2c", + "mov x20, x7", + "mov x21, x5", + "mov x0, x20", + "mov x1, x21", + "mov w22, #0x0", + "cbz w21, #+0x2c", "neg w2, w1", "and w2, w2, w1", "sbfx w3, w0, #0, #1", "eor w1, w1, w2", "and w2, w3, w2", "neg w3, w1", - "orr w4, w4, w2", + "orr w22, w22, w2", "lsr w0, w0, #1", "and w2, w1, w3", - "cbnz w2, #-0x1c" + "cbnz w2, #-0x1c", + "mov x4, x22" ] }, "pdep rax, rbx, rcx": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 17, "Comment": [ "Map 2 0b11 0xf5 64-bit" ], "ExpectedArm64ASM": [ - "mov x0, x7", - "mov x1, x5", - "mov x4, #0x0", - "cbz x5, #+0x2c", + "mov x20, x7", + "mov x21, x5", + "mov x0, x20", + "mov x1, x21", + "mov x22, #0x0", + "cbz x21, #+0x2c", "neg x2, x1", "and x2, x2, x1", "sbfx x3, x0, #0, #1", "eor x1, x1, x2", "and x2, x3, x2", "neg x3, x1", - "orr x4, x4, x2", + "orr x22, x22, x2", "lsr x0, x0, #1", "and x2, x1, x3", - "cbnz x2, #-0x1c" + "cbnz x2, #-0x1c", + "mov x4, x22" ] }, "mulx eax, ebx, ecx": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": [ "Map 2 0b11 0xf6 32-bit" ], "ExpectedArm64ASM": [ - "mul w7, w5, w6", - "ubfx x0, x5, #0, #32", - "ubfx x1, x6, #0, #32", - "mul x4, x0, x1", - "lsr x4, x4, #32" + "mov x20, x5", + "mov x21, x6", + "mul w22, w20, w21", + "ubfx x0, x20, #0, #32", + "ubfx x1, x21, #0, #32", + "mul x23, x0, x1", + "lsr x23, x23, #32", + "mov x7, x22", + "mov x4, x23" ] }, "mulx eax, eax, ebx": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Same two destinations should only compute high part", "Map 2 0b11 0xf6 32-bit" ], "ExpectedArm64ASM": [ - "ubfx x0, x7, #0, #32", - "ubfx x1, x6, #0, #32", - "mul x4, x0, x1", - "lsr x4, x4, #32" + "mov x20, x7", + "mov x21, x6", + "ubfx x0, x20, #0, #32", + "ubfx x1, x21, #0, #32", + "mul x22, x0, x1", + "lsr x22, x22, #32", + "mov x4, x22" ] }, "mulx eax, ebx, [ecx]": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": [ "Map 2 0b11 0xf6 32-bit" ], "ExpectedArm64ASM": [ - "mov w20, w5", - "ldr w20, [x20]", - "mul w7, w20, w6", + "mov x20, x5", + "mov w21, w20", + "ldr w20, [x21]", + "mov x21, x6", + "mul w22, w20, w21", "ubfx x0, x20, #0, #32", - "ubfx x1, x6, #0, #32", - "mul x4, x0, x1", - "lsr x4, x4, #32" + "ubfx x1, x21, #0, #32", + "mul x23, x0, x1", + "lsr x23, x23, #32", + "mov x7, x22", + "mov x4, x23" ] }, "mulx rax, rbx, rcx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 6, "Comment": [ "Map 2 0b11 0xf6 64-bit" ], "ExpectedArm64ASM": [ - "mul x7, x5, x6", - "umulh x4, x5, x6" + "mov x20, x5", + "mov x21, x6", + "mul x22, x20, x21", + "umulh x23, x20, x21", + "mov x7, x22", + "mov x4, x23" ] }, "mulx rax, rax, rbx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Same two destinations should only compute high part", "Map 2 0b11 0xf6 64-bit" ], "ExpectedArm64ASM": [ - "umulh x4, x7, x6" + "mov x20, x7", + "mov x21, x6", + "umulh x22, x20, x21", + "mov x4, x22" ] }, "mulx rax, rbx, [rcx]": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "Map 2 0b11 0xf6 64-bit" ], "ExpectedArm64ASM": [ - "ldr x20, [x5]", - "mul x7, x20, x6", - "umulh x4, x20, x6" + "mov x20, x5", + "ldr x21, [x20]", + "mov x20, x6", + "mul x22, x21, x20", + "umulh x23, x21, x20", + "mov x7, x22", + "mov x4, x23" ] }, "bextr eax, ebx, ecx": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 15, "Comment": [ "Map 2 0b00 0xf7 32-bit" ], "ExpectedArm64ASM": [ - "uxtb w20, w5", - "lsr w21, w7, w20", - "mov w22, #0x0", + "mov x20, x7", + "mov x21, x5", + "uxtb w22, w21", + "lsr w23, w20, w22", + "mov w20, #0x0", + "cmp w22, #0x1f (31)", + "csel w24, w23, w20, ls", + "ubfx w20, w21, #8, #8", + "mov x21, #0xffffffffffffffff", + "lsl w22, w21, w20", + "bic w21, w24, w22", "cmp w20, #0x1f (31)", - "csel w20, w21, w22, ls", - "ubfx w21, w5, #8, #8", - "mov x22, #0xffffffffffffffff", - "lsl w22, w22, w21", - "bic w22, w20, w22", - "cmp w21, #0x1f (31)", - "csel w4, w22, w20, ls", - "tst w4, w4" + "csel w22, w21, w24, ls", + "mov x4, x22", + "tst w22, w22" ] }, "bextr rax, rbx, rcx": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 15, "Comment": [ "Map 2 0b00 0xf7 64-bit" ], "ExpectedArm64ASM": [ - "uxtb x20, w5", - "lsr x21, x7, x20", - "mov w22, #0x0", + "mov x20, x7", + "mov x21, x5", + "uxtb x22, w21", + "lsr x23, x20, x22", + "mov w20, #0x0", + "cmp x22, #0x3f (63)", + "csel x24, x23, x20, ls", + "ubfx x20, x21, #8, #8", + "mov x21, #0xffffffffffffffff", + "lsl x22, x21, x20", + "bic x21, x24, x22", "cmp x20, #0x3f (63)", - "csel x20, x21, x22, ls", - "ubfx x21, x5, #8, #8", - "mov x22, #0xffffffffffffffff", - "lsl x22, x22, x21", - "bic x22, x20, x22", - "cmp x21, #0x3f (63)", - "csel x4, x22, x20, ls", - "tst x4, x4" + "csel x22, x21, x24, ls", + "mov x4, x22", + "tst x22, x22" ] }, "shlx eax, ebx, ecx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 2 0b01 0xf7 32-bit" ], "ExpectedArm64ASM": [ - "lsl w4, w7, w5" + "mov x20, x7", + "mov x21, x5", + "lsl w22, w20, w21", + "mov x4, x22" ] }, "shlx eax, [ebx], ecx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 2 0b01 0xf7 32-bit" ], "ExpectedArm64ASM": [ - "mov w20, w7", - "ldr w20, [x20]", - "lsl w4, w20, w5" + "mov x20, x7", + "mov w21, w20", + "ldr w20, [x21]", + "mov x21, x5", + "lsl w22, w20, w21", + "mov x4, x22" ] }, "shlx rax, rbx, rcx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 2 0b01 0xf7 64-bit" ], "ExpectedArm64ASM": [ - "lsl x4, x7, x5" + "mov x20, x7", + "mov x21, x5", + "lsl x22, x20, x21", + "mov x4, x22" ] }, "shlx rax, [rbx], rcx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 2 0b01 0xf7 64-bit" ], "ExpectedArm64ASM": [ - "ldr x20, [x7]", - "lsl x4, x20, x5" + "mov x20, x7", + "ldr x21, [x20]", + "mov x20, x5", + "lsl x22, x21, x20", + "mov x4, x22" ] }, "sarx eax, ebx, ecx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 2 0b10 0xf7 32-bit" ], "ExpectedArm64ASM": [ - "asr w4, w7, w5" + "mov x20, x7", + "mov x21, x5", + "asr w22, w20, w21", + "mov x4, x22" ] }, "sarx eax, [ebx], ecx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 2 0b10 0xf7 32-bit" ], "ExpectedArm64ASM": [ - "mov w20, w7", - "ldr w20, [x20]", - "asr w4, w20, w5" + "mov x20, x7", + "mov w21, w20", + "ldr w20, [x21]", + "mov x21, x5", + "asr w22, w20, w21", + "mov x4, x22" ] }, "sarx rax, rbx, rcx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 2 0b10 0xf7 64-bit" ], "ExpectedArm64ASM": [ - "asr x4, x7, x5" + "mov x20, x7", + "mov x21, x5", + "asr x22, x20, x21", + "mov x4, x22" ] }, "sarx rax, [rbx], rcx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 2 0b10 0xf7 64-bit" ], "ExpectedArm64ASM": [ - "ldr x20, [x7]", - "asr x4, x20, x5" + "mov x20, x7", + "ldr x21, [x20]", + "mov x20, x5", + "asr x22, x21, x20", + "mov x4, x22" ] }, "shrx eax, ebx, ecx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 2 0b11 0xf7 32-bit" ], "ExpectedArm64ASM": [ - "lsr w4, w7, w5" + "mov x20, x7", + "mov x21, x5", + "lsr w22, w20, w21", + "mov x4, x22" ] }, "shrx eax, [ebx], ecx": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 2 0b11 0xf7 32-bit" ], "ExpectedArm64ASM": [ - "mov w20, w7", - "ldr w20, [x20]", - "lsr w4, w20, w5" + "mov x20, x7", + "mov w21, w20", + "ldr w20, [x21]", + "mov x21, x5", + "lsr w22, w20, w21", + "mov x4, x22" ] }, "shrx rax, rbx, rcx": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 2 0b11 0xf7 64-bit" ], "ExpectedArm64ASM": [ - "lsr x4, x7, x5" + "mov x20, x7", + "mov x21, x5", + "lsr x22, x20, x21", + "mov x4, x22" ] }, "shrx rax, [rbx], rcx": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 2 0b11 0xf7 64-bit" ], "ExpectedArm64ASM": [ - "ldr x20, [x7]", - "lsr x4, x20, x5" + "mov x20, x7", + "ldr x21, [x20]", + "mov x20, x5", + "lsr x22, x21, x20", + "mov x4, x22" ] } } diff --git a/unittests/InstructionCountCI/VEX_map3.json b/unittests/InstructionCountCI/VEX_map3.json index 2a58bbef8b..ca573c1450 100644 --- a/unittests/InstructionCountCI/VEX_map3.json +++ b/unittests/InstructionCountCI/VEX_map3.json @@ -11,289 +11,407 @@ }, "Instructions": { "vpermq ymm0, ymm1, 00000000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b01 0x00 256-bit" ], "ExpectedArm64ASM": [ - "mov z16.d, d17" + "mov z2.d, p7/m, z17.d", + "mov z3.d, d2", + "mov z16.d, p7/m, z3.d" ] }, "vpermq ymm0, ymm1, 01010101b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b01 0x00 256-bit" ], "ExpectedArm64ASM": [ - "mov z16.d, z17.d[1]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, z2.d[1]", + "mov z16.d, p7/m, z3.d" ] }, "vpermq ymm0, ymm1, 10101010b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b01 0x00 256-bit" ], "ExpectedArm64ASM": [ - "mov z16.d, z17.d[2]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, z2.d[2]", + "mov z16.d, p7/m, z3.d" ] }, "vpermq ymm0, ymm1, 11111111b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b01 0x00 256-bit" ], "ExpectedArm64ASM": [ - "mov z16.d, z17.d[3]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, z2.d[3]", + "mov z16.d, p7/m, z3.d" ] }, "vpermpd ymm0, ymm1, 00000000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b01 0x01 256-bit" ], "ExpectedArm64ASM": [ - "mov z16.d, d17" + "mov z2.d, p7/m, z17.d", + "mov z3.d, d2", + "mov z16.d, p7/m, z3.d" ] }, "vpermpd ymm0, ymm1, 01010101b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b01 0x01 256-bit" ], "ExpectedArm64ASM": [ - "mov z16.d, z17.d[1]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, z2.d[1]", + "mov z16.d, p7/m, z3.d" ] }, "vpermpd ymm0, ymm1, 10101010b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b01 0x01 256-bit" ], "ExpectedArm64ASM": [ - "mov z16.d, z17.d[2]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, z2.d[2]", + "mov z16.d, p7/m, z3.d" ] }, "vpermpd ymm0, ymm1, 11111111b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b01 0x01 256-bit" ], "ExpectedArm64ASM": [ - "mov z16.d, z17.d[3]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, z2.d[3]", + "mov z16.d, p7/m, z3.d" ] }, "vpblendd xmm0, xmm1, 0000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b01 0x02 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v16.16b" + "mov z2.d, p7/m, z16.d", + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vpblendd xmm0, xmm1, 0001b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 13, "Comment": [ "Map 3 0b01 0x02 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov v2.s[0], v17.s[0]", - "mov v2.s[1], v16.s[1]", - "mov v2.s[2], v16.s[2]", - "mov v2.s[3], v16.s[3]", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "movi v4.2d, #0x0", + "mov v5.16b, v4.16b", + "mov v5.s[0], v3.s[0]", + "mov v3.16b, v5.16b", + "mov v3.s[1], v2.s[1]", + "mov v4.16b, v3.16b", + "mov v4.s[2], v2.s[2]", + "mov v3.16b, v4.16b", + "mov v3.s[3], v2.s[3]", + "mov v2.16b, v3.16b", + "mov z16.d, p7/m, z2.d" ] }, "vpblendd xmm0, xmm1, 0010b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 13, "Comment": [ "Map 3 0b01 0x02 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov v2.s[0], v16.s[0]", - "mov v2.s[1], v17.s[1]", - "mov v2.s[2], v16.s[2]", - "mov v2.s[3], v16.s[3]", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "movi v4.2d, #0x0", + "mov v5.16b, v4.16b", + "mov v5.s[0], v2.s[0]", + "mov v4.16b, v5.16b", + "mov v4.s[1], v3.s[1]", + "mov v3.16b, v4.16b", + "mov v3.s[2], v2.s[2]", + "mov v4.16b, v3.16b", + "mov v4.s[3], v2.s[3]", + "mov v2.16b, v4.16b", + "mov z16.d, p7/m, z2.d" ] }, "vpblendd xmm0, xmm1, 0011b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 13, "Comment": [ "Map 3 0b01 0x02 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov v2.s[0], v17.s[0]", - "mov v2.s[1], v17.s[1]", - "mov v2.s[2], v16.s[2]", - "mov v2.s[3], v16.s[3]", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "movi v4.2d, #0x0", + "mov v5.16b, v4.16b", + "mov v5.s[0], v3.s[0]", + "mov v4.16b, v5.16b", + "mov v4.s[1], v3.s[1]", + "mov v3.16b, v4.16b", + "mov v3.s[2], v2.s[2]", + "mov v4.16b, v3.16b", + "mov v4.s[3], v2.s[3]", + "mov v2.16b, v4.16b", + "mov z16.d, p7/m, z2.d" ] }, "vpblendd xmm0, xmm1, 0100b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 13, "Comment": [ "Map 3 0b01 0x02 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov v2.s[0], v16.s[0]", - "mov v2.s[1], v16.s[1]", - "mov v2.s[2], v17.s[2]", - "mov v2.s[3], v16.s[3]", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "movi v4.2d, #0x0", + "mov v5.16b, v4.16b", + "mov v5.s[0], v2.s[0]", + "mov v4.16b, v5.16b", + "mov v4.s[1], v2.s[1]", + "mov v5.16b, v4.16b", + "mov v5.s[2], v3.s[2]", + "mov v3.16b, v5.16b", + "mov v3.s[3], v2.s[3]", + "mov v2.16b, v3.16b", + "mov z16.d, p7/m, z2.d" ] }, "vpblendd xmm0, xmm1, 0101b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 13, "Comment": [ "Map 3 0b01 0x02 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov v2.s[0], v17.s[0]", - "mov v2.s[1], v16.s[1]", - "mov v2.s[2], v17.s[2]", - "mov v2.s[3], v16.s[3]", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "movi v4.2d, #0x0", + "mov v5.16b, v4.16b", + "mov v5.s[0], v3.s[0]", + "mov v4.16b, v5.16b", + "mov v4.s[1], v2.s[1]", + "mov v5.16b, v4.16b", + "mov v5.s[2], v3.s[2]", + "mov v3.16b, v5.16b", + "mov v3.s[3], v2.s[3]", + "mov v2.16b, v3.16b", + "mov z16.d, p7/m, z2.d" ] }, "vpblendd xmm0, xmm1, 0110b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 13, "Comment": [ "Map 3 0b01 0x02 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov v2.s[0], v16.s[0]", - "mov v2.s[1], v17.s[1]", - "mov v2.s[2], v17.s[2]", - "mov v2.s[3], v16.s[3]", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "movi v4.2d, #0x0", + "mov v5.16b, v4.16b", + "mov v5.s[0], v2.s[0]", + "mov v4.16b, v5.16b", + "mov v4.s[1], v3.s[1]", + "mov v5.16b, v4.16b", + "mov v5.s[2], v3.s[2]", + "mov v3.16b, v5.16b", + "mov v3.s[3], v2.s[3]", + "mov v2.16b, v3.16b", + "mov z16.d, p7/m, z2.d" ] }, "vpblendd xmm0, xmm1, 0111b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 13, "Comment": [ "Map 3 0b01 0x02 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov v2.s[0], v17.s[0]", - "mov v2.s[1], v17.s[1]", - "mov v2.s[2], v17.s[2]", - "mov v2.s[3], v16.s[3]", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "movi v4.2d, #0x0", + "mov v5.16b, v4.16b", + "mov v5.s[0], v3.s[0]", + "mov v4.16b, v5.16b", + "mov v4.s[1], v3.s[1]", + "mov v5.16b, v4.16b", + "mov v5.s[2], v3.s[2]", + "mov v3.16b, v5.16b", + "mov v3.s[3], v2.s[3]", + "mov v2.16b, v3.16b", + "mov z16.d, p7/m, z2.d" ] }, "vpblendd xmm0, xmm1, 1000b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 13, "Comment": [ "Map 3 0b01 0x02 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov v2.s[0], v16.s[0]", - "mov v2.s[1], v16.s[1]", - "mov v2.s[2], v16.s[2]", - "mov v2.s[3], v17.s[3]", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "movi v4.2d, #0x0", + "mov v5.16b, v4.16b", + "mov v5.s[0], v2.s[0]", + "mov v4.16b, v5.16b", + "mov v4.s[1], v2.s[1]", + "mov v5.16b, v4.16b", + "mov v5.s[2], v2.s[2]", + "mov v2.16b, v5.16b", + "mov v2.s[3], v3.s[3]", + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vpblendd xmm0, xmm1, 1001b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 13, "Comment": [ "Map 3 0b01 0x02 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov v2.s[0], v17.s[0]", - "mov v2.s[1], v16.s[1]", - "mov v2.s[2], v16.s[2]", - "mov v2.s[3], v17.s[3]", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "movi v4.2d, #0x0", + "mov v5.16b, v4.16b", + "mov v5.s[0], v3.s[0]", + "mov v4.16b, v5.16b", + "mov v4.s[1], v2.s[1]", + "mov v5.16b, v4.16b", + "mov v5.s[2], v2.s[2]", + "mov v2.16b, v5.16b", + "mov v2.s[3], v3.s[3]", + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vpblendd xmm0, xmm1, 1010b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 13, "Comment": [ "Map 3 0b01 0x02 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov v2.s[0], v16.s[0]", - "mov v2.s[1], v17.s[1]", - "mov v2.s[2], v16.s[2]", - "mov v2.s[3], v17.s[3]", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "movi v4.2d, #0x0", + "mov v5.16b, v4.16b", + "mov v5.s[0], v2.s[0]", + "mov v4.16b, v5.16b", + "mov v4.s[1], v3.s[1]", + "mov v5.16b, v4.16b", + "mov v5.s[2], v2.s[2]", + "mov v2.16b, v5.16b", + "mov v2.s[3], v3.s[3]", + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vpblendd xmm0, xmm1, 1011b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 13, "Comment": [ "Map 3 0b01 0x02 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov v2.s[0], v17.s[0]", - "mov v2.s[1], v17.s[1]", - "mov v2.s[2], v16.s[2]", - "mov v2.s[3], v17.s[3]", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "movi v4.2d, #0x0", + "mov v5.16b, v4.16b", + "mov v5.s[0], v3.s[0]", + "mov v4.16b, v5.16b", + "mov v4.s[1], v3.s[1]", + "mov v5.16b, v4.16b", + "mov v5.s[2], v2.s[2]", + "mov v2.16b, v5.16b", + "mov v2.s[3], v3.s[3]", + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vpblendd xmm0, xmm1, 1100b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 13, "Comment": [ "Map 3 0b01 0x02 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov v2.s[0], v16.s[0]", - "mov v2.s[1], v16.s[1]", - "mov v2.s[2], v17.s[2]", - "mov v2.s[3], v17.s[3]", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "movi v4.2d, #0x0", + "mov v5.16b, v4.16b", + "mov v5.s[0], v2.s[0]", + "mov v4.16b, v5.16b", + "mov v4.s[1], v2.s[1]", + "mov v2.16b, v4.16b", + "mov v2.s[2], v3.s[2]", + "mov v4.16b, v2.16b", + "mov v4.s[3], v3.s[3]", + "mov v2.16b, v4.16b", + "mov z16.d, p7/m, z2.d" ] }, "vpblendd xmm0, xmm1, 1101b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 13, "Comment": [ "Map 3 0b01 0x02 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov v2.s[0], v17.s[0]", - "mov v2.s[1], v16.s[1]", - "mov v2.s[2], v17.s[2]", - "mov v2.s[3], v17.s[3]", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "movi v4.2d, #0x0", + "mov v5.16b, v4.16b", + "mov v5.s[0], v3.s[0]", + "mov v4.16b, v5.16b", + "mov v4.s[1], v2.s[1]", + "mov v2.16b, v4.16b", + "mov v2.s[2], v3.s[2]", + "mov v4.16b, v2.16b", + "mov v4.s[3], v3.s[3]", + "mov v2.16b, v4.16b", + "mov z16.d, p7/m, z2.d" ] }, "vpblendd xmm0, xmm1, 1110b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 13, "Comment": [ "Map 3 0b01 0x02 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov v2.s[0], v16.s[0]", - "mov v2.s[1], v17.s[1]", - "mov v2.s[2], v17.s[2]", - "mov v2.s[3], v17.s[3]", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "movi v4.2d, #0x0", + "mov v5.16b, v4.16b", + "mov v5.s[0], v2.s[0]", + "mov v2.16b, v5.16b", + "mov v2.s[1], v3.s[1]", + "mov v4.16b, v2.16b", + "mov v4.s[2], v3.s[2]", + "mov v2.16b, v4.16b", + "mov v2.s[3], v3.s[3]", + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vpblendd xmm0, xmm1, 1111b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b01 0x02 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b" + "mov z2.d, p7/m, z17.d", + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vpblendd ymm0, ymm1, 00000000b": { @@ -307,2757 +425,3274 @@ ] }, "vpblendd ymm0, ymm1, 01010101b": { - "ExpectedInstructionCount": 50, + "ExpectedInstructionCount": 60, "Comment": [ "Map 3 0b01 0x02 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.s, s17", + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "movi v4.2d, #0x0", + "mov z1.s, s3", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-4", - "mov z2.s, p0/m, z1.s", + "mov z5.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z16.s[1]", + "mov z1.s, z2.s[1]", + "mov z4.d, z5.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-3", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[2]", + "mov z1.s, z3.s[2]", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-2", - "mov z2.s, p0/m, z1.s", + "mov z5.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z16.s[3]", + "mov z1.s, z2.s[3]", + "mov z4.d, z5.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-1", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[4]", + "mov z1.s, z3.s[4]", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #0", - "mov z2.s, p0/m, z1.s", + "mov z5.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z16.s[5]", + "mov z1.s, z2.s[5]", + "mov z4.d, z5.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #1", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[6]", + "mov z1.s, z3.s[6]", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #2", - "mov z2.s, p0/m, z1.s", + "mov z5.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z16.s[7]", - "mov z16.d, z2.d", + "mov z1.s, z2.s[7]", + "mov z3.d, z5.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #3", - "mov z16.s, p0/m, z1.s", - "msr nzcv, x0" + "mov z3.s, p0/m, z1.s", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vpblendd ymm0, ymm1, 10101010b": { - "ExpectedInstructionCount": 50, + "ExpectedInstructionCount": 60, "Comment": [ "Map 3 0b01 0x02 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.s, s16", + "mov z2.d, p7/m, z16.d", + "mov z3.d, p7/m, z17.d", + "movi v4.2d, #0x0", + "mov z1.s, s2", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-4", - "mov z2.s, p0/m, z1.s", + "mov z5.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[1]", + "mov z1.s, z3.s[1]", + "mov z4.d, z5.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-3", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z16.s[2]", + "mov z1.s, z2.s[2]", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-2", - "mov z2.s, p0/m, z1.s", + "mov z5.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[3]", + "mov z1.s, z3.s[3]", + "mov z4.d, z5.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-1", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z16.s[4]", + "mov z1.s, z2.s[4]", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #0", - "mov z2.s, p0/m, z1.s", + "mov z5.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[5]", + "mov z1.s, z3.s[5]", + "mov z4.d, z5.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #1", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z16.s[6]", + "mov z1.s, z2.s[6]", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #2", - "mov z2.s, p0/m, z1.s", + "mov z5.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[7]", - "mov z16.d, z2.d", + "mov z1.s, z3.s[7]", + "mov z2.d, z5.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #3", - "mov z16.s, p0/m, z1.s", - "msr nzcv, x0" + "mov z2.s, p0/m, z1.s", + "msr nzcv, x0", + "mov z16.d, p7/m, z2.d" ] }, "vpblendd ymm0, ymm1, 11111111b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map 3 0b01 0x02 256-bit" ], "ExpectedArm64ASM": [ - "mov z16.d, p7/m, z17.d" + "mov z2.d, p7/m, z17.d", + "mov z16.d, p7/m, z2.d" ] }, "vpermilps xmm0, xmm1, 00000000b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 11, "Comment": [ "Map 3 0b01 0x03 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov v2.s[0], v17.s[0]", - "mov v2.s[1], v17.s[0]", - "mov v2.s[2], v17.s[0]", - "mov v16.16b, v2.16b", - "mov v16.s[3], v17.s[0]" + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.s[0], v2.s[0]", + "mov v3.16b, v4.16b", + "mov v3.s[1], v2.s[0]", + "mov v4.16b, v3.16b", + "mov v4.s[2], v2.s[0]", + "mov v3.16b, v4.16b", + "mov v3.s[3], v2.s[0]", + "mov z16.d, p7/m, z3.d" ] }, "vpermilps xmm0, xmm1, 01010101b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 11, "Comment": [ "Map 3 0b01 0x03 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov v2.s[0], v17.s[1]", - "mov v2.s[1], v17.s[1]", - "mov v2.s[2], v17.s[1]", - "mov v16.16b, v2.16b", - "mov v16.s[3], v17.s[1]" + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.s[0], v2.s[1]", + "mov v3.16b, v4.16b", + "mov v3.s[1], v2.s[1]", + "mov v4.16b, v3.16b", + "mov v4.s[2], v2.s[1]", + "mov v3.16b, v4.16b", + "mov v3.s[3], v2.s[1]", + "mov z16.d, p7/m, z3.d" ] }, "vpermilps xmm0, xmm1, 10101010b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 11, "Comment": [ "Map 3 0b01 0x03 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov v2.s[0], v17.s[2]", - "mov v2.s[1], v17.s[2]", - "mov v2.s[2], v17.s[2]", - "mov v16.16b, v2.16b", - "mov v16.s[3], v17.s[2]" + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.s[0], v2.s[2]", + "mov v3.16b, v4.16b", + "mov v3.s[1], v2.s[2]", + "mov v4.16b, v3.16b", + "mov v4.s[2], v2.s[2]", + "mov v3.16b, v4.16b", + "mov v3.s[3], v2.s[2]", + "mov z16.d, p7/m, z3.d" ] }, "vpermilps xmm0, xmm1, 11111111b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 11, "Comment": [ "Map 3 0b01 0x03 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov v2.s[0], v17.s[3]", - "mov v2.s[1], v17.s[3]", - "mov v2.s[2], v17.s[3]", - "mov v16.16b, v2.16b", - "mov v16.s[3], v17.s[3]" + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.s[0], v2.s[3]", + "mov v3.16b, v4.16b", + "mov v3.s[1], v2.s[3]", + "mov v4.16b, v3.16b", + "mov v4.s[2], v2.s[3]", + "mov v3.16b, v4.16b", + "mov v3.s[3], v2.s[3]", + "mov z16.d, p7/m, z3.d" ] }, "vpermilps ymm0, ymm1, 00000000b": { - "ExpectedInstructionCount": 50, + "ExpectedInstructionCount": 59, "Comment": [ "Map 3 0b01 0x03 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.s, s17", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.s, s2", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-4", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s17", + "mov z1.s, s2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-3", - "mov z2.s, p0/m, z1.s", + "mov z3.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s17", + "mov z1.s, s2", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-2", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s17", + "mov z1.s, s2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-1", - "mov z2.s, p0/m, z1.s", + "mov z3.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[4]", + "mov z1.s, z2.s[4]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #0", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[4]", + "mov z1.s, z2.s[4]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #1", - "mov z2.s, p0/m, z1.s", + "mov z3.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[4]", + "mov z1.s, z2.s[4]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #2", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[4]", - "mov z16.d, z2.d", + "mov z1.s, z2.s[4]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #3", - "mov z16.s, p0/m, z1.s", - "msr nzcv, x0" + "mov z3.s, p0/m, z1.s", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vpermilps ymm0, ymm1, 01010101b": { - "ExpectedInstructionCount": 50, + "ExpectedInstructionCount": 59, "Comment": [ "Map 3 0b01 0x03 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.s, z17.s[1]", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.s, z2.s[1]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-4", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[1]", + "mov z1.s, z2.s[1]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-3", - "mov z2.s, p0/m, z1.s", + "mov z3.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[1]", + "mov z1.s, z2.s[1]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-2", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[1]", + "mov z1.s, z2.s[1]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-1", - "mov z2.s, p0/m, z1.s", + "mov z3.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[5]", + "mov z1.s, z2.s[5]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #0", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[5]", + "mov z1.s, z2.s[5]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #1", - "mov z2.s, p0/m, z1.s", + "mov z3.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[5]", + "mov z1.s, z2.s[5]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #2", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[5]", - "mov z16.d, z2.d", + "mov z1.s, z2.s[5]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #3", - "mov z16.s, p0/m, z1.s", - "msr nzcv, x0" + "mov z3.s, p0/m, z1.s", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vpermilps ymm0, ymm1, 10101010b": { - "ExpectedInstructionCount": 50, + "ExpectedInstructionCount": 59, "Comment": [ "Map 3 0b01 0x03 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.s, z17.s[2]", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.s, z2.s[2]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-4", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[2]", + "mov z1.s, z2.s[2]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-3", - "mov z2.s, p0/m, z1.s", + "mov z3.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[2]", + "mov z1.s, z2.s[2]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-2", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[2]", + "mov z1.s, z2.s[2]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-1", - "mov z2.s, p0/m, z1.s", + "mov z3.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[6]", + "mov z1.s, z2.s[6]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #0", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[6]", + "mov z1.s, z2.s[6]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #1", - "mov z2.s, p0/m, z1.s", + "mov z3.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[6]", + "mov z1.s, z2.s[6]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #2", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[6]", - "mov z16.d, z2.d", + "mov z1.s, z2.s[6]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #3", - "mov z16.s, p0/m, z1.s", - "msr nzcv, x0" + "mov z3.s, p0/m, z1.s", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vpermilps ymm0, ymm1, 11111111b": { - "ExpectedInstructionCount": 50, + "ExpectedInstructionCount": 59, "Comment": [ "Map 3 0b01 0x03 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.s, z17.s[3]", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.s, z2.s[3]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-4", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[3]", + "mov z1.s, z2.s[3]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-3", - "mov z2.s, p0/m, z1.s", + "mov z3.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[3]", + "mov z1.s, z2.s[3]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-2", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[3]", + "mov z1.s, z2.s[3]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-1", - "mov z2.s, p0/m, z1.s", + "mov z3.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[7]", + "mov z1.s, z2.s[7]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #0", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[7]", + "mov z1.s, z2.s[7]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #1", - "mov z2.s, p0/m, z1.s", + "mov z3.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[7]", + "mov z1.s, z2.s[7]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #2", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[7]", - "mov z16.d, z2.d", + "mov z1.s, z2.s[7]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #3", - "mov z16.s, p0/m, z1.s", - "msr nzcv, x0" + "mov z3.s, p0/m, z1.s", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vpermilpd xmm0, xmm1, 00b": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 3 0b01 0x05 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov v2.d[0], v17.d[0]", - "mov v16.16b, v2.16b", - "mov v16.d[1], v17.d[0]" + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.d[0], v2.d[0]", + "mov v3.16b, v4.16b", + "mov v3.d[1], v2.d[0]", + "mov z16.d, p7/m, z3.d" ] }, "vpermilpd xmm0, xmm1, 01b": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 3 0b01 0x05 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov v2.d[0], v17.d[1]", - "mov v16.16b, v2.16b", - "mov v16.d[1], v17.d[0]" + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.d[0], v2.d[1]", + "mov v3.16b, v4.16b", + "mov v3.d[1], v2.d[0]", + "mov z16.d, p7/m, z3.d" ] }, "vpermilpd xmm0, xmm1, 10b": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 3 0b01 0x05 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov v2.d[0], v17.d[0]", - "mov v16.16b, v2.16b", - "mov v16.d[1], v17.d[1]" + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.d[0], v2.d[0]", + "mov v3.16b, v4.16b", + "mov v3.d[1], v2.d[1]", + "mov z16.d, p7/m, z3.d" ] }, "vpermilpd xmm0, xmm1, 11b": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 3 0b01 0x05 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov v2.d[0], v17.d[1]", - "mov v16.16b, v2.16b", - "mov v16.d[1], v17.d[1]" + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.d[0], v2.d[1]", + "mov v3.16b, v4.16b", + "mov v3.d[1], v2.d[1]", + "mov z16.d, p7/m, z3.d" ] }, "vpermilpd ymm0, ymm1, 0000b": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 31, "Comment": [ "Map 3 0b01 0x05 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.d, d17", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.d, d2", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-2", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, d17", + "mov z1.d, d2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z2.d, p0/m, z1.d", + "mov z3.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[2]", + "mov z1.d, z2.d[2]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[2]", - "mov z16.d, z2.d", + "mov z1.d, z2.d[2]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z3.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vpermilpd ymm0, ymm1, 0001b": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 31, "Comment": [ "Map 3 0b01 0x05 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.d, z17.d[1]", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.d, z2.d[1]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-2", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, d17", + "mov z1.d, d2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z2.d, p0/m, z1.d", + "mov z3.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[2]", + "mov z1.d, z2.d[2]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[2]", - "mov z16.d, z2.d", + "mov z1.d, z2.d[2]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z3.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vpermilpd ymm0, ymm1, 0010b": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 31, "Comment": [ "Map 3 0b01 0x05 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.d, d17", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.d, d2", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-2", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[1]", + "mov z1.d, z2.d[1]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z2.d, p0/m, z1.d", + "mov z3.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[2]", + "mov z1.d, z2.d[2]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[2]", - "mov z16.d, z2.d", + "mov z1.d, z2.d[2]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z3.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vpermilpd ymm0, ymm1, 0011b": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 31, "Comment": [ "Map 3 0b01 0x05 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.d, z17.d[1]", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.d, z2.d[1]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-2", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[1]", + "mov z1.d, z2.d[1]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z2.d, p0/m, z1.d", + "mov z3.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[2]", + "mov z1.d, z2.d[2]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[2]", - "mov z16.d, z2.d", + "mov z1.d, z2.d[2]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z3.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vpermilpd ymm0, ymm1, 0100b": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 31, "Comment": [ "Map 3 0b01 0x05 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.d, d17", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.d, d2", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-2", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, d17", + "mov z1.d, d2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z2.d, p0/m, z1.d", + "mov z3.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[3]", + "mov z1.d, z2.d[3]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[2]", - "mov z16.d, z2.d", + "mov z1.d, z2.d[2]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z3.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vpermilpd ymm0, ymm1, 0101b": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 31, "Comment": [ "Map 3 0b01 0x05 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.d, z17.d[1]", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.d, z2.d[1]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-2", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, d17", + "mov z1.d, d2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z2.d, p0/m, z1.d", + "mov z3.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[3]", + "mov z1.d, z2.d[3]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[2]", - "mov z16.d, z2.d", + "mov z1.d, z2.d[2]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z3.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vpermilpd ymm0, ymm1, 0110b": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 31, "Comment": [ "Map 3 0b01 0x05 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.d, d17", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.d, d2", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-2", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[1]", + "mov z1.d, z2.d[1]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z2.d, p0/m, z1.d", + "mov z3.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[3]", + "mov z1.d, z2.d[3]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[2]", - "mov z16.d, z2.d", + "mov z1.d, z2.d[2]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z3.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vpermilpd ymm0, ymm1, 0111b": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 31, "Comment": [ "Map 3 0b01 0x05 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.d, z17.d[1]", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.d, z2.d[1]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-2", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[1]", + "mov z1.d, z2.d[1]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z2.d, p0/m, z1.d", + "mov z3.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[3]", + "mov z1.d, z2.d[3]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[2]", - "mov z16.d, z2.d", + "mov z1.d, z2.d[2]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z3.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vpermilpd ymm0, ymm1, 1000b": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 31, "Comment": [ "Map 3 0b01 0x05 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.d, d17", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.d, d2", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-2", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, d17", + "mov z1.d, d2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z2.d, p0/m, z1.d", + "mov z3.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[2]", + "mov z1.d, z2.d[2]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[3]", - "mov z16.d, z2.d", + "mov z1.d, z2.d[3]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z3.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vpermilpd ymm0, ymm1, 1001b": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 31, "Comment": [ "Map 3 0b01 0x05 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.d, z17.d[1]", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.d, z2.d[1]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-2", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, d17", + "mov z1.d, d2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z2.d, p0/m, z1.d", + "mov z3.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[2]", + "mov z1.d, z2.d[2]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[3]", - "mov z16.d, z2.d", + "mov z1.d, z2.d[3]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z3.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vpermilpd ymm0, ymm1, 1010b": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 31, "Comment": [ "Map 3 0b01 0x05 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.d, d17", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.d, d2", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-2", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[1]", + "mov z1.d, z2.d[1]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z2.d, p0/m, z1.d", + "mov z3.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[2]", + "mov z1.d, z2.d[2]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[3]", - "mov z16.d, z2.d", + "mov z1.d, z2.d[3]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z3.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vpermilpd ymm0, ymm1, 1011b": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 31, "Comment": [ "Map 3 0b01 0x05 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.d, z17.d[1]", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.d, z2.d[1]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-2", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[1]", + "mov z1.d, z2.d[1]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z2.d, p0/m, z1.d", + "mov z3.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[2]", + "mov z1.d, z2.d[2]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[3]", - "mov z16.d, z2.d", + "mov z1.d, z2.d[3]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z3.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vpermilpd ymm0, ymm1, 1100b": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 31, "Comment": [ "Map 3 0b01 0x05 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.d, d17", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.d, d2", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-2", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, d17", + "mov z1.d, d2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z2.d, p0/m, z1.d", + "mov z3.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[3]", + "mov z1.d, z2.d[3]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[3]", - "mov z16.d, z2.d", + "mov z1.d, z2.d[3]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z3.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vpermilpd ymm0, ymm1, 1101b": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 31, "Comment": [ "Map 3 0b01 0x05 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.d, z17.d[1]", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.d, z2.d[1]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-2", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, d17", + "mov z1.d, d2", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z2.d, p0/m, z1.d", + "mov z3.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[3]", + "mov z1.d, z2.d[3]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[3]", - "mov z16.d, z2.d", + "mov z1.d, z2.d[3]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z3.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vpermilpd ymm0, ymm1, 1110b": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 31, "Comment": [ "Map 3 0b01 0x05 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.d, d17", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.d, d2", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-2", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[1]", + "mov z1.d, z2.d[1]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z2.d, p0/m, z1.d", + "mov z3.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[3]", + "mov z1.d, z2.d[3]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[3]", - "mov z16.d, z2.d", + "mov z1.d, z2.d[3]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z3.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vpermilpd ymm0, ymm1, 1111b": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 31, "Comment": [ "Map 3 0b01 0x05 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.d, z17.d[1]", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.d, z2.d[1]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-2", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[1]", + "mov z1.d, z2.d[1]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z2.d, p0/m, z1.d", + "mov z3.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[3]", + "mov z1.d, z2.d[3]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[3]", - "mov z16.d, z2.d", + "mov z1.d, z2.d[3]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z3.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vperm2f128 ymm0, ymm1, ymm2, 00000000b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": [ "Map 3 0b01 0x06 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, q17", - "mov z2.b, p6/m, z1.b", - "mov z1.q, q17", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.q, q2", + "mov z4.d, z3.d", + "mov z4.b, p6/m, z1.b", + "mov z1.q, q2", + "mov z3.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z3.b, p0/m, z1.b", + "mov z16.d, p7/m, z3.d" ] }, "vperm2f128 ymm0, ymm1, ymm2, 00000001b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": [ "Map 3 0b01 0x06 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, z17.q[1]", - "mov z2.b, p6/m, z1.b", - "mov z1.q, q17", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.q, z2.q[1]", + "mov z4.d, z3.d", + "mov z4.b, p6/m, z1.b", + "mov z1.q, q2", + "mov z3.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z3.b, p0/m, z1.b", + "mov z16.d, p7/m, z3.d" ] }, "vperm2f128 ymm0, ymm1, ymm2, 00000010b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": [ "Map 3 0b01 0x06 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, q18", - "mov z2.b, p6/m, z1.b", - "mov z1.q, q17", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov z1.q, q3", + "mov z5.d, z4.d", + "mov z5.b, p6/m, z1.b", + "mov z1.q, q2", + "mov z3.d, z5.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z3.b, p0/m, z1.b", + "mov z16.d, p7/m, z3.d" ] }, "vperm2f128 ymm0, ymm1, ymm2, 00000011b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": [ "Map 3 0b01 0x06 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, z18.q[1]", - "mov z2.b, p6/m, z1.b", - "mov z1.q, q17", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov z1.q, z3.q[1]", + "mov z5.d, z4.d", + "mov z5.b, p6/m, z1.b", + "mov z1.q, q2", + "mov z3.d, z5.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z3.b, p0/m, z1.b", + "mov z16.d, p7/m, z3.d" ] }, "vperm2f128 ymm0, ymm1, ymm2, 00010000b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": [ "Map 3 0b01 0x06 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, q17", - "mov z2.b, p6/m, z1.b", - "mov z1.q, z17.q[1]", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.q, q2", + "mov z4.d, z3.d", + "mov z4.b, p6/m, z1.b", + "mov z1.q, z2.q[1]", + "mov z3.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z3.b, p0/m, z1.b", + "mov z16.d, p7/m, z3.d" ] }, "vperm2f128 ymm0, ymm1, ymm2, 00010001b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": [ "Map 3 0b01 0x06 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, z17.q[1]", - "mov z2.b, p6/m, z1.b", - "mov z1.q, z17.q[1]", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.q, z2.q[1]", + "mov z4.d, z3.d", + "mov z4.b, p6/m, z1.b", + "mov z1.q, z2.q[1]", + "mov z3.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z3.b, p0/m, z1.b", + "mov z16.d, p7/m, z3.d" ] }, "vperm2f128 ymm0, ymm1, ymm2, 00010010b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": [ "Map 3 0b01 0x06 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, q18", - "mov z2.b, p6/m, z1.b", - "mov z1.q, z17.q[1]", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov z1.q, q3", + "mov z5.d, z4.d", + "mov z5.b, p6/m, z1.b", + "mov z1.q, z2.q[1]", + "mov z3.d, z5.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z3.b, p0/m, z1.b", + "mov z16.d, p7/m, z3.d" ] }, "vperm2f128 ymm0, ymm1, ymm2, 00010011b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": [ "Map 3 0b01 0x06 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, z18.q[1]", - "mov z2.b, p6/m, z1.b", - "mov z1.q, z17.q[1]", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov z1.q, z3.q[1]", + "mov z5.d, z4.d", + "mov z5.b, p6/m, z1.b", + "mov z1.q, z2.q[1]", + "mov z3.d, z5.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z3.b, p0/m, z1.b", + "mov z16.d, p7/m, z3.d" ] }, "vperm2f128 ymm0, ymm1, ymm2, 00100000b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": [ "Map 3 0b01 0x06 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, q17", - "mov z2.b, p6/m, z1.b", - "mov z1.q, q18", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov z1.q, q2", + "mov z5.d, z4.d", + "mov z5.b, p6/m, z1.b", + "mov z1.q, q3", + "mov z2.d, z5.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z2.b, p0/m, z1.b", + "mov z16.d, p7/m, z2.d" ] }, "vperm2f128 ymm0, ymm1, ymm2, 00100001b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": [ "Map 3 0b01 0x06 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, z17.q[1]", - "mov z2.b, p6/m, z1.b", - "mov z1.q, q18", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov z1.q, z2.q[1]", + "mov z5.d, z4.d", + "mov z5.b, p6/m, z1.b", + "mov z1.q, q3", + "mov z2.d, z5.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z2.b, p0/m, z1.b", + "mov z16.d, p7/m, z2.d" ] }, "vperm2f128 ymm0, ymm1, ymm2, 00100010b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": [ "Map 3 0b01 0x06 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, q18", - "mov z2.b, p6/m, z1.b", - "mov z1.q, q18", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z18.d", + "movi v3.2d, #0x0", + "mov z1.q, q2", + "mov z4.d, z3.d", + "mov z4.b, p6/m, z1.b", + "mov z1.q, q2", + "mov z3.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z3.b, p0/m, z1.b", + "mov z16.d, p7/m, z3.d" ] }, "vperm2f128 ymm0, ymm1, ymm2, 00100011b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": [ "Map 3 0b01 0x06 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, z18.q[1]", - "mov z2.b, p6/m, z1.b", - "mov z1.q, q18", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z18.d", + "movi v3.2d, #0x0", + "mov z1.q, z2.q[1]", + "mov z4.d, z3.d", + "mov z4.b, p6/m, z1.b", + "mov z1.q, q2", + "mov z3.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z3.b, p0/m, z1.b", + "mov z16.d, p7/m, z3.d" ] }, "vperm2f128 ymm0, ymm1, ymm2, 00110000b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": [ "Map 3 0b01 0x06 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, q17", - "mov z2.b, p6/m, z1.b", - "mov z1.q, z18.q[1]", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov z1.q, q2", + "mov z5.d, z4.d", + "mov z5.b, p6/m, z1.b", + "mov z1.q, z3.q[1]", + "mov z2.d, z5.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z2.b, p0/m, z1.b", + "mov z16.d, p7/m, z2.d" ] }, "vperm2f128 ymm0, ymm1, ymm2, 00110001b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": [ "Map 3 0b01 0x06 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, z17.q[1]", - "mov z2.b, p6/m, z1.b", - "mov z1.q, z18.q[1]", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov z1.q, z2.q[1]", + "mov z5.d, z4.d", + "mov z5.b, p6/m, z1.b", + "mov z1.q, z3.q[1]", + "mov z2.d, z5.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z2.b, p0/m, z1.b", + "mov z16.d, p7/m, z2.d" ] }, "vperm2f128 ymm0, ymm1, ymm2, 00110010b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": [ "Map 3 0b01 0x06 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, q18", - "mov z2.b, p6/m, z1.b", - "mov z1.q, z18.q[1]", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z18.d", + "movi v3.2d, #0x0", + "mov z1.q, q2", + "mov z4.d, z3.d", + "mov z4.b, p6/m, z1.b", + "mov z1.q, z2.q[1]", + "mov z3.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z3.b, p0/m, z1.b", + "mov z16.d, p7/m, z3.d" ] }, "vperm2f128 ymm0, ymm1, ymm2, 00110011b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": [ "Map 3 0b01 0x06 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, z18.q[1]", - "mov z2.b, p6/m, z1.b", - "mov z1.q, z18.q[1]", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z18.d", + "movi v3.2d, #0x0", + "mov z1.q, z2.q[1]", + "mov z4.d, z3.d", + "mov z4.b, p6/m, z1.b", + "mov z1.q, z2.q[1]", + "mov z3.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z3.b, p0/m, z1.b", + "mov z16.d, p7/m, z3.d" ] }, "vperm2f128 ymm0, ymm1, ymm2, 00001000b": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": [ "Map 3 0b01 0x06 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, q17", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.q, q2", + "mov z4.d, z3.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z4.b, p0/m, z1.b", + "mov z16.d, p7/m, z4.d" ] }, "vperm2f128 ymm0, ymm1, ymm2, 00011000b": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": [ "Map 3 0b01 0x06 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, z17.q[1]", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.q, z2.q[1]", + "mov z4.d, z3.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z4.b, p0/m, z1.b", + "mov z16.d, p7/m, z4.d" ] }, "vperm2f128 ymm0, ymm1, ymm2, 00101000b": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": [ "Map 3 0b01 0x06 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, q18", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z18.d", + "movi v3.2d, #0x0", + "mov z1.q, q2", + "mov z4.d, z3.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z4.b, p0/m, z1.b", + "mov z16.d, p7/m, z4.d" ] }, "vperm2f128 ymm0, ymm1, ymm2, 00111000b": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": [ "Map 3 0b01 0x06 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, z18.q[1]", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z18.d", + "movi v3.2d, #0x0", + "mov z1.q, z2.q[1]", + "mov z4.d, z3.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z4.b, p0/m, z1.b", + "mov z16.d, p7/m, z4.d" ] }, "vperm2f128 ymm0, ymm1, ymm2, 10001000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map 3 0b01 0x06 256-bit" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "movi v2.2d, #0x0", + "mov z16.d, p7/m, z2.d" ] }, "vperm2f128 ymm0, ymm1, ymm2, 10000000b": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": [ "Map 3 0b01 0x06 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, q17", - "mov z16.d, z2.d", - "mov z16.b, p6/m, z1.b" + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.q, q2", + "mov z4.d, z3.d", + "mov z4.b, p6/m, z1.b", + "mov z16.d, p7/m, z4.d" ] }, "vperm2f128 ymm0, ymm1, ymm2, 10000001b": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": [ "Map 3 0b01 0x06 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, z17.q[1]", - "mov z16.d, z2.d", - "mov z16.b, p6/m, z1.b" + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.q, z2.q[1]", + "mov z4.d, z3.d", + "mov z4.b, p6/m, z1.b", + "mov z16.d, p7/m, z4.d" ] }, "vperm2f128 ymm0, ymm1, ymm2, 10000010b": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": [ "Map 3 0b01 0x06 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, q18", - "mov z16.d, z2.d", - "mov z16.b, p6/m, z1.b" + "mov z2.d, p7/m, z18.d", + "movi v3.2d, #0x0", + "mov z1.q, q2", + "mov z4.d, z3.d", + "mov z4.b, p6/m, z1.b", + "mov z16.d, p7/m, z4.d" ] }, "vperm2f128 ymm0, ymm1, ymm2, 10000011b": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": [ "Map 3 0b01 0x06 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, z18.q[1]", - "mov z16.d, z2.d", - "mov z16.b, p6/m, z1.b" + "mov z2.d, p7/m, z18.d", + "movi v3.2d, #0x0", + "mov z1.q, z2.q[1]", + "mov z4.d, z3.d", + "mov z4.b, p6/m, z1.b", + "mov z16.d, p7/m, z4.d" ] }, "vroundps xmm0, xmm1, 00000000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "nearest rounding", "Map 3 0b01 0x08 128-bit" ], "ExpectedArm64ASM": [ - "frintn v16.4s, v17.4s" + "mov z2.d, p7/m, z17.d", + "frintn v3.4s, v2.4s", + "mov z16.d, p7/m, z3.d" ] }, "vroundps xmm0, xmm1, 00000001b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "-inf rounding", "Map 3 0b01 0x08 128-bit" ], "ExpectedArm64ASM": [ - "frintm v16.4s, v17.4s" + "mov z2.d, p7/m, z17.d", + "frintm v3.4s, v2.4s", + "mov z16.d, p7/m, z3.d" ] }, "vroundps xmm0, xmm1, 00000010b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "+inf rounding", "Map 3 0b01 0x08 128-bit" ], "ExpectedArm64ASM": [ - "frintp v16.4s, v17.4s" + "mov z2.d, p7/m, z17.d", + "frintp v3.4s, v2.4s", + "mov z16.d, p7/m, z3.d" ] }, "vroundps xmm0, xmm1, 00000011b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "truncate rounding", "Map 3 0b01 0x08 128-bit" ], "ExpectedArm64ASM": [ - "frintz v16.4s, v17.4s" + "mov z2.d, p7/m, z17.d", + "frintz v3.4s, v2.4s", + "mov z16.d, p7/m, z3.d" ] }, "vroundps xmm0, xmm1, 00000100b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "host mode rounding", "Map 3 0b01 0x08 128-bit" ], "ExpectedArm64ASM": [ - "frinti v16.4s, v17.4s" + "mov z2.d, p7/m, z17.d", + "frinti v3.4s, v2.4s", + "mov z16.d, p7/m, z3.d" ] }, "vroundps ymm0, ymm1, 00000000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "nearest rounding", "Map 3 0b01 0x08 256-bit" ], "ExpectedArm64ASM": [ - "frintn z16.s, p7/m, z17.s" + "mov z2.d, p7/m, z17.d", + "frintn z3.s, p7/m, z2.s", + "mov z16.d, p7/m, z3.d" ] }, "vroundps ymm0, ymm1, 00000001b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "-inf rounding", "Map 3 0b01 0x08 256-bit" ], "ExpectedArm64ASM": [ - "frintm z16.s, p7/m, z17.s" + "mov z2.d, p7/m, z17.d", + "frintm z3.s, p7/m, z2.s", + "mov z16.d, p7/m, z3.d" ] }, "vroundps ymm0, ymm1, 00000010b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "+inf rounding", "Map 3 0b01 0x08 256-bit" ], "ExpectedArm64ASM": [ - "frintp z16.s, p7/m, z17.s" + "mov z2.d, p7/m, z17.d", + "frintp z3.s, p7/m, z2.s", + "mov z16.d, p7/m, z3.d" ] }, "vroundps ymm0, ymm1, 00000011b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "truncate rounding", "Map 3 0b01 0x08 256-bit" ], "ExpectedArm64ASM": [ - "frintz z16.s, p7/m, z17.s" + "mov z2.d, p7/m, z17.d", + "frintz z3.s, p7/m, z2.s", + "mov z16.d, p7/m, z3.d" ] }, "vroundps ymm0, ymm1, 00000100b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "host mode rounding", "Map 3 0b01 0x08 256-bit" ], "ExpectedArm64ASM": [ - "frinti z16.s, p7/m, z17.s" + "mov z2.d, p7/m, z17.d", + "frinti z3.s, p7/m, z2.s", + "mov z16.d, p7/m, z3.d" ] }, "vroundpd xmm0, xmm1, 00000000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "nearest rounding", "Map 3 0b01 0x09 128-bit" ], "ExpectedArm64ASM": [ - "frintn v16.2d, v17.2d" + "mov z2.d, p7/m, z17.d", + "frintn v3.2d, v2.2d", + "mov z16.d, p7/m, z3.d" ] }, "vroundpd xmm0, xmm1, 00000001b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "-inf rounding", "Map 3 0b01 0x09 128-bit" ], "ExpectedArm64ASM": [ - "frintm v16.2d, v17.2d" + "mov z2.d, p7/m, z17.d", + "frintm v3.2d, v2.2d", + "mov z16.d, p7/m, z3.d" ] }, "vroundpd xmm0, xmm1, 00000010b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "+inf rounding", "Map 3 0b01 0x09 128-bit" ], "ExpectedArm64ASM": [ - "frintp v16.2d, v17.2d" + "mov z2.d, p7/m, z17.d", + "frintp v3.2d, v2.2d", + "mov z16.d, p7/m, z3.d" ] }, "vroundpd xmm0, xmm1, 00000011b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "truncate rounding", "Map 3 0b01 0x09 128-bit" ], "ExpectedArm64ASM": [ - "frintz v16.2d, v17.2d" + "mov z2.d, p7/m, z17.d", + "frintz v3.2d, v2.2d", + "mov z16.d, p7/m, z3.d" ] }, "vroundpd xmm0, xmm1, 00000100b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "host mode rounding", "Map 3 0b01 0x09 128-bit" ], "ExpectedArm64ASM": [ - "frinti v16.2d, v17.2d" + "mov z2.d, p7/m, z17.d", + "frinti v3.2d, v2.2d", + "mov z16.d, p7/m, z3.d" ] }, "vroundpd ymm0, ymm1, 00000000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "nearest rounding", "Map 3 0b01 0x09 256-bit" ], "ExpectedArm64ASM": [ - "frintn z16.d, p7/m, z17.d" + "mov z2.d, p7/m, z17.d", + "frintn z3.d, p7/m, z2.d", + "mov z16.d, p7/m, z3.d" ] }, "vroundpd ymm0, ymm1, 00000001b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "-inf rounding", "Map 3 0b01 0x09 256-bit" ], "ExpectedArm64ASM": [ - "frintm z16.d, p7/m, z17.d" + "mov z2.d, p7/m, z17.d", + "frintm z3.d, p7/m, z2.d", + "mov z16.d, p7/m, z3.d" ] }, "vroundpd ymm0, ymm1, 00000010b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "+inf rounding", "Map 3 0b01 0x09 256-bit" ], "ExpectedArm64ASM": [ - "frintp z16.d, p7/m, z17.d" + "mov z2.d, p7/m, z17.d", + "frintp z3.d, p7/m, z2.d", + "mov z16.d, p7/m, z3.d" ] }, "vroundpd ymm0, ymm1, 00000011b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "truncate rounding", "Map 3 0b01 0x09 256-bit" ], "ExpectedArm64ASM": [ - "frintz z16.d, p7/m, z17.d" + "mov z2.d, p7/m, z17.d", + "frintz z3.d, p7/m, z2.d", + "mov z16.d, p7/m, z3.d" ] }, "vroundpd ymm0, ymm1, 00000100b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "host mode rounding", "Map 3 0b01 0x09 256-bit" ], "ExpectedArm64ASM": [ - "frinti z16.d, p7/m, z17.d" + "mov z2.d, p7/m, z17.d", + "frinti z3.d, p7/m, z2.d", + "mov z16.d, p7/m, z3.d" ] }, "vroundss xmm0, xmm1, 00000000b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "nearest rounding", "Map 3 0b01 0x0a 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v16.16b", - "frintn s0, s16", - "mov v16.s[0], v0.s[0]" + "mov z2.d, p7/m, z16.d", + "mov v3.16b, v2.16b", + "frintn s0, s2", + "mov v3.s[0], v0.s[0]", + "mov z16.d, p7/m, z3.d" ] }, "vroundss xmm0, xmm1, 00000001b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "-inf rounding", "Map 3 0b01 0x0a 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v16.16b", - "frintm s0, s16", - "mov v16.s[0], v0.s[0]" + "mov z2.d, p7/m, z16.d", + "mov v3.16b, v2.16b", + "frintm s0, s2", + "mov v3.s[0], v0.s[0]", + "mov z16.d, p7/m, z3.d" ] }, "vroundss xmm0, xmm1, 00000010b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "+inf rounding", "Map 3 0b01 0x0a 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v16.16b", - "frintp s0, s16", - "mov v16.s[0], v0.s[0]" + "mov z2.d, p7/m, z16.d", + "mov v3.16b, v2.16b", + "frintp s0, s2", + "mov v3.s[0], v0.s[0]", + "mov z16.d, p7/m, z3.d" ] }, "vroundss xmm0, xmm1, 00000011b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "truncate rounding", "Map 3 0b01 0x0a 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v16.16b", - "frintz s0, s16", - "mov v16.s[0], v0.s[0]" + "mov z2.d, p7/m, z16.d", + "mov v3.16b, v2.16b", + "frintz s0, s2", + "mov v3.s[0], v0.s[0]", + "mov z16.d, p7/m, z3.d" ] }, "vroundss xmm0, xmm1, 00000100b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "host mode rounding", "Map 3 0b01 0x0a 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v16.16b", - "frinti s0, s16", - "mov v16.s[0], v0.s[0]" + "mov z2.d, p7/m, z16.d", + "mov v3.16b, v2.16b", + "frinti s0, s2", + "mov v3.s[0], v0.s[0]", + "mov z16.d, p7/m, z3.d" ] }, "vroundsd xmm0, xmm1, 00000000b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "nearest rounding", "Map 3 0b01 0x0b 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v16.16b", - "frintn d0, d16", - "mov v16.d[0], v0.d[0]" + "mov z2.d, p7/m, z16.d", + "mov v3.16b, v2.16b", + "frintn d0, d2", + "mov v3.d[0], v0.d[0]", + "mov z16.d, p7/m, z3.d" ] }, "vroundsd xmm0, xmm1, 00000001b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "-inf rounding", "Map 3 0b01 0x0b 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v16.16b", - "frintm d0, d16", - "mov v16.d[0], v0.d[0]" + "mov z2.d, p7/m, z16.d", + "mov v3.16b, v2.16b", + "frintm d0, d2", + "mov v3.d[0], v0.d[0]", + "mov z16.d, p7/m, z3.d" ] }, "vroundsd xmm0, xmm1, 00000010b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "+inf rounding", "Map 3 0b01 0x0b 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v16.16b", - "frintp d0, d16", - "mov v16.d[0], v0.d[0]" + "mov z2.d, p7/m, z16.d", + "mov v3.16b, v2.16b", + "frintp d0, d2", + "mov v3.d[0], v0.d[0]", + "mov z16.d, p7/m, z3.d" ] }, "vroundsd xmm0, xmm1, 00000011b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "truncate rounding", "Map 3 0b01 0x0b 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v16.16b", - "frintz d0, d16", - "mov v16.d[0], v0.d[0]" + "mov z2.d, p7/m, z16.d", + "mov v3.16b, v2.16b", + "frintz d0, d2", + "mov v3.d[0], v0.d[0]", + "mov z16.d, p7/m, z3.d" ] }, "vroundsd xmm0, xmm1, 00000100b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 5, "Comment": [ "host mode rounding", "Map 3 0b01 0x0b 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v16.16b", - "frinti d0, d16", - "mov v16.d[0], v0.d[0]" + "mov z2.d, p7/m, z16.d", + "mov v3.16b, v2.16b", + "frinti d0, d2", + "mov v3.d[0], v0.d[0]", + "mov z16.d, p7/m, z3.d" ] }, "vblendps xmm0, xmm1, xmm2, 0000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b01 0x0c 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b" + "mov z2.d, p7/m, z17.d", + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vblendps xmm0, xmm1, xmm2, 0001b": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 13, "Comment": [ "Map 3 0b01 0x0c 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov v2.s[0], v18.s[0]", - "mov v2.s[1], v17.s[1]", - "mov v2.s[2], v17.s[2]", - "mov v2.s[3], v17.s[3]", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov v5.16b, v4.16b", + "mov v5.s[0], v3.s[0]", + "mov v3.16b, v5.16b", + "mov v3.s[1], v2.s[1]", + "mov v4.16b, v3.16b", + "mov v4.s[2], v2.s[2]", + "mov v3.16b, v4.16b", + "mov v3.s[3], v2.s[3]", + "mov v2.16b, v3.16b", + "mov z16.d, p7/m, z2.d" ] }, "vblendps xmm0, xmm1, xmm2, 1111b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b01 0x0c 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v18.16b" + "mov z2.d, p7/m, z18.d", + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vblendps ymm0, ymm1, ymm2, 00000000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map 3 0b01 0x0c 256-bit" ], "ExpectedArm64ASM": [ - "mov z16.d, p7/m, z17.d" + "mov z2.d, p7/m, z17.d", + "mov z16.d, p7/m, z2.d" ] }, "vblendps ymm0, ymm1, ymm2, 10000001b": { - "ExpectedInstructionCount": 50, + "ExpectedInstructionCount": 60, "Comment": [ "Map 3 0b01 0x0c 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.s, s18", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov z1.s, s3", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-4", - "mov z2.s, p0/m, z1.s", + "mov z5.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[1]", + "mov z1.s, z2.s[1]", + "mov z4.d, z5.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-3", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[2]", + "mov z1.s, z2.s[2]", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-2", - "mov z2.s, p0/m, z1.s", + "mov z5.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[3]", + "mov z1.s, z2.s[3]", + "mov z4.d, z5.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-1", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[4]", + "mov z1.s, z2.s[4]", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #0", - "mov z2.s, p0/m, z1.s", + "mov z5.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[5]", + "mov z1.s, z2.s[5]", + "mov z4.d, z5.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #1", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z17.s[6]", + "mov z1.s, z2.s[6]", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #2", - "mov z2.s, p0/m, z1.s", + "mov z5.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, z18.s[7]", - "mov z16.d, z2.d", + "mov z1.s, z3.s[7]", + "mov z2.d, z5.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #3", - "mov z16.s, p0/m, z1.s", - "msr nzcv, x0" + "mov z2.s, p0/m, z1.s", + "msr nzcv, x0", + "mov z16.d, p7/m, z2.d" ] }, "vblendps ymm0, ymm1, ymm2, 11111111b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map 3 0b01 0x0c 256-bit" ], "ExpectedArm64ASM": [ - "mov z16.d, p7/m, z18.d" + "mov z2.d, p7/m, z18.d", + "mov z16.d, p7/m, z2.d" ] }, "vblendpd xmm0, xmm1, xmm2, 00b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b01 0x0d 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b" + "mov z2.d, p7/m, z17.d", + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vblendpd xmm0, xmm1, xmm2, 01b": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": [ "Map 3 0b01 0x0d 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov v2.d[0], v18.d[0]", - "mov v16.16b, v2.16b", - "mov v16.d[1], v17.d[1]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov v5.16b, v4.16b", + "mov v5.d[0], v3.d[0]", + "mov v3.16b, v5.16b", + "mov v3.d[1], v2.d[1]", + "mov z16.d, p7/m, z3.d" ] }, "vblendpd xmm0, xmm1, xmm2, 10b": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 8, "Comment": [ "Map 3 0b01 0x0d 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov v2.d[0], v17.d[0]", - "mov v16.16b, v2.16b", - "mov v16.d[1], v18.d[1]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov v5.16b, v4.16b", + "mov v5.d[0], v2.d[0]", + "mov v2.16b, v5.16b", + "mov v2.d[1], v3.d[1]", + "mov z16.d, p7/m, z2.d" ] }, "vblendpd xmm0, xmm1, xmm2, 11b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b01 0x0d 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v18.16b" + "mov z2.d, p7/m, z18.d", + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vblendpd ymm0, ymm1, ymm2, 0000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map 3 0b01 0x0d 256-bit" ], "ExpectedArm64ASM": [ - "mov z16.d, p7/m, z17.d" + "mov z2.d, p7/m, z17.d", + "mov z16.d, p7/m, z2.d" ] }, "vblendpd ymm0, ymm1, ymm2, 0001b": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 32, "Comment": [ "Map 3 0b01 0x0d 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.d, d18", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov z1.d, d3", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-2", - "mov z2.d, p0/m, z1.d", + "mov z5.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[1]", + "mov z1.d, z2.d[1]", + "mov z3.d, z5.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z2.d, p0/m, z1.d", + "mov z3.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[2]", + "mov z1.d, z2.d[2]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[3]", - "mov z16.d, z2.d", + "mov z1.d, z2.d[3]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z3.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vblendpd ymm0, ymm1, ymm2, 0010b": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 32, "Comment": [ "Map 3 0b01 0x0d 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.d, d17", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov z1.d, d2", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-2", - "mov z2.d, p0/m, z1.d", + "mov z5.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z18.d[1]", + "mov z1.d, z3.d[1]", + "mov z4.d, z5.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[2]", + "mov z1.d, z2.d[2]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z2.d, p0/m, z1.d", + "mov z3.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[3]", - "mov z16.d, z2.d", + "mov z1.d, z2.d[3]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z4.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vblendpd ymm0, ymm1, ymm2, 0011b": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 32, "Comment": [ "Map 3 0b01 0x0d 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.d, d18", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov z1.d, d3", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-2", - "mov z2.d, p0/m, z1.d", + "mov z5.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z18.d[1]", + "mov z1.d, z3.d[1]", + "mov z4.d, z5.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[2]", + "mov z1.d, z2.d[2]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z2.d, p0/m, z1.d", + "mov z3.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[3]", - "mov z16.d, z2.d", + "mov z1.d, z2.d[3]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z4.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vblendpd ymm0, ymm1, ymm2, 0100b": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 32, "Comment": [ "Map 3 0b01 0x0d 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.d, d17", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov z1.d, d2", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-2", - "mov z2.d, p0/m, z1.d", + "mov z5.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[1]", + "mov z1.d, z2.d[1]", + "mov z4.d, z5.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z18.d[2]", + "mov z1.d, z3.d[2]", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z2.d, p0/m, z1.d", + "mov z5.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[3]", - "mov z16.d, z2.d", + "mov z1.d, z2.d[3]", + "mov z3.d, z5.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z3.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vblendpd ymm0, ymm1, ymm2, 0101b": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 32, "Comment": [ "Map 3 0b01 0x0d 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.d, d18", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov z1.d, d3", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-2", - "mov z2.d, p0/m, z1.d", + "mov z5.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[1]", + "mov z1.d, z2.d[1]", + "mov z4.d, z5.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z18.d[2]", + "mov z1.d, z3.d[2]", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z2.d, p0/m, z1.d", + "mov z5.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[3]", - "mov z16.d, z2.d", + "mov z1.d, z2.d[3]", + "mov z3.d, z5.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z3.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vblendpd ymm0, ymm1, ymm2, 0110b": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 32, "Comment": [ "Map 3 0b01 0x0d 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.d, d17", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov z1.d, d2", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-2", - "mov z2.d, p0/m, z1.d", + "mov z5.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z18.d[1]", + "mov z1.d, z3.d[1]", + "mov z4.d, z5.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z18.d[2]", + "mov z1.d, z3.d[2]", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z2.d, p0/m, z1.d", + "mov z5.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[3]", - "mov z16.d, z2.d", + "mov z1.d, z2.d[3]", + "mov z3.d, z5.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z3.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vblendpd ymm0, ymm1, ymm2, 0111b": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 32, "Comment": [ "Map 3 0b01 0x0d 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.d, d18", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov z1.d, d3", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-2", - "mov z2.d, p0/m, z1.d", + "mov z5.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z18.d[1]", + "mov z1.d, z3.d[1]", + "mov z4.d, z5.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z18.d[2]", + "mov z1.d, z3.d[2]", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z2.d, p0/m, z1.d", + "mov z5.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[3]", - "mov z16.d, z2.d", + "mov z1.d, z2.d[3]", + "mov z3.d, z5.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z3.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vblendpd ymm0, ymm1, ymm2, 1000b": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 32, "Comment": [ "Map 3 0b01 0x0d 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.d, d17", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov z1.d, d2", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-2", - "mov z2.d, p0/m, z1.d", + "mov z5.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[1]", + "mov z1.d, z2.d[1]", + "mov z4.d, z5.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[2]", + "mov z1.d, z2.d[2]", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z2.d, p0/m, z1.d", + "mov z5.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z18.d[3]", - "mov z16.d, z2.d", + "mov z1.d, z3.d[3]", + "mov z2.d, z5.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z2.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z2.d" ] }, "vblendpd ymm0, ymm1, ymm2, 1001b": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 32, "Comment": [ "Map 3 0b01 0x0d 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.d, d18", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov z1.d, d3", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-2", - "mov z2.d, p0/m, z1.d", + "mov z5.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[1]", + "mov z1.d, z2.d[1]", + "mov z4.d, z5.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[2]", + "mov z1.d, z2.d[2]", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z2.d, p0/m, z1.d", + "mov z5.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z18.d[3]", - "mov z16.d, z2.d", + "mov z1.d, z3.d[3]", + "mov z2.d, z5.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z2.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z2.d" ] }, "vblendpd ymm0, ymm1, ymm2, 1010b": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 32, "Comment": [ "Map 3 0b01 0x0d 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.d, d17", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov z1.d, d2", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-2", - "mov z2.d, p0/m, z1.d", + "mov z5.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z18.d[1]", + "mov z1.d, z3.d[1]", + "mov z4.d, z5.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[2]", + "mov z1.d, z2.d[2]", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z2.d, p0/m, z1.d", + "mov z5.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z18.d[3]", - "mov z16.d, z2.d", + "mov z1.d, z3.d[3]", + "mov z2.d, z5.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z2.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z2.d" ] }, "vblendpd ymm0, ymm1, ymm2, 1011b": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 32, "Comment": [ "Map 3 0b01 0x0d 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.d, d18", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov z1.d, d3", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-2", - "mov z2.d, p0/m, z1.d", + "mov z5.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z18.d[1]", + "mov z1.d, z3.d[1]", + "mov z4.d, z5.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[2]", + "mov z1.d, z2.d[2]", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z2.d, p0/m, z1.d", + "mov z5.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z18.d[3]", - "mov z16.d, z2.d", + "mov z1.d, z3.d[3]", + "mov z2.d, z5.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z2.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z2.d" ] }, "vblendpd ymm0, ymm1, ymm2, 1100b": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 32, "Comment": [ "Map 3 0b01 0x0d 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.d, d17", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov z1.d, d2", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-2", - "mov z2.d, p0/m, z1.d", + "mov z5.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[1]", + "mov z1.d, z2.d[1]", + "mov z4.d, z5.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z18.d[2]", + "mov z1.d, z3.d[2]", + "mov z2.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", "mov z2.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z18.d[3]", - "mov z16.d, z2.d", + "mov z1.d, z3.d[3]", + "mov z4.d, z2.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z4.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vblendpd ymm0, ymm1, ymm2, 1101b": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 32, "Comment": [ "Map 3 0b01 0x0d 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.d, d18", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov z1.d, d3", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-2", - "mov z2.d, p0/m, z1.d", + "mov z5.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z17.d[1]", + "mov z1.d, z2.d[1]", + "mov z4.d, z5.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z18.d[2]", + "mov z1.d, z3.d[2]", + "mov z2.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", "mov z2.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z18.d[3]", - "mov z16.d, z2.d", + "mov z1.d, z3.d[3]", + "mov z4.d, z2.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z4.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vblendpd ymm0, ymm1, ymm2, 1110b": { - "ExpectedInstructionCount": 26, + "ExpectedInstructionCount": 32, "Comment": [ "Map 3 0b01 0x0d 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.d, d17", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov z1.d, d2", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-2", - "mov z2.d, p0/m, z1.d", + "mov z5.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z18.d[1]", + "mov z1.d, z3.d[1]", + "mov z2.d, z5.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #-1", "mov z2.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z18.d[2]", + "mov z1.d, z3.d[2]", + "mov z4.d, z2.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #0", - "mov z2.d, p0/m, z1.d", + "mov z4.d, p0/m, z1.d", "msr nzcv, x0", - "mov z1.d, z18.d[3]", - "mov z16.d, z2.d", + "mov z1.d, z3.d[3]", + "mov z2.d, z4.d", "mrs x0, nzcv", "index z0.d, #-2, #1", "cmpeq p0.d, p7/z, z0.d, #1", - "mov z16.d, p0/m, z1.d", - "msr nzcv, x0" + "mov z2.d, p0/m, z1.d", + "msr nzcv, x0", + "mov z16.d, p7/m, z2.d" ] }, "vblendpd ymm0, ymm1, ymm2, 1111b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map 3 0b01 0x0d 256-bit" ], "ExpectedArm64ASM": [ - "mov z16.d, p7/m, z18.d" + "mov z2.d, p7/m, z18.d", + "mov z16.d, p7/m, z2.d" ] }, "vpblendw xmm0, xmm1, xmm2, 00000000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b01 0x0e 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b" + "mov z2.d, p7/m, z17.d", + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vpblendw xmm0, xmm1, xmm2, 00000001b": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 21, "Comment": [ "Map 3 0b01 0x0e 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov v2.h[0], v18.h[0]", - "mov v2.h[1], v17.h[1]", - "mov v2.h[2], v17.h[2]", - "mov v2.h[3], v17.h[3]", - "mov v2.h[4], v17.h[4]", - "mov v2.h[5], v17.h[5]", - "mov v2.h[6], v17.h[6]", - "mov v2.h[7], v17.h[7]", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov v5.16b, v4.16b", + "mov v5.h[0], v3.h[0]", + "mov v3.16b, v5.16b", + "mov v3.h[1], v2.h[1]", + "mov v4.16b, v3.16b", + "mov v4.h[2], v2.h[2]", + "mov v3.16b, v4.16b", + "mov v3.h[3], v2.h[3]", + "mov v4.16b, v3.16b", + "mov v4.h[4], v2.h[4]", + "mov v3.16b, v4.16b", + "mov v3.h[5], v2.h[5]", + "mov v4.16b, v3.16b", + "mov v4.h[6], v2.h[6]", + "mov v3.16b, v4.16b", + "mov v3.h[7], v2.h[7]", + "mov v2.16b, v3.16b", + "mov z16.d, p7/m, z2.d" ] }, "vpblendw xmm0, xmm1, xmm2, 11111111b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b01 0x0e 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v18.16b" + "mov z2.d, p7/m, z18.d", + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vpblendw ymm0, ymm1, ymm2, 00000000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map 3 0b01 0x0e 128-bit" ], "ExpectedArm64ASM": [ - "mov z16.d, p7/m, z17.d" + "mov z2.d, p7/m, z17.d", + "mov z16.d, p7/m, z2.d" ] }, "vpblendw ymm0, ymm1, ymm2, 00000001b": { - "ExpectedInstructionCount": 98, + "ExpectedInstructionCount": 116, "Comment": [ "Map 3 0b01 0x0e 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.h, h18", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov z1.h, h3", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-8", - "mov z2.h, p0/m, z1.h", + "mov z5.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[1]", + "mov z1.h, z2.h[1]", + "mov z4.d, z5.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-7", - "mov z2.h, p0/m, z1.h", + "mov z4.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[2]", + "mov z1.h, z2.h[2]", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-6", - "mov z2.h, p0/m, z1.h", + "mov z5.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[3]", + "mov z1.h, z2.h[3]", + "mov z4.d, z5.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-5", - "mov z2.h, p0/m, z1.h", + "mov z4.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[4]", + "mov z1.h, z2.h[4]", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-4", - "mov z2.h, p0/m, z1.h", + "mov z5.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[5]", + "mov z1.h, z2.h[5]", + "mov z4.d, z5.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-3", - "mov z2.h, p0/m, z1.h", + "mov z4.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[6]", + "mov z1.h, z2.h[6]", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-2", - "mov z2.h, p0/m, z1.h", + "mov z5.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[7]", + "mov z1.h, z2.h[7]", + "mov z4.d, z5.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #-1", - "mov z2.h, p0/m, z1.h", + "mov z4.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z18.h[8]", + "mov z1.h, z3.h[8]", + "mov z5.d, z4.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #0", - "mov z2.h, p0/m, z1.h", + "mov z5.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[9]", + "mov z1.h, z2.h[9]", + "mov z3.d, z5.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #1", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[10]", + "mov z1.h, z2.h[10]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #2", - "mov z2.h, p0/m, z1.h", + "mov z4.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[11]", + "mov z1.h, z2.h[11]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #3", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[12]", + "mov z1.h, z2.h[12]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #4", - "mov z2.h, p0/m, z1.h", + "mov z4.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[13]", + "mov z1.h, z2.h[13]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #5", - "mov z2.h, p0/m, z1.h", + "mov z3.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[14]", + "mov z1.h, z2.h[14]", + "mov z4.d, z3.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #6", - "mov z2.h, p0/m, z1.h", + "mov z4.h, p0/m, z1.h", "msr nzcv, x0", - "mov z1.h, z17.h[15]", - "mov z16.d, z2.d", + "mov z1.h, z2.h[15]", + "mov z3.d, z4.d", "mrs x0, nzcv", "index z0.h, #-8, #1", "cmpeq p0.h, p7/z, z0.h, #7", - "mov z16.h, p0/m, z1.h", - "msr nzcv, x0" + "mov z3.h, p0/m, z1.h", + "msr nzcv, x0", + "mov z16.d, p7/m, z3.d" ] }, "vpblendw ymm0, ymm1, ymm2, 11111111b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map 3 0b01 0x0e 128-bit" ], "ExpectedArm64ASM": [ - "mov z16.d, p7/m, z18.d" + "mov z2.d, p7/m, z18.d", + "mov z16.d, p7/m, z2.d" ] }, "vpalignr xmm0, xmm1, xmm2, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b01 0x0f 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v18.16b" + "mov z2.d, p7/m, z18.d", + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vpalignr xmm0, xmm1, xmm2, 1": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 3 0b01 0x0f 128-bit" ], "ExpectedArm64ASM": [ - "ext v16.16b, v18.16b, v17.16b, #1" + "mov z2.d, p7/m, z18.d", + "mov z3.d, p7/m, z17.d", + "ext v4.16b, v2.16b, v3.16b, #1", + "mov z16.d, p7/m, z4.d" ] }, "vpalignr xmm0, xmm1, xmm2, 15": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 3 0b01 0x0f 128-bit" ], "ExpectedArm64ASM": [ - "ext v16.16b, v18.16b, v17.16b, #15" + "mov z2.d, p7/m, z18.d", + "mov z3.d, p7/m, z17.d", + "ext v4.16b, v2.16b, v3.16b, #15", + "mov z16.d, p7/m, z4.d" ] }, "vpalignr xmm0, xmm1, xmm2, 16": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 3 0b01 0x0f 128-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z18.d", + "mov z3.d, p7/m, z17.d", "movi v0.2d, #0x0", - "ext v16.16b, v17.16b, v0.16b, #0" + "ext v4.16b, v3.16b, v0.16b, #0", + "mov z16.d, p7/m, z4.d" ] }, "vpalignr ymm0, ymm1, ymm2, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map 3 0b01 0x0f 256-bit" ], "ExpectedArm64ASM": [ - "mov z16.d, p7/m, z18.d" + "mov z2.d, p7/m, z18.d", + "mov z16.d, p7/m, z2.d" ] }, "vpalignr ymm0, ymm1, ymm2, 1": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 15, "Comment": [ "Map 3 0b01 0x0f 256-bit" ], "ExpectedArm64ASM": [ - "ext v2.16b, v18.16b, v17.16b, #1", - "mov z1.q, z17.q[1]", - "mov z3.d, z17.d", + "mov z2.d, p7/m, z18.d", + "mov z3.d, p7/m, z17.d", + "ext v4.16b, v2.16b, v3.16b, #1", + "mov z1.q, z3.q[1]", + "mov z5.d, z3.d", + "mov z5.b, p6/m, z1.b", + "mov z1.q, z2.q[1]", + "mov z3.d, z2.d", "mov z3.b, p6/m, z1.b", - "mov z1.q, z18.q[1]", - "mov z4.d, z18.d", - "mov z4.b, p6/m, z1.b", - "ext v3.16b, v4.16b, v3.16b, #1", - "mov z1.q, q3", - "mov z16.d, z2.d", + "ext v2.16b, v3.16b, v5.16b, #1", + "mov z1.q, q2", + "mov z3.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z3.b, p0/m, z1.b", + "mov z16.d, p7/m, z3.d" ] }, "vpalignr ymm0, ymm1, ymm2, 15": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 15, "Comment": [ "Map 3 0b01 0x0f 256-bit" ], "ExpectedArm64ASM": [ - "ext v2.16b, v18.16b, v17.16b, #15", - "mov z1.q, z17.q[1]", - "mov z3.d, z17.d", + "mov z2.d, p7/m, z18.d", + "mov z3.d, p7/m, z17.d", + "ext v4.16b, v2.16b, v3.16b, #15", + "mov z1.q, z3.q[1]", + "mov z5.d, z3.d", + "mov z5.b, p6/m, z1.b", + "mov z1.q, z2.q[1]", + "mov z3.d, z2.d", "mov z3.b, p6/m, z1.b", - "mov z1.q, z18.q[1]", - "mov z4.d, z18.d", - "mov z4.b, p6/m, z1.b", - "ext v3.16b, v4.16b, v3.16b, #15", - "mov z1.q, q3", - "mov z16.d, z2.d", + "ext v2.16b, v3.16b, v5.16b, #15", + "mov z1.q, q2", + "mov z3.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z3.b, p0/m, z1.b", + "mov z16.d, p7/m, z3.d" ] }, "vpalignr ymm0, ymm1, ymm2, 16": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 17, "Comment": [ "Map 3 0b01 0x0f 256-bit" ], "ExpectedArm64ASM": [ + "mov z2.d, p7/m, z18.d", + "mov z3.d, p7/m, z17.d", "movi v0.2d, #0x0", - "ext v2.16b, v17.16b, v0.16b, #0", - "mov z1.q, z17.q[1]", - "mov z3.d, z17.d", + "ext v4.16b, v3.16b, v0.16b, #0", + "mov z1.q, z3.q[1]", + "mov z5.d, z3.d", + "mov z5.b, p6/m, z1.b", + "mov z1.q, z2.q[1]", + "mov z3.d, z2.d", "mov z3.b, p6/m, z1.b", - "mov z1.q, z18.q[1]", - "mov z4.d, z18.d", - "mov z4.b, p6/m, z1.b", "movi v0.2d, #0x0", - "ext v3.16b, v3.16b, v0.16b, #0", - "mov z1.q, q3", - "mov z16.d, z2.d", + "ext v2.16b, v5.16b, v0.16b, #0", + "mov z1.q, q2", + "mov z3.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z3.b, p0/m, z1.b", + "mov z16.d, p7/m, z3.d" ] }, "vpextrb rax, xmm0, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b01 0x14 128-bit" ], "ExpectedArm64ASM": [ - "umov w4, v16.b[0]" + "mov z2.d, p7/m, z16.d", + "umov w20, v2.b[0]", + "mov x4, x20" ] }, "vpextrb rax, xmm0, 15": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b01 0x14 128-bit" ], "ExpectedArm64ASM": [ - "umov w4, v16.b[15]" + "mov z2.d, p7/m, z16.d", + "umov w20, v2.b[15]", + "mov x4, x20" ] }, "vpextrw rax, xmm0, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b01 0x15 128-bit" ], "ExpectedArm64ASM": [ - "umov w4, v16.h[0]" + "mov z2.d, p7/m, z16.d", + "umov w20, v2.h[0]", + "mov x4, x20" ] }, "vpextrw rax, xmm0, 7": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b01 0x15 128-bit" ], "ExpectedArm64ASM": [ - "umov w4, v16.h[7]" + "mov z2.d, p7/m, z16.d", + "umov w20, v2.h[7]", + "mov x4, x20" ] }, "vpextrd rax, xmm0, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b01 0x16 128-bit" ], "ExpectedArm64ASM": [ - "mov w4, v16.s[0]" + "mov z2.d, p7/m, z16.d", + "mov w20, v2.s[0]", + "mov x4, x20" ] }, "vpextrd rax, xmm0, 3": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b01 0x16 128-bit" ], "ExpectedArm64ASM": [ - "mov w4, v16.s[3]" + "mov z2.d, p7/m, z16.d", + "mov w20, v2.s[3]", + "mov x4, x20" ] }, "vpextrb [rax], xmm0, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b01 0x14 128-bit" ], "ExpectedArm64ASM": [ - "st1 {v16.b}[0], [x4]" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "st1 {v2.b}[0], [x20]" ] }, "vpextrb [rax], xmm0, 15": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b01 0x14 128-bit" ], "ExpectedArm64ASM": [ - "st1 {v16.b}[15], [x4]" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "st1 {v2.b}[15], [x20]" ] }, "vpextrw [rax], xmm0, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b01 0x15 128-bit" ], "ExpectedArm64ASM": [ - "st1 {v16.h}[0], [x4]" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "st1 {v2.h}[0], [x20]" ] }, "vpextrw [rax], xmm0, 7": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b01 0x15 128-bit" ], "ExpectedArm64ASM": [ - "st1 {v16.h}[7], [x4]" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "st1 {v2.h}[7], [x20]" ] }, "vpextrd [rax], xmm0, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b01 0x16 128-bit" ], "ExpectedArm64ASM": [ - "st1 {v16.s}[0], [x4]" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "st1 {v2.s}[0], [x20]" ] }, "vpextrd [rax], xmm0, 3": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b01 0x16 128-bit" ], "ExpectedArm64ASM": [ - "st1 {v16.s}[3], [x4]" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "st1 {v2.s}[3], [x20]" ] }, "vextractps eax, xmm0, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b01 0x17 128-bit" ], "ExpectedArm64ASM": [ - "mov w4, v16.s[0]" + "mov z2.d, p7/m, z16.d", + "mov w20, v2.s[0]", + "mov x4, x20" ] }, "vextractps eax, xmm0, 3": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b01 0x17 128-bit" ], "ExpectedArm64ASM": [ - "mov w4, v16.s[3]" + "mov z2.d, p7/m, z16.d", + "mov w20, v2.s[3]", + "mov x4, x20" ] }, "vinsertf128 ymm0, ymm1, xmm2, 0": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 3 0b01 0x18 256-bit" ], "ExpectedArm64ASM": [ - "mov z1.q, q18", - "mov z16.d, z17.d", - "mov z16.b, p6/m, z1.b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z1.q, q3", + "mov z4.d, z2.d", + "mov z4.b, p6/m, z1.b", + "mov z16.d, p7/m, z4.d" ] }, "vinsertf128 ymm0, ymm1, xmm2, 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 3 0b01 0x18 256-bit" ], "ExpectedArm64ASM": [ - "mov z1.q, q18", - "mov z16.d, z17.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z1.q, q3", + "mov z4.d, z2.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z4.b, p0/m, z1.b", + "mov z16.d, p7/m, z4.d" ] }, "vextractf128 xmm0, ymm1, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b01 0x19 256-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b" + "mov z2.d, p7/m, z17.d", + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vextractf128 xmm0, ymm1, 1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map 3 0b01 0x19 256-bit" ], "ExpectedArm64ASM": [ - "mov z2.q, z17.q[1]", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.q, z2.q[1]", + "mov v2.16b, v3.16b", + "mov z16.d, p7/m, z2.d" ] }, "vcvtps2ph xmm0, xmm1, 00000000b": { @@ -3141,997 +3776,1143 @@ ] }, "vpinsrb xmm0, xmm0, eax, 0": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 3 0b01 0x20 128-bit" ], "ExpectedArm64ASM": [ - "mov v2.16b, v16.16b", - "mov v2.b[0], w4", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.b[0], w20", + "mov v2.16b, v3.16b", + "mov z16.d, p7/m, z2.d" ] }, "vpinsrb xmm0, xmm1, eax, 0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 3 0b01 0x20 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "mov v16.b[0], w4" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.b[0], w20", + "mov z16.d, p7/m, z3.d" ] }, "vpinsrb xmm0, xmm1, eax, 15": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 3 0b01 0x20 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "mov v16.b[15], w4" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.b[15], w20", + "mov z16.d, p7/m, z3.d" ] }, "vinsertps xmm0, xmm1, xmm2, ((0b00 << 6) | (0b00 << 4) | (0b0000))": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 3 0b01 0x21 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "mov v16.s[0], v18.s[0]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "mov v4.s[0], v3.s[0]", + "mov z16.d, p7/m, z4.d" ] }, "vinsertps xmm0, xmm1, xmm2, ((0b00 << 6) | (0b00 << 4) | (0b1111))": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map 3 0b01 0x21 128-bit" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "movi v2.2d, #0x0", + "mov z16.d, p7/m, z2.d" ] }, "vinsertps xmm0, xmm1, xmm2, ((0b11 << 6) | (0b11 << 4) | (0b0000))": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 3 0b01 0x21 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "mov v16.s[3], v18.s[3]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov v4.16b, v2.16b", + "mov v4.s[3], v3.s[3]", + "mov z16.d, p7/m, z4.d" ] }, "vpinsrd xmm0, xmm0, eax, 0": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 3 0b01 0x22 128-bit" ], "ExpectedArm64ASM": [ - "mov v2.16b, v16.16b", - "mov v2.s[0], w4", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.s[0], w20", + "mov v2.16b, v3.16b", + "mov z16.d, p7/m, z2.d" ] }, "vpinsrd xmm0, xmm1, eax, 0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 3 0b01 0x22 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "mov v16.s[0], w4" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.s[0], w20", + "mov z16.d, p7/m, z3.d" ] }, "vpinsrd xmm0, xmm1, eax, 3": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 3 0b01 0x22 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "mov v16.s[3], w4" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.s[3], w20", + "mov z16.d, p7/m, z3.d" ] }, "vpinsrq xmm0, xmm0, rax, 0": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 3 0b01 0x22 128-bit" ], "ExpectedArm64ASM": [ - "mov v2.16b, v16.16b", - "mov v2.d[0], x4", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z16.d", + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.d[0], x20", + "mov v2.16b, v3.16b", + "mov z16.d, p7/m, z2.d" ] }, "vpinsrq xmm0, xmm1, rax, 0": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 3 0b01 0x22 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "mov v16.d[0], x4" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.d[0], x20", + "mov z16.d, p7/m, z3.d" ] }, "vpinsrq xmm0, xmm1, rax, 1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 3 0b01 0x22 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b", - "mov v16.d[1], x4" + "mov z2.d, p7/m, z17.d", + "mov x20, x4", + "mov v3.16b, v2.16b", + "mov v3.d[1], x20", + "mov z16.d, p7/m, z3.d" ] }, "vinserti128 ymm0, ymm1, xmm2, 0": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 3 0b01 0x38 256-bit" ], "ExpectedArm64ASM": [ - "mov z1.q, q18", - "mov z16.d, z17.d", - "mov z16.b, p6/m, z1.b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z1.q, q3", + "mov z4.d, z2.d", + "mov z4.b, p6/m, z1.b", + "mov z16.d, p7/m, z4.d" ] }, "vinserti128 ymm0, ymm1, xmm2, 1": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 7, "Comment": [ "Map 3 0b01 0x38 256-bit" ], "ExpectedArm64ASM": [ - "mov z1.q, q18", - "mov z16.d, z17.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z1.q, q3", + "mov z4.d, z2.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z4.b, p0/m, z1.b", + "mov z16.d, p7/m, z4.d" ] }, "vextracti128 xmm0, ymm1, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b01 0x39 256-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b" + "mov z2.d, p7/m, z17.d", + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vextracti128 xmm0, ymm1, 1": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map 3 0b01 0x39 256-bit" ], "ExpectedArm64ASM": [ - "mov z2.q, z17.q[1]", - "mov v16.16b, v2.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.q, z2.q[1]", + "mov v2.16b, v3.16b", + "mov z16.d, p7/m, z2.d" ] }, "vdpps xmm0, xmm1, xmm2, 00000000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map 3 0b01 0x40 128-bit" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "movi v2.2d, #0x0", + "mov z16.d, p7/m, z2.d" ] }, "vdpps xmm0, xmm1, xmm2, 00001111b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map 3 0b01 0x40 128-bit" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "movi v2.2d, #0x0", + "mov z16.d, p7/m, z2.d" ] }, "vdpps xmm0, xmm1, xmm2, 11110000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map 3 0b01 0x40 128-bit" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "movi v2.2d, #0x0", + "mov z16.d, p7/m, z2.d" ] }, "vdpps xmm0, xmm1, xmm2, 11111111b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 3 0b01 0x40 128-bit" ], "ExpectedArm64ASM": [ - "fmul v2.4s, v17.4s, v18.4s", - "faddv s2, p6, z2.s", - "dup v16.4s, v2.s[0]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fmul v4.4s, v2.4s, v3.4s", + "faddv s2, p6, z4.s", + "dup v3.4s, v2.s[0]", + "mov z16.d, p7/m, z3.d" ] }, "vdpps ymm0, ymm1, ymm2, 00000000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map 3 0b01 0x40 128-bit" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "movi v2.2d, #0x0", + "mov z16.d, p7/m, z2.d" ] }, "vdpps ymm0, ymm1, ymm2, 00001111b": { - "ExpectedInstructionCount": 109, + "ExpectedInstructionCount": 127, "Comment": [ "Map 3 0b01 0x40 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "fmul z3.s, z17.s, z18.s", - "mov z1.s, s2", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "fmul z5.s, z2.s, z3.s", + "mov z1.s, s4", + "mov z2.d, z5.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-4", - "mov z3.s, p0/m, z1.s", + "mov z2.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s2", + "mov z1.s, s4", + "mov z3.d, z2.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-3", "mov z3.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s2", + "mov z1.s, s4", + "mov z2.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-2", - "mov z3.s, p0/m, z1.s", + "mov z2.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s2", + "mov z1.s, s4", + "mov z3.d, z2.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-1", "mov z3.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s2", + "mov z1.s, s4", + "mov z2.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #0", - "mov z3.s, p0/m, z1.s", + "mov z2.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s2", + "mov z1.s, s4", + "mov z3.d, z2.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #1", "mov z3.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s2", + "mov z1.s, s4", + "mov z2.d, z3.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #2", - "mov z3.s, p0/m, z1.s", + "mov z2.s, p0/m, z1.s", "msr nzcv, x0", - "mov z1.s, s2", + "mov z1.s, s4", + "mov z3.d, z2.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #3", "mov z3.s, p0/m, z1.s", "msr nzcv, x0", "movprfx z0, z3", - "faddp z0.s, p7/m, z0.s, z2.s", - "uzp1 z3.s, z0.s, z0.s", + "faddp z0.s, p7/m, z0.s, z4.s", + "uzp1 z2.s, z0.s, z0.s", "uzp2 z1.s, z0.s, z0.s", - "splice z3.d, p6, z3.d, z1.d", - "movprfx z0, z3", - "faddp z0.s, p7/m, z0.s, z2.s", + "splice z2.d, p6, z2.d, z1.d", + "movprfx z0, z2", + "faddp z0.s, p7/m, z0.s, z4.s", "uzp1 z3.s, z0.s, z0.s", "uzp2 z1.s, z0.s, z0.s", "splice z3.d, p6, z3.d, z1.d", "mov z1.s, s3", + "mov z2.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-4", "mov z2.s, p0/m, z1.s", "msr nzcv, x0", "mov z1.s, s3", + "mov z4.d, z2.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-3", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", "mov z1.s, s3", + "mov z2.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-2", "mov z2.s, p0/m, z1.s", "msr nzcv, x0", "mov z1.s, s3", + "mov z4.d, z2.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-1", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", "mov z1.s, s3", + "mov z2.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #0", "mov z2.s, p0/m, z1.s", "msr nzcv, x0", "mov z1.s, s3", + "mov z4.d, z2.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #1", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", "mov z1.s, s3", + "mov z2.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #2", "mov z2.s, p0/m, z1.s", "msr nzcv, x0", "mov z1.s, s3", - "mov z16.d, z2.d", + "mov z4.d, z2.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #3", - "mov z16.s, p0/m, z1.s", - "msr nzcv, x0" + "mov z4.s, p0/m, z1.s", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vdpps ymm0, ymm1, ymm2, 11110000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map 3 0b01 0x40 128-bit" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "movi v2.2d, #0x0", + "mov z16.d, p7/m, z2.d" ] }, "vdpps ymm0, ymm1, ymm2, 11111111b": { - "ExpectedInstructionCount": 61, + "ExpectedInstructionCount": 71, "Comment": [ "Map 3 0b01 0x40 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "fmul z3.s, z17.s, z18.s", - "movprfx z0, z3", - "faddp z0.s, p7/m, z0.s, z2.s", - "uzp1 z3.s, z0.s, z0.s", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "fmul z5.s, z2.s, z3.s", + "movprfx z0, z5", + "faddp z0.s, p7/m, z0.s, z4.s", + "uzp1 z2.s, z0.s, z0.s", "uzp2 z1.s, z0.s, z0.s", - "splice z3.d, p6, z3.d, z1.d", - "movprfx z0, z3", - "faddp z0.s, p7/m, z0.s, z2.s", + "splice z2.d, p6, z2.d, z1.d", + "movprfx z0, z2", + "faddp z0.s, p7/m, z0.s, z4.s", "uzp1 z3.s, z0.s, z0.s", "uzp2 z1.s, z0.s, z0.s", "splice z3.d, p6, z3.d, z1.d", "mov z1.s, s3", + "mov z2.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-4", "mov z2.s, p0/m, z1.s", "msr nzcv, x0", "mov z1.s, s3", + "mov z4.d, z2.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-3", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", "mov z1.s, s3", + "mov z2.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-2", "mov z2.s, p0/m, z1.s", "msr nzcv, x0", "mov z1.s, s3", + "mov z4.d, z2.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #-1", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", "mov z1.s, s3", + "mov z2.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #0", "mov z2.s, p0/m, z1.s", "msr nzcv, x0", "mov z1.s, s3", + "mov z4.d, z2.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #1", - "mov z2.s, p0/m, z1.s", + "mov z4.s, p0/m, z1.s", "msr nzcv, x0", "mov z1.s, s3", + "mov z2.d, z4.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #2", "mov z2.s, p0/m, z1.s", "msr nzcv, x0", "mov z1.s, s3", - "mov z16.d, z2.d", + "mov z4.d, z2.d", "mrs x0, nzcv", "index z0.s, #-4, #1", "cmpeq p0.s, p7/z, z0.s, #3", - "mov z16.s, p0/m, z1.s", - "msr nzcv, x0" + "mov z4.s, p0/m, z1.s", + "msr nzcv, x0", + "mov z16.d, p7/m, z4.d" ] }, "vdppd xmm0, xmm1, xmm2, 00000000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map 3 0b01 0x41 128-bit" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "movi v2.2d, #0x0", + "mov z16.d, p7/m, z2.d" ] }, "vdppd xmm0, xmm1, xmm2, 00001111b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map 3 0b01 0x41 128-bit" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "movi v2.2d, #0x0", + "mov z16.d, p7/m, z2.d" ] }, "vdppd xmm0, xmm1, xmm2, 11110000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map 3 0b01 0x41 128-bit" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "movi v2.2d, #0x0", + "mov z16.d, p7/m, z2.d" ] }, "vdppd xmm0, xmm1, xmm2, 11111111b": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 6, "Comment": [ "Map 3 0b01 0x41 128-bit" ], "ExpectedArm64ASM": [ - "fmul v2.2d, v17.2d, v18.2d", - "faddv d2, p6, z2.d", - "dup v16.2d, v2.d[0]" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "fmul v4.2d, v2.2d, v3.2d", + "faddv d2, p6, z4.d", + "dup v3.2d, v2.d[0]", + "mov z16.d, p7/m, z3.d" ] }, "vmpsadbw xmm0, xmm1, xmm2, 000b": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 17, "Comment": [ "Map 3 0b01 0x42 128-bit" ], "ExpectedArm64ASM": [ - "dup v2.4s, v18.s[0]", - "ext v3.16b, v17.16b, v17.16b, #0", - "ext v4.16b, v17.16b, v17.16b, #1", - "ext v5.16b, v17.16b, v17.16b, #2", - "ext v6.16b, v17.16b, v17.16b, #3", - "uabdl v3.8h, v3.8b, v2.8b", - "uabdl v4.8h, v4.8b, v2.8b", - "uabdl v5.8h, v5.8b, v2.8b", - "uabdl v2.8h, v6.8b, v2.8b", - "addp v3.8h, v3.8h, v5.8h", - "addp v2.8h, v4.8h, v2.8h", - "trn1 v4.4s, v3.4s, v2.4s", - "trn2 v2.4s, v3.4s, v2.4s", - "addp v16.8h, v4.8h, v2.8h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "dup v4.4s, v3.s[0]", + "ext v3.16b, v2.16b, v2.16b, #0", + "ext v5.16b, v2.16b, v2.16b, #1", + "ext v6.16b, v2.16b, v2.16b, #2", + "ext v7.16b, v2.16b, v2.16b, #3", + "uabdl v2.8h, v3.8b, v4.8b", + "uabdl v3.8h, v5.8b, v4.8b", + "uabdl v5.8h, v6.8b, v4.8b", + "uabdl v6.8h, v7.8b, v4.8b", + "addp v4.8h, v2.8h, v5.8h", + "addp v2.8h, v3.8h, v6.8h", + "trn1 v3.4s, v4.4s, v2.4s", + "trn2 v5.4s, v4.4s, v2.4s", + "addp v2.8h, v3.8h, v5.8h", + "mov z16.d, p7/m, z2.d" ] }, "vmpsadbw xmm0, xmm1, xmm2, 001b": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 17, "Comment": [ "Map 3 0b01 0x42 128-bit" ], "ExpectedArm64ASM": [ - "dup v2.4s, v18.s[1]", - "ext v3.16b, v17.16b, v17.16b, #0", - "ext v4.16b, v17.16b, v17.16b, #1", - "ext v5.16b, v17.16b, v17.16b, #2", - "ext v6.16b, v17.16b, v17.16b, #3", - "uabdl v3.8h, v3.8b, v2.8b", - "uabdl v4.8h, v4.8b, v2.8b", - "uabdl v5.8h, v5.8b, v2.8b", - "uabdl v2.8h, v6.8b, v2.8b", - "addp v3.8h, v3.8h, v5.8h", - "addp v2.8h, v4.8h, v2.8h", - "trn1 v4.4s, v3.4s, v2.4s", - "trn2 v2.4s, v3.4s, v2.4s", - "addp v16.8h, v4.8h, v2.8h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "dup v4.4s, v3.s[1]", + "ext v3.16b, v2.16b, v2.16b, #0", + "ext v5.16b, v2.16b, v2.16b, #1", + "ext v6.16b, v2.16b, v2.16b, #2", + "ext v7.16b, v2.16b, v2.16b, #3", + "uabdl v2.8h, v3.8b, v4.8b", + "uabdl v3.8h, v5.8b, v4.8b", + "uabdl v5.8h, v6.8b, v4.8b", + "uabdl v6.8h, v7.8b, v4.8b", + "addp v4.8h, v2.8h, v5.8h", + "addp v2.8h, v3.8h, v6.8h", + "trn1 v3.4s, v4.4s, v2.4s", + "trn2 v5.4s, v4.4s, v2.4s", + "addp v2.8h, v3.8h, v5.8h", + "mov z16.d, p7/m, z2.d" ] }, "vmpsadbw xmm0, xmm1, xmm2, 010b": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 17, "Comment": [ "Map 3 0b01 0x42 128-bit" ], "ExpectedArm64ASM": [ - "dup v2.4s, v18.s[2]", - "ext v3.16b, v17.16b, v17.16b, #0", - "ext v4.16b, v17.16b, v17.16b, #1", - "ext v5.16b, v17.16b, v17.16b, #2", - "ext v6.16b, v17.16b, v17.16b, #3", - "uabdl v3.8h, v3.8b, v2.8b", - "uabdl v4.8h, v4.8b, v2.8b", - "uabdl v5.8h, v5.8b, v2.8b", - "uabdl v2.8h, v6.8b, v2.8b", - "addp v3.8h, v3.8h, v5.8h", - "addp v2.8h, v4.8h, v2.8h", - "trn1 v4.4s, v3.4s, v2.4s", - "trn2 v2.4s, v3.4s, v2.4s", - "addp v16.8h, v4.8h, v2.8h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "dup v4.4s, v3.s[2]", + "ext v3.16b, v2.16b, v2.16b, #0", + "ext v5.16b, v2.16b, v2.16b, #1", + "ext v6.16b, v2.16b, v2.16b, #2", + "ext v7.16b, v2.16b, v2.16b, #3", + "uabdl v2.8h, v3.8b, v4.8b", + "uabdl v3.8h, v5.8b, v4.8b", + "uabdl v5.8h, v6.8b, v4.8b", + "uabdl v6.8h, v7.8b, v4.8b", + "addp v4.8h, v2.8h, v5.8h", + "addp v2.8h, v3.8h, v6.8h", + "trn1 v3.4s, v4.4s, v2.4s", + "trn2 v5.4s, v4.4s, v2.4s", + "addp v2.8h, v3.8h, v5.8h", + "mov z16.d, p7/m, z2.d" ] }, "vmpsadbw xmm0, xmm1, xmm2, 011b": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 17, "Comment": [ "Map 3 0b01 0x42 128-bit" ], "ExpectedArm64ASM": [ - "dup v2.4s, v18.s[3]", - "ext v3.16b, v17.16b, v17.16b, #0", - "ext v4.16b, v17.16b, v17.16b, #1", - "ext v5.16b, v17.16b, v17.16b, #2", - "ext v6.16b, v17.16b, v17.16b, #3", - "uabdl v3.8h, v3.8b, v2.8b", - "uabdl v4.8h, v4.8b, v2.8b", - "uabdl v5.8h, v5.8b, v2.8b", - "uabdl v2.8h, v6.8b, v2.8b", - "addp v3.8h, v3.8h, v5.8h", - "addp v2.8h, v4.8h, v2.8h", - "trn1 v4.4s, v3.4s, v2.4s", - "trn2 v2.4s, v3.4s, v2.4s", - "addp v16.8h, v4.8h, v2.8h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "dup v4.4s, v3.s[3]", + "ext v3.16b, v2.16b, v2.16b, #0", + "ext v5.16b, v2.16b, v2.16b, #1", + "ext v6.16b, v2.16b, v2.16b, #2", + "ext v7.16b, v2.16b, v2.16b, #3", + "uabdl v2.8h, v3.8b, v4.8b", + "uabdl v3.8h, v5.8b, v4.8b", + "uabdl v5.8h, v6.8b, v4.8b", + "uabdl v6.8h, v7.8b, v4.8b", + "addp v4.8h, v2.8h, v5.8h", + "addp v2.8h, v3.8h, v6.8h", + "trn1 v3.4s, v4.4s, v2.4s", + "trn2 v5.4s, v4.4s, v2.4s", + "addp v2.8h, v3.8h, v5.8h", + "mov z16.d, p7/m, z2.d" ] }, "vmpsadbw xmm0, xmm1, xmm2, 100b": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 17, "Comment": [ "Map 3 0b01 0x42 128-bit" ], "ExpectedArm64ASM": [ - "dup v2.4s, v18.s[0]", - "ext v3.16b, v17.16b, v17.16b, #4", - "ext v4.16b, v17.16b, v17.16b, #5", - "ext v5.16b, v17.16b, v17.16b, #6", - "ext v6.16b, v17.16b, v17.16b, #7", - "uabdl v3.8h, v3.8b, v2.8b", - "uabdl v4.8h, v4.8b, v2.8b", - "uabdl v5.8h, v5.8b, v2.8b", - "uabdl v2.8h, v6.8b, v2.8b", - "addp v3.8h, v3.8h, v5.8h", - "addp v2.8h, v4.8h, v2.8h", - "trn1 v4.4s, v3.4s, v2.4s", - "trn2 v2.4s, v3.4s, v2.4s", - "addp v16.8h, v4.8h, v2.8h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "dup v4.4s, v3.s[0]", + "ext v3.16b, v2.16b, v2.16b, #4", + "ext v5.16b, v2.16b, v2.16b, #5", + "ext v6.16b, v2.16b, v2.16b, #6", + "ext v7.16b, v2.16b, v2.16b, #7", + "uabdl v2.8h, v3.8b, v4.8b", + "uabdl v3.8h, v5.8b, v4.8b", + "uabdl v5.8h, v6.8b, v4.8b", + "uabdl v6.8h, v7.8b, v4.8b", + "addp v4.8h, v2.8h, v5.8h", + "addp v2.8h, v3.8h, v6.8h", + "trn1 v3.4s, v4.4s, v2.4s", + "trn2 v5.4s, v4.4s, v2.4s", + "addp v2.8h, v3.8h, v5.8h", + "mov z16.d, p7/m, z2.d" ] }, "vmpsadbw xmm0, xmm1, xmm2, 101b": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 17, "Comment": [ "Map 3 0b01 0x42 128-bit" ], "ExpectedArm64ASM": [ - "dup v2.4s, v18.s[1]", - "ext v3.16b, v17.16b, v17.16b, #4", - "ext v4.16b, v17.16b, v17.16b, #5", - "ext v5.16b, v17.16b, v17.16b, #6", - "ext v6.16b, v17.16b, v17.16b, #7", - "uabdl v3.8h, v3.8b, v2.8b", - "uabdl v4.8h, v4.8b, v2.8b", - "uabdl v5.8h, v5.8b, v2.8b", - "uabdl v2.8h, v6.8b, v2.8b", - "addp v3.8h, v3.8h, v5.8h", - "addp v2.8h, v4.8h, v2.8h", - "trn1 v4.4s, v3.4s, v2.4s", - "trn2 v2.4s, v3.4s, v2.4s", - "addp v16.8h, v4.8h, v2.8h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "dup v4.4s, v3.s[1]", + "ext v3.16b, v2.16b, v2.16b, #4", + "ext v5.16b, v2.16b, v2.16b, #5", + "ext v6.16b, v2.16b, v2.16b, #6", + "ext v7.16b, v2.16b, v2.16b, #7", + "uabdl v2.8h, v3.8b, v4.8b", + "uabdl v3.8h, v5.8b, v4.8b", + "uabdl v5.8h, v6.8b, v4.8b", + "uabdl v6.8h, v7.8b, v4.8b", + "addp v4.8h, v2.8h, v5.8h", + "addp v2.8h, v3.8h, v6.8h", + "trn1 v3.4s, v4.4s, v2.4s", + "trn2 v5.4s, v4.4s, v2.4s", + "addp v2.8h, v3.8h, v5.8h", + "mov z16.d, p7/m, z2.d" ] }, "vmpsadbw xmm0, xmm1, xmm2, 110b": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 17, "Comment": [ "Map 3 0b01 0x42 128-bit" ], "ExpectedArm64ASM": [ - "dup v2.4s, v18.s[2]", - "ext v3.16b, v17.16b, v17.16b, #4", - "ext v4.16b, v17.16b, v17.16b, #5", - "ext v5.16b, v17.16b, v17.16b, #6", - "ext v6.16b, v17.16b, v17.16b, #7", - "uabdl v3.8h, v3.8b, v2.8b", - "uabdl v4.8h, v4.8b, v2.8b", - "uabdl v5.8h, v5.8b, v2.8b", - "uabdl v2.8h, v6.8b, v2.8b", - "addp v3.8h, v3.8h, v5.8h", - "addp v2.8h, v4.8h, v2.8h", - "trn1 v4.4s, v3.4s, v2.4s", - "trn2 v2.4s, v3.4s, v2.4s", - "addp v16.8h, v4.8h, v2.8h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "dup v4.4s, v3.s[2]", + "ext v3.16b, v2.16b, v2.16b, #4", + "ext v5.16b, v2.16b, v2.16b, #5", + "ext v6.16b, v2.16b, v2.16b, #6", + "ext v7.16b, v2.16b, v2.16b, #7", + "uabdl v2.8h, v3.8b, v4.8b", + "uabdl v3.8h, v5.8b, v4.8b", + "uabdl v5.8h, v6.8b, v4.8b", + "uabdl v6.8h, v7.8b, v4.8b", + "addp v4.8h, v2.8h, v5.8h", + "addp v2.8h, v3.8h, v6.8h", + "trn1 v3.4s, v4.4s, v2.4s", + "trn2 v5.4s, v4.4s, v2.4s", + "addp v2.8h, v3.8h, v5.8h", + "mov z16.d, p7/m, z2.d" ] }, "vmpsadbw xmm0, xmm1, xmm2, 111b": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 17, "Comment": [ "Map 3 0b01 0x42 128-bit" ], "ExpectedArm64ASM": [ - "dup v2.4s, v18.s[3]", - "ext v3.16b, v17.16b, v17.16b, #4", - "ext v4.16b, v17.16b, v17.16b, #5", - "ext v5.16b, v17.16b, v17.16b, #6", - "ext v6.16b, v17.16b, v17.16b, #7", - "uabdl v3.8h, v3.8b, v2.8b", - "uabdl v4.8h, v4.8b, v2.8b", - "uabdl v5.8h, v5.8b, v2.8b", - "uabdl v2.8h, v6.8b, v2.8b", - "addp v3.8h, v3.8h, v5.8h", - "addp v2.8h, v4.8h, v2.8h", - "trn1 v4.4s, v3.4s, v2.4s", - "trn2 v2.4s, v3.4s, v2.4s", - "addp v16.8h, v4.8h, v2.8h" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "dup v4.4s, v3.s[3]", + "ext v3.16b, v2.16b, v2.16b, #4", + "ext v5.16b, v2.16b, v2.16b, #5", + "ext v6.16b, v2.16b, v2.16b, #6", + "ext v7.16b, v2.16b, v2.16b, #7", + "uabdl v2.8h, v3.8b, v4.8b", + "uabdl v3.8h, v5.8b, v4.8b", + "uabdl v5.8h, v6.8b, v4.8b", + "uabdl v6.8h, v7.8b, v4.8b", + "addp v4.8h, v2.8h, v5.8h", + "addp v2.8h, v3.8h, v6.8h", + "trn1 v3.4s, v4.4s, v2.4s", + "trn2 v5.4s, v4.4s, v2.4s", + "addp v2.8h, v3.8h, v5.8h", + "mov z16.d, p7/m, z2.d" ] }, "vmpsadbw ymm0, ymm1, ymm2, 000b": { - "ExpectedInstructionCount": 34, + "ExpectedInstructionCount": 37, "Comment": [ "Map 3 0b01 0x42 256-bit" ], "ExpectedArm64ASM": [ - "dup v2.4s, v18.s[0]", - "ext v3.16b, v17.16b, v17.16b, #0", - "ext v4.16b, v17.16b, v17.16b, #1", - "ext v5.16b, v17.16b, v17.16b, #2", - "ext v6.16b, v17.16b, v17.16b, #3", - "uabdl v3.8h, v3.8b, v2.8b", - "uabdl v4.8h, v4.8b, v2.8b", - "uabdl v5.8h, v5.8b, v2.8b", - "uabdl v2.8h, v6.8b, v2.8b", - "addp v3.8h, v3.8h, v5.8h", - "addp v2.8h, v4.8h, v2.8h", - "trn1 v4.4s, v3.4s, v2.4s", - "trn2 v2.4s, v3.4s, v2.4s", - "addp v2.8h, v4.8h, v2.8h", - "mov z3.q, z17.q[1]", - "mov z4.q, z18.q[1]", - "dup v4.4s, v4.s[0]", - "ext v5.16b, v3.16b, v3.16b, #0", - "ext v6.16b, v3.16b, v3.16b, #1", - "ext v7.16b, v3.16b, v3.16b, #2", - "ext v3.16b, v3.16b, v3.16b, #3", - "uabdl v5.8h, v5.8b, v4.8b", - "uabdl v6.8h, v6.8b, v4.8b", - "uabdl v7.8h, v7.8b, v4.8b", - "uabdl v3.8h, v3.8b, v4.8b", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "dup v4.4s, v3.s[0]", + "ext v5.16b, v2.16b, v2.16b, #0", + "ext v6.16b, v2.16b, v2.16b, #1", + "ext v7.16b, v2.16b, v2.16b, #2", + "ext v8.16b, v2.16b, v2.16b, #3", + "uabdl v9.8h, v5.8b, v4.8b", + "uabdl v5.8h, v6.8b, v4.8b", + "uabdl v6.8h, v7.8b, v4.8b", + "uabdl v7.8h, v8.8b, v4.8b", + "addp v4.8h, v9.8h, v6.8h", + "addp v6.8h, v5.8h, v7.8h", + "trn1 v5.4s, v4.4s, v6.4s", + "trn2 v7.4s, v4.4s, v6.4s", "addp v4.8h, v5.8h, v7.8h", - "addp v3.8h, v6.8h, v3.8h", - "trn1 v5.4s, v4.4s, v3.4s", - "trn2 v3.4s, v4.4s, v3.4s", - "addp v3.8h, v5.8h, v3.8h", + "mov z5.q, z2.q[1]", + "mov z2.q, z3.q[1]", + "dup v3.4s, v2.s[0]", + "ext v2.16b, v5.16b, v5.16b, #0", + "ext v6.16b, v5.16b, v5.16b, #1", + "ext v7.16b, v5.16b, v5.16b, #2", + "ext v8.16b, v5.16b, v5.16b, #3", + "uabdl v5.8h, v2.8b, v3.8b", + "uabdl v2.8h, v6.8b, v3.8b", + "uabdl v6.8h, v7.8b, v3.8b", + "uabdl v7.8h, v8.8b, v3.8b", + "addp v3.8h, v5.8h, v6.8h", + "addp v5.8h, v2.8h, v7.8h", + "trn1 v2.4s, v3.4s, v5.4s", + "trn2 v6.4s, v3.4s, v5.4s", + "addp v3.8h, v2.8h, v6.8h", "mov z1.q, q3", - "mov z16.d, z2.d", + "mov z2.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z2.b, p0/m, z1.b", + "mov z16.d, p7/m, z2.d" ] }, "vmpsadbw ymm0, ymm1, ymm2, 001b": { - "ExpectedInstructionCount": 34, + "ExpectedInstructionCount": 37, "Comment": [ "Map 3 0b01 0x42 256-bit" ], "ExpectedArm64ASM": [ - "dup v2.4s, v18.s[1]", - "ext v3.16b, v17.16b, v17.16b, #0", - "ext v4.16b, v17.16b, v17.16b, #1", - "ext v5.16b, v17.16b, v17.16b, #2", - "ext v6.16b, v17.16b, v17.16b, #3", - "uabdl v3.8h, v3.8b, v2.8b", - "uabdl v4.8h, v4.8b, v2.8b", - "uabdl v5.8h, v5.8b, v2.8b", - "uabdl v2.8h, v6.8b, v2.8b", - "addp v3.8h, v3.8h, v5.8h", - "addp v2.8h, v4.8h, v2.8h", - "trn1 v4.4s, v3.4s, v2.4s", - "trn2 v2.4s, v3.4s, v2.4s", - "addp v2.8h, v4.8h, v2.8h", - "mov z3.q, z17.q[1]", - "mov z4.q, z18.q[1]", - "dup v4.4s, v4.s[0]", - "ext v5.16b, v3.16b, v3.16b, #0", - "ext v6.16b, v3.16b, v3.16b, #1", - "ext v7.16b, v3.16b, v3.16b, #2", - "ext v3.16b, v3.16b, v3.16b, #3", - "uabdl v5.8h, v5.8b, v4.8b", - "uabdl v6.8h, v6.8b, v4.8b", - "uabdl v7.8h, v7.8b, v4.8b", - "uabdl v3.8h, v3.8b, v4.8b", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "dup v4.4s, v3.s[1]", + "ext v5.16b, v2.16b, v2.16b, #0", + "ext v6.16b, v2.16b, v2.16b, #1", + "ext v7.16b, v2.16b, v2.16b, #2", + "ext v8.16b, v2.16b, v2.16b, #3", + "uabdl v9.8h, v5.8b, v4.8b", + "uabdl v5.8h, v6.8b, v4.8b", + "uabdl v6.8h, v7.8b, v4.8b", + "uabdl v7.8h, v8.8b, v4.8b", + "addp v4.8h, v9.8h, v6.8h", + "addp v6.8h, v5.8h, v7.8h", + "trn1 v5.4s, v4.4s, v6.4s", + "trn2 v7.4s, v4.4s, v6.4s", "addp v4.8h, v5.8h, v7.8h", - "addp v3.8h, v6.8h, v3.8h", - "trn1 v5.4s, v4.4s, v3.4s", - "trn2 v3.4s, v4.4s, v3.4s", - "addp v3.8h, v5.8h, v3.8h", + "mov z5.q, z2.q[1]", + "mov z2.q, z3.q[1]", + "dup v3.4s, v2.s[0]", + "ext v2.16b, v5.16b, v5.16b, #0", + "ext v6.16b, v5.16b, v5.16b, #1", + "ext v7.16b, v5.16b, v5.16b, #2", + "ext v8.16b, v5.16b, v5.16b, #3", + "uabdl v5.8h, v2.8b, v3.8b", + "uabdl v2.8h, v6.8b, v3.8b", + "uabdl v6.8h, v7.8b, v3.8b", + "uabdl v7.8h, v8.8b, v3.8b", + "addp v3.8h, v5.8h, v6.8h", + "addp v5.8h, v2.8h, v7.8h", + "trn1 v2.4s, v3.4s, v5.4s", + "trn2 v6.4s, v3.4s, v5.4s", + "addp v3.8h, v2.8h, v6.8h", "mov z1.q, q3", - "mov z16.d, z2.d", + "mov z2.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z2.b, p0/m, z1.b", + "mov z16.d, p7/m, z2.d" ] }, "vmpsadbw ymm0, ymm1, ymm2, 010b": { - "ExpectedInstructionCount": 34, + "ExpectedInstructionCount": 37, "Comment": [ "Map 3 0b01 0x42 256-bit" ], "ExpectedArm64ASM": [ - "dup v2.4s, v18.s[2]", - "ext v3.16b, v17.16b, v17.16b, #0", - "ext v4.16b, v17.16b, v17.16b, #1", - "ext v5.16b, v17.16b, v17.16b, #2", - "ext v6.16b, v17.16b, v17.16b, #3", - "uabdl v3.8h, v3.8b, v2.8b", - "uabdl v4.8h, v4.8b, v2.8b", - "uabdl v5.8h, v5.8b, v2.8b", - "uabdl v2.8h, v6.8b, v2.8b", - "addp v3.8h, v3.8h, v5.8h", - "addp v2.8h, v4.8h, v2.8h", - "trn1 v4.4s, v3.4s, v2.4s", - "trn2 v2.4s, v3.4s, v2.4s", - "addp v2.8h, v4.8h, v2.8h", - "mov z3.q, z17.q[1]", - "mov z4.q, z18.q[1]", - "dup v4.4s, v4.s[0]", - "ext v5.16b, v3.16b, v3.16b, #0", - "ext v6.16b, v3.16b, v3.16b, #1", - "ext v7.16b, v3.16b, v3.16b, #2", - "ext v3.16b, v3.16b, v3.16b, #3", - "uabdl v5.8h, v5.8b, v4.8b", - "uabdl v6.8h, v6.8b, v4.8b", - "uabdl v7.8h, v7.8b, v4.8b", - "uabdl v3.8h, v3.8b, v4.8b", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "dup v4.4s, v3.s[2]", + "ext v5.16b, v2.16b, v2.16b, #0", + "ext v6.16b, v2.16b, v2.16b, #1", + "ext v7.16b, v2.16b, v2.16b, #2", + "ext v8.16b, v2.16b, v2.16b, #3", + "uabdl v9.8h, v5.8b, v4.8b", + "uabdl v5.8h, v6.8b, v4.8b", + "uabdl v6.8h, v7.8b, v4.8b", + "uabdl v7.8h, v8.8b, v4.8b", + "addp v4.8h, v9.8h, v6.8h", + "addp v6.8h, v5.8h, v7.8h", + "trn1 v5.4s, v4.4s, v6.4s", + "trn2 v7.4s, v4.4s, v6.4s", "addp v4.8h, v5.8h, v7.8h", - "addp v3.8h, v6.8h, v3.8h", - "trn1 v5.4s, v4.4s, v3.4s", - "trn2 v3.4s, v4.4s, v3.4s", - "addp v3.8h, v5.8h, v3.8h", + "mov z5.q, z2.q[1]", + "mov z2.q, z3.q[1]", + "dup v3.4s, v2.s[0]", + "ext v2.16b, v5.16b, v5.16b, #0", + "ext v6.16b, v5.16b, v5.16b, #1", + "ext v7.16b, v5.16b, v5.16b, #2", + "ext v8.16b, v5.16b, v5.16b, #3", + "uabdl v5.8h, v2.8b, v3.8b", + "uabdl v2.8h, v6.8b, v3.8b", + "uabdl v6.8h, v7.8b, v3.8b", + "uabdl v7.8h, v8.8b, v3.8b", + "addp v3.8h, v5.8h, v6.8h", + "addp v5.8h, v2.8h, v7.8h", + "trn1 v2.4s, v3.4s, v5.4s", + "trn2 v6.4s, v3.4s, v5.4s", + "addp v3.8h, v2.8h, v6.8h", "mov z1.q, q3", - "mov z16.d, z2.d", + "mov z2.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z2.b, p0/m, z1.b", + "mov z16.d, p7/m, z2.d" ] }, "vmpsadbw ymm0, ymm1, ymm2, 011b": { - "ExpectedInstructionCount": 34, + "ExpectedInstructionCount": 37, "Comment": [ "Map 3 0b01 0x42 256-bit" ], "ExpectedArm64ASM": [ - "dup v2.4s, v18.s[3]", - "ext v3.16b, v17.16b, v17.16b, #0", - "ext v4.16b, v17.16b, v17.16b, #1", - "ext v5.16b, v17.16b, v17.16b, #2", - "ext v6.16b, v17.16b, v17.16b, #3", - "uabdl v3.8h, v3.8b, v2.8b", - "uabdl v4.8h, v4.8b, v2.8b", - "uabdl v5.8h, v5.8b, v2.8b", - "uabdl v2.8h, v6.8b, v2.8b", - "addp v3.8h, v3.8h, v5.8h", - "addp v2.8h, v4.8h, v2.8h", - "trn1 v4.4s, v3.4s, v2.4s", - "trn2 v2.4s, v3.4s, v2.4s", - "addp v2.8h, v4.8h, v2.8h", - "mov z3.q, z17.q[1]", - "mov z4.q, z18.q[1]", - "dup v4.4s, v4.s[0]", - "ext v5.16b, v3.16b, v3.16b, #0", - "ext v6.16b, v3.16b, v3.16b, #1", - "ext v7.16b, v3.16b, v3.16b, #2", - "ext v3.16b, v3.16b, v3.16b, #3", - "uabdl v5.8h, v5.8b, v4.8b", - "uabdl v6.8h, v6.8b, v4.8b", - "uabdl v7.8h, v7.8b, v4.8b", - "uabdl v3.8h, v3.8b, v4.8b", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "dup v4.4s, v3.s[3]", + "ext v5.16b, v2.16b, v2.16b, #0", + "ext v6.16b, v2.16b, v2.16b, #1", + "ext v7.16b, v2.16b, v2.16b, #2", + "ext v8.16b, v2.16b, v2.16b, #3", + "uabdl v9.8h, v5.8b, v4.8b", + "uabdl v5.8h, v6.8b, v4.8b", + "uabdl v6.8h, v7.8b, v4.8b", + "uabdl v7.8h, v8.8b, v4.8b", + "addp v4.8h, v9.8h, v6.8h", + "addp v6.8h, v5.8h, v7.8h", + "trn1 v5.4s, v4.4s, v6.4s", + "trn2 v7.4s, v4.4s, v6.4s", "addp v4.8h, v5.8h, v7.8h", - "addp v3.8h, v6.8h, v3.8h", - "trn1 v5.4s, v4.4s, v3.4s", - "trn2 v3.4s, v4.4s, v3.4s", - "addp v3.8h, v5.8h, v3.8h", + "mov z5.q, z2.q[1]", + "mov z2.q, z3.q[1]", + "dup v3.4s, v2.s[0]", + "ext v2.16b, v5.16b, v5.16b, #0", + "ext v6.16b, v5.16b, v5.16b, #1", + "ext v7.16b, v5.16b, v5.16b, #2", + "ext v8.16b, v5.16b, v5.16b, #3", + "uabdl v5.8h, v2.8b, v3.8b", + "uabdl v2.8h, v6.8b, v3.8b", + "uabdl v6.8h, v7.8b, v3.8b", + "uabdl v7.8h, v8.8b, v3.8b", + "addp v3.8h, v5.8h, v6.8h", + "addp v5.8h, v2.8h, v7.8h", + "trn1 v2.4s, v3.4s, v5.4s", + "trn2 v6.4s, v3.4s, v5.4s", + "addp v3.8h, v2.8h, v6.8h", "mov z1.q, q3", - "mov z16.d, z2.d", + "mov z2.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z2.b, p0/m, z1.b", + "mov z16.d, p7/m, z2.d" ] }, "vmpsadbw ymm0, ymm1, ymm2, 100b": { - "ExpectedInstructionCount": 34, + "ExpectedInstructionCount": 37, "Comment": [ "Map 3 0b01 0x42 256-bit" ], "ExpectedArm64ASM": [ - "dup v2.4s, v18.s[0]", - "ext v3.16b, v17.16b, v17.16b, #4", - "ext v4.16b, v17.16b, v17.16b, #5", - "ext v5.16b, v17.16b, v17.16b, #6", - "ext v6.16b, v17.16b, v17.16b, #7", - "uabdl v3.8h, v3.8b, v2.8b", - "uabdl v4.8h, v4.8b, v2.8b", - "uabdl v5.8h, v5.8b, v2.8b", - "uabdl v2.8h, v6.8b, v2.8b", - "addp v3.8h, v3.8h, v5.8h", - "addp v2.8h, v4.8h, v2.8h", - "trn1 v4.4s, v3.4s, v2.4s", - "trn2 v2.4s, v3.4s, v2.4s", - "addp v2.8h, v4.8h, v2.8h", - "mov z3.q, z17.q[1]", - "mov z4.q, z18.q[1]", - "dup v4.4s, v4.s[0]", - "ext v5.16b, v3.16b, v3.16b, #0", - "ext v6.16b, v3.16b, v3.16b, #1", - "ext v7.16b, v3.16b, v3.16b, #2", - "ext v3.16b, v3.16b, v3.16b, #3", - "uabdl v5.8h, v5.8b, v4.8b", - "uabdl v6.8h, v6.8b, v4.8b", - "uabdl v7.8h, v7.8b, v4.8b", - "uabdl v3.8h, v3.8b, v4.8b", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "dup v4.4s, v3.s[0]", + "ext v5.16b, v2.16b, v2.16b, #4", + "ext v6.16b, v2.16b, v2.16b, #5", + "ext v7.16b, v2.16b, v2.16b, #6", + "ext v8.16b, v2.16b, v2.16b, #7", + "uabdl v9.8h, v5.8b, v4.8b", + "uabdl v5.8h, v6.8b, v4.8b", + "uabdl v6.8h, v7.8b, v4.8b", + "uabdl v7.8h, v8.8b, v4.8b", + "addp v4.8h, v9.8h, v6.8h", + "addp v6.8h, v5.8h, v7.8h", + "trn1 v5.4s, v4.4s, v6.4s", + "trn2 v7.4s, v4.4s, v6.4s", "addp v4.8h, v5.8h, v7.8h", - "addp v3.8h, v6.8h, v3.8h", - "trn1 v5.4s, v4.4s, v3.4s", - "trn2 v3.4s, v4.4s, v3.4s", - "addp v3.8h, v5.8h, v3.8h", + "mov z5.q, z2.q[1]", + "mov z2.q, z3.q[1]", + "dup v3.4s, v2.s[0]", + "ext v2.16b, v5.16b, v5.16b, #0", + "ext v6.16b, v5.16b, v5.16b, #1", + "ext v7.16b, v5.16b, v5.16b, #2", + "ext v8.16b, v5.16b, v5.16b, #3", + "uabdl v5.8h, v2.8b, v3.8b", + "uabdl v2.8h, v6.8b, v3.8b", + "uabdl v6.8h, v7.8b, v3.8b", + "uabdl v7.8h, v8.8b, v3.8b", + "addp v3.8h, v5.8h, v6.8h", + "addp v5.8h, v2.8h, v7.8h", + "trn1 v2.4s, v3.4s, v5.4s", + "trn2 v6.4s, v3.4s, v5.4s", + "addp v3.8h, v2.8h, v6.8h", "mov z1.q, q3", - "mov z16.d, z2.d", + "mov z2.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z2.b, p0/m, z1.b", + "mov z16.d, p7/m, z2.d" ] }, "vmpsadbw ymm0, ymm1, ymm2, 101b": { - "ExpectedInstructionCount": 34, + "ExpectedInstructionCount": 37, "Comment": [ "Map 3 0b01 0x42 256-bit" ], "ExpectedArm64ASM": [ - "dup v2.4s, v18.s[1]", - "ext v3.16b, v17.16b, v17.16b, #4", - "ext v4.16b, v17.16b, v17.16b, #5", - "ext v5.16b, v17.16b, v17.16b, #6", - "ext v6.16b, v17.16b, v17.16b, #7", - "uabdl v3.8h, v3.8b, v2.8b", - "uabdl v4.8h, v4.8b, v2.8b", - "uabdl v5.8h, v5.8b, v2.8b", - "uabdl v2.8h, v6.8b, v2.8b", - "addp v3.8h, v3.8h, v5.8h", - "addp v2.8h, v4.8h, v2.8h", - "trn1 v4.4s, v3.4s, v2.4s", - "trn2 v2.4s, v3.4s, v2.4s", - "addp v2.8h, v4.8h, v2.8h", - "mov z3.q, z17.q[1]", - "mov z4.q, z18.q[1]", - "dup v4.4s, v4.s[0]", - "ext v5.16b, v3.16b, v3.16b, #0", - "ext v6.16b, v3.16b, v3.16b, #1", - "ext v7.16b, v3.16b, v3.16b, #2", - "ext v3.16b, v3.16b, v3.16b, #3", - "uabdl v5.8h, v5.8b, v4.8b", - "uabdl v6.8h, v6.8b, v4.8b", - "uabdl v7.8h, v7.8b, v4.8b", - "uabdl v3.8h, v3.8b, v4.8b", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "dup v4.4s, v3.s[1]", + "ext v5.16b, v2.16b, v2.16b, #4", + "ext v6.16b, v2.16b, v2.16b, #5", + "ext v7.16b, v2.16b, v2.16b, #6", + "ext v8.16b, v2.16b, v2.16b, #7", + "uabdl v9.8h, v5.8b, v4.8b", + "uabdl v5.8h, v6.8b, v4.8b", + "uabdl v6.8h, v7.8b, v4.8b", + "uabdl v7.8h, v8.8b, v4.8b", + "addp v4.8h, v9.8h, v6.8h", + "addp v6.8h, v5.8h, v7.8h", + "trn1 v5.4s, v4.4s, v6.4s", + "trn2 v7.4s, v4.4s, v6.4s", "addp v4.8h, v5.8h, v7.8h", - "addp v3.8h, v6.8h, v3.8h", - "trn1 v5.4s, v4.4s, v3.4s", - "trn2 v3.4s, v4.4s, v3.4s", - "addp v3.8h, v5.8h, v3.8h", + "mov z5.q, z2.q[1]", + "mov z2.q, z3.q[1]", + "dup v3.4s, v2.s[0]", + "ext v2.16b, v5.16b, v5.16b, #0", + "ext v6.16b, v5.16b, v5.16b, #1", + "ext v7.16b, v5.16b, v5.16b, #2", + "ext v8.16b, v5.16b, v5.16b, #3", + "uabdl v5.8h, v2.8b, v3.8b", + "uabdl v2.8h, v6.8b, v3.8b", + "uabdl v6.8h, v7.8b, v3.8b", + "uabdl v7.8h, v8.8b, v3.8b", + "addp v3.8h, v5.8h, v6.8h", + "addp v5.8h, v2.8h, v7.8h", + "trn1 v2.4s, v3.4s, v5.4s", + "trn2 v6.4s, v3.4s, v5.4s", + "addp v3.8h, v2.8h, v6.8h", "mov z1.q, q3", - "mov z16.d, z2.d", + "mov z2.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z2.b, p0/m, z1.b", + "mov z16.d, p7/m, z2.d" ] }, "vmpsadbw ymm0, ymm1, ymm2, 110b": { - "ExpectedInstructionCount": 34, + "ExpectedInstructionCount": 37, "Comment": [ "Map 3 0b01 0x42 256-bit" ], "ExpectedArm64ASM": [ - "dup v2.4s, v18.s[2]", - "ext v3.16b, v17.16b, v17.16b, #4", - "ext v4.16b, v17.16b, v17.16b, #5", - "ext v5.16b, v17.16b, v17.16b, #6", - "ext v6.16b, v17.16b, v17.16b, #7", - "uabdl v3.8h, v3.8b, v2.8b", - "uabdl v4.8h, v4.8b, v2.8b", - "uabdl v5.8h, v5.8b, v2.8b", - "uabdl v2.8h, v6.8b, v2.8b", - "addp v3.8h, v3.8h, v5.8h", - "addp v2.8h, v4.8h, v2.8h", - "trn1 v4.4s, v3.4s, v2.4s", - "trn2 v2.4s, v3.4s, v2.4s", - "addp v2.8h, v4.8h, v2.8h", - "mov z3.q, z17.q[1]", - "mov z4.q, z18.q[1]", - "dup v4.4s, v4.s[0]", - "ext v5.16b, v3.16b, v3.16b, #0", - "ext v6.16b, v3.16b, v3.16b, #1", - "ext v7.16b, v3.16b, v3.16b, #2", - "ext v3.16b, v3.16b, v3.16b, #3", - "uabdl v5.8h, v5.8b, v4.8b", - "uabdl v6.8h, v6.8b, v4.8b", - "uabdl v7.8h, v7.8b, v4.8b", - "uabdl v3.8h, v3.8b, v4.8b", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "dup v4.4s, v3.s[2]", + "ext v5.16b, v2.16b, v2.16b, #4", + "ext v6.16b, v2.16b, v2.16b, #5", + "ext v7.16b, v2.16b, v2.16b, #6", + "ext v8.16b, v2.16b, v2.16b, #7", + "uabdl v9.8h, v5.8b, v4.8b", + "uabdl v5.8h, v6.8b, v4.8b", + "uabdl v6.8h, v7.8b, v4.8b", + "uabdl v7.8h, v8.8b, v4.8b", + "addp v4.8h, v9.8h, v6.8h", + "addp v6.8h, v5.8h, v7.8h", + "trn1 v5.4s, v4.4s, v6.4s", + "trn2 v7.4s, v4.4s, v6.4s", "addp v4.8h, v5.8h, v7.8h", - "addp v3.8h, v6.8h, v3.8h", - "trn1 v5.4s, v4.4s, v3.4s", - "trn2 v3.4s, v4.4s, v3.4s", - "addp v3.8h, v5.8h, v3.8h", + "mov z5.q, z2.q[1]", + "mov z2.q, z3.q[1]", + "dup v3.4s, v2.s[0]", + "ext v2.16b, v5.16b, v5.16b, #0", + "ext v6.16b, v5.16b, v5.16b, #1", + "ext v7.16b, v5.16b, v5.16b, #2", + "ext v8.16b, v5.16b, v5.16b, #3", + "uabdl v5.8h, v2.8b, v3.8b", + "uabdl v2.8h, v6.8b, v3.8b", + "uabdl v6.8h, v7.8b, v3.8b", + "uabdl v7.8h, v8.8b, v3.8b", + "addp v3.8h, v5.8h, v6.8h", + "addp v5.8h, v2.8h, v7.8h", + "trn1 v2.4s, v3.4s, v5.4s", + "trn2 v6.4s, v3.4s, v5.4s", + "addp v3.8h, v2.8h, v6.8h", "mov z1.q, q3", - "mov z16.d, z2.d", + "mov z2.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z2.b, p0/m, z1.b", + "mov z16.d, p7/m, z2.d" ] }, "vmpsadbw ymm0, ymm1, ymm2, 111b": { - "ExpectedInstructionCount": 34, + "ExpectedInstructionCount": 37, "Comment": [ "Map 3 0b01 0x42 256-bit" ], "ExpectedArm64ASM": [ - "dup v2.4s, v18.s[3]", - "ext v3.16b, v17.16b, v17.16b, #4", - "ext v4.16b, v17.16b, v17.16b, #5", - "ext v5.16b, v17.16b, v17.16b, #6", - "ext v6.16b, v17.16b, v17.16b, #7", - "uabdl v3.8h, v3.8b, v2.8b", - "uabdl v4.8h, v4.8b, v2.8b", - "uabdl v5.8h, v5.8b, v2.8b", - "uabdl v2.8h, v6.8b, v2.8b", - "addp v3.8h, v3.8h, v5.8h", - "addp v2.8h, v4.8h, v2.8h", - "trn1 v4.4s, v3.4s, v2.4s", - "trn2 v2.4s, v3.4s, v2.4s", - "addp v2.8h, v4.8h, v2.8h", - "mov z3.q, z17.q[1]", - "mov z4.q, z18.q[1]", - "dup v4.4s, v4.s[0]", - "ext v5.16b, v3.16b, v3.16b, #0", - "ext v6.16b, v3.16b, v3.16b, #1", - "ext v7.16b, v3.16b, v3.16b, #2", - "ext v3.16b, v3.16b, v3.16b, #3", - "uabdl v5.8h, v5.8b, v4.8b", - "uabdl v6.8h, v6.8b, v4.8b", - "uabdl v7.8h, v7.8b, v4.8b", - "uabdl v3.8h, v3.8b, v4.8b", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "dup v4.4s, v3.s[3]", + "ext v5.16b, v2.16b, v2.16b, #4", + "ext v6.16b, v2.16b, v2.16b, #5", + "ext v7.16b, v2.16b, v2.16b, #6", + "ext v8.16b, v2.16b, v2.16b, #7", + "uabdl v9.8h, v5.8b, v4.8b", + "uabdl v5.8h, v6.8b, v4.8b", + "uabdl v6.8h, v7.8b, v4.8b", + "uabdl v7.8h, v8.8b, v4.8b", + "addp v4.8h, v9.8h, v6.8h", + "addp v6.8h, v5.8h, v7.8h", + "trn1 v5.4s, v4.4s, v6.4s", + "trn2 v7.4s, v4.4s, v6.4s", "addp v4.8h, v5.8h, v7.8h", - "addp v3.8h, v6.8h, v3.8h", - "trn1 v5.4s, v4.4s, v3.4s", - "trn2 v3.4s, v4.4s, v3.4s", - "addp v3.8h, v5.8h, v3.8h", + "mov z5.q, z2.q[1]", + "mov z2.q, z3.q[1]", + "dup v3.4s, v2.s[0]", + "ext v2.16b, v5.16b, v5.16b, #0", + "ext v6.16b, v5.16b, v5.16b, #1", + "ext v7.16b, v5.16b, v5.16b, #2", + "ext v8.16b, v5.16b, v5.16b, #3", + "uabdl v5.8h, v2.8b, v3.8b", + "uabdl v2.8h, v6.8b, v3.8b", + "uabdl v6.8h, v7.8b, v3.8b", + "uabdl v7.8h, v8.8b, v3.8b", + "addp v3.8h, v5.8h, v6.8h", + "addp v5.8h, v2.8h, v7.8h", + "trn1 v2.4s, v3.4s, v5.4s", + "trn2 v6.4s, v3.4s, v5.4s", + "addp v3.8h, v2.8h, v6.8h", "mov z1.q, q3", - "mov z16.d, z2.d", + "mov z2.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z2.b, p0/m, z1.b", + "mov z16.d, p7/m, z2.d" ] }, "vpclmulqdq xmm0, xmm1, xmm2, 00000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 3 0b01 0x44 128-bit" ], "ExpectedArm64ASM": [ - "pmull v16.1q, v17.1d, v18.1d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "pmull v4.1q, v2.1d, v3.1d", + "mov z16.d, p7/m, z4.d" ] }, "vpclmulqdq xmm0, xmm1, xmm2, 00001b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 3 0b01 0x44 128-bit" ], "ExpectedArm64ASM": [ - "dup v0.2d, v17.d[1]", - "pmull v16.1q, v0.1d, v18.1d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "dup v0.2d, v2.d[1]", + "pmull v4.1q, v0.1d, v3.1d", + "mov z16.d, p7/m, z4.d" ] }, "vpclmulqdq xmm0, xmm1, xmm2, 10000b": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 5, "Comment": [ "Map 3 0b01 0x44 128-bit" ], "ExpectedArm64ASM": [ - "dup v0.2d, v18.d[1]", - "pmull v16.1q, v0.1d, v17.1d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "dup v0.2d, v3.d[1]", + "pmull v4.1q, v0.1d, v2.1d", + "mov z16.d, p7/m, z4.d" ] }, "vpclmulqdq xmm0, xmm1, xmm2, 10001b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 4, "Comment": [ "Map 3 0b01 0x44 128-bit" ], "ExpectedArm64ASM": [ - "pmull2 v16.1q, v17.2d, v18.2d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "pmull2 v4.1q, v2.2d, v3.2d", + "mov z16.d, p7/m, z4.d" ] }, "vpclmulqdq ymm0, ymm1, ymm2, 00000b": { @@ -4163,424 +4944,521 @@ ] }, "vperm2i128 ymm0, ymm1, ymm2, 00000000b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": [ "Map 3 0b01 0x46 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, q17", - "mov z2.b, p6/m, z1.b", - "mov z1.q, q17", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.q, q2", + "mov z4.d, z3.d", + "mov z4.b, p6/m, z1.b", + "mov z1.q, q2", + "mov z3.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z3.b, p0/m, z1.b", + "mov z16.d, p7/m, z3.d" ] }, "vperm2i128 ymm0, ymm1, ymm2, 00000001b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": [ "Map 3 0b01 0x46 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, z17.q[1]", - "mov z2.b, p6/m, z1.b", - "mov z1.q, q17", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.q, z2.q[1]", + "mov z4.d, z3.d", + "mov z4.b, p6/m, z1.b", + "mov z1.q, q2", + "mov z3.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z3.b, p0/m, z1.b", + "mov z16.d, p7/m, z3.d" ] }, "vperm2i128 ymm0, ymm1, ymm2, 00000010b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": [ "Map 3 0b01 0x46 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, q18", - "mov z2.b, p6/m, z1.b", - "mov z1.q, q17", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov z1.q, q3", + "mov z5.d, z4.d", + "mov z5.b, p6/m, z1.b", + "mov z1.q, q2", + "mov z3.d, z5.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z3.b, p0/m, z1.b", + "mov z16.d, p7/m, z3.d" ] }, "vperm2i128 ymm0, ymm1, ymm2, 00000011b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": [ "Map 3 0b01 0x46 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, z18.q[1]", - "mov z2.b, p6/m, z1.b", - "mov z1.q, q17", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov z1.q, z3.q[1]", + "mov z5.d, z4.d", + "mov z5.b, p6/m, z1.b", + "mov z1.q, q2", + "mov z3.d, z5.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z3.b, p0/m, z1.b", + "mov z16.d, p7/m, z3.d" ] }, "vperm2i128 ymm0, ymm1, ymm2, 00010000b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": [ "Map 3 0b01 0x46 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, q17", - "mov z2.b, p6/m, z1.b", - "mov z1.q, z17.q[1]", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.q, q2", + "mov z4.d, z3.d", + "mov z4.b, p6/m, z1.b", + "mov z1.q, z2.q[1]", + "mov z3.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z3.b, p0/m, z1.b", + "mov z16.d, p7/m, z3.d" ] }, "vperm2i128 ymm0, ymm1, ymm2, 00010001b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": [ "Map 3 0b01 0x46 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, z17.q[1]", - "mov z2.b, p6/m, z1.b", - "mov z1.q, z17.q[1]", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.q, z2.q[1]", + "mov z4.d, z3.d", + "mov z4.b, p6/m, z1.b", + "mov z1.q, z2.q[1]", + "mov z3.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z3.b, p0/m, z1.b", + "mov z16.d, p7/m, z3.d" ] }, "vperm2i128 ymm0, ymm1, ymm2, 00010010b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": [ "Map 3 0b01 0x46 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, q18", - "mov z2.b, p6/m, z1.b", - "mov z1.q, z17.q[1]", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov z1.q, q3", + "mov z5.d, z4.d", + "mov z5.b, p6/m, z1.b", + "mov z1.q, z2.q[1]", + "mov z3.d, z5.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z3.b, p0/m, z1.b", + "mov z16.d, p7/m, z3.d" ] }, "vperm2i128 ymm0, ymm1, ymm2, 00010011b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": [ "Map 3 0b01 0x46 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, z18.q[1]", - "mov z2.b, p6/m, z1.b", - "mov z1.q, z17.q[1]", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov z1.q, z3.q[1]", + "mov z5.d, z4.d", + "mov z5.b, p6/m, z1.b", + "mov z1.q, z2.q[1]", + "mov z3.d, z5.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z3.b, p0/m, z1.b", + "mov z16.d, p7/m, z3.d" ] }, "vperm2i128 ymm0, ymm1, ymm2, 00100000b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": [ "Map 3 0b01 0x46 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, q17", - "mov z2.b, p6/m, z1.b", - "mov z1.q, q18", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov z1.q, q2", + "mov z5.d, z4.d", + "mov z5.b, p6/m, z1.b", + "mov z1.q, q3", + "mov z2.d, z5.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z2.b, p0/m, z1.b", + "mov z16.d, p7/m, z2.d" ] }, "vperm2i128 ymm0, ymm1, ymm2, 00100001b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": [ "Map 3 0b01 0x46 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, z17.q[1]", - "mov z2.b, p6/m, z1.b", - "mov z1.q, q18", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov z1.q, z2.q[1]", + "mov z5.d, z4.d", + "mov z5.b, p6/m, z1.b", + "mov z1.q, q3", + "mov z2.d, z5.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z2.b, p0/m, z1.b", + "mov z16.d, p7/m, z2.d" ] }, "vperm2i128 ymm0, ymm1, ymm2, 00100010b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": [ "Map 3 0b01 0x46 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, q18", - "mov z2.b, p6/m, z1.b", - "mov z1.q, q18", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z18.d", + "movi v3.2d, #0x0", + "mov z1.q, q2", + "mov z4.d, z3.d", + "mov z4.b, p6/m, z1.b", + "mov z1.q, q2", + "mov z3.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z3.b, p0/m, z1.b", + "mov z16.d, p7/m, z3.d" ] }, "vperm2i128 ymm0, ymm1, ymm2, 00100011b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": [ "Map 3 0b01 0x46 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, z18.q[1]", - "mov z2.b, p6/m, z1.b", - "mov z1.q, q18", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z18.d", + "movi v3.2d, #0x0", + "mov z1.q, z2.q[1]", + "mov z4.d, z3.d", + "mov z4.b, p6/m, z1.b", + "mov z1.q, q2", + "mov z3.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z3.b, p0/m, z1.b", + "mov z16.d, p7/m, z3.d" ] }, "vperm2i128 ymm0, ymm1, ymm2, 00110000b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": [ "Map 3 0b01 0x46 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, q17", - "mov z2.b, p6/m, z1.b", - "mov z1.q, z18.q[1]", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov z1.q, q2", + "mov z5.d, z4.d", + "mov z5.b, p6/m, z1.b", + "mov z1.q, z3.q[1]", + "mov z2.d, z5.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z2.b, p0/m, z1.b", + "mov z16.d, p7/m, z2.d" ] }, "vperm2i128 ymm0, ymm1, ymm2, 00110001b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 11, "Comment": [ "Map 3 0b01 0x46 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, z17.q[1]", - "mov z2.b, p6/m, z1.b", - "mov z1.q, z18.q[1]", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "movi v4.2d, #0x0", + "mov z1.q, z2.q[1]", + "mov z5.d, z4.d", + "mov z5.b, p6/m, z1.b", + "mov z1.q, z3.q[1]", + "mov z2.d, z5.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z2.b, p0/m, z1.b", + "mov z16.d, p7/m, z2.d" ] }, "vperm2i128 ymm0, ymm1, ymm2, 00110010b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": [ "Map 3 0b01 0x46 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, q18", - "mov z2.b, p6/m, z1.b", - "mov z1.q, z18.q[1]", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z18.d", + "movi v3.2d, #0x0", + "mov z1.q, q2", + "mov z4.d, z3.d", + "mov z4.b, p6/m, z1.b", + "mov z1.q, z2.q[1]", + "mov z3.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z3.b, p0/m, z1.b", + "mov z16.d, p7/m, z3.d" ] }, "vperm2i128 ymm0, ymm1, ymm2, 00110011b": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": [ "Map 3 0b01 0x46 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, z18.q[1]", - "mov z2.b, p6/m, z1.b", - "mov z1.q, z18.q[1]", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z18.d", + "movi v3.2d, #0x0", + "mov z1.q, z2.q[1]", + "mov z4.d, z3.d", + "mov z4.b, p6/m, z1.b", + "mov z1.q, z2.q[1]", + "mov z3.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z3.b, p0/m, z1.b", + "mov z16.d, p7/m, z3.d" ] }, "vperm2i128 ymm0, ymm1, ymm2, 00001000b": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": [ "Map 3 0b01 0x46 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, q17", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.q, q2", + "mov z4.d, z3.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z4.b, p0/m, z1.b", + "mov z16.d, p7/m, z4.d" ] }, "vperm2i128 ymm0, ymm1, ymm2, 00011000b": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": [ "Map 3 0b01 0x46 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, z17.q[1]", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.q, z2.q[1]", + "mov z4.d, z3.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z4.b, p0/m, z1.b", + "mov z16.d, p7/m, z4.d" ] }, "vperm2i128 ymm0, ymm1, ymm2, 00101000b": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": [ "Map 3 0b01 0x46 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, q18", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z18.d", + "movi v3.2d, #0x0", + "mov z1.q, q2", + "mov z4.d, z3.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z4.b, p0/m, z1.b", + "mov z16.d, p7/m, z4.d" ] }, "vperm2i128 ymm0, ymm1, ymm2, 00111000b": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": [ "Map 3 0b01 0x46 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, z18.q[1]", - "mov z16.d, z2.d", + "mov z2.d, p7/m, z18.d", + "movi v3.2d, #0x0", + "mov z1.q, z2.q[1]", + "mov z4.d, z3.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z4.b, p0/m, z1.b", + "mov z16.d, p7/m, z4.d" ] }, "vperm2i128 ymm0, ymm1, ymm2, 10001000b": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map 3 0b01 0x46 256-bit" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "movi v2.2d, #0x0", + "mov z16.d, p7/m, z2.d" ] }, "vperm2i128 ymm0, ymm1, ymm2, 10000000b": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": [ "Map 3 0b01 0x46 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, q17", - "mov z16.d, z2.d", - "mov z16.b, p6/m, z1.b" + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.q, q2", + "mov z4.d, z3.d", + "mov z4.b, p6/m, z1.b", + "mov z16.d, p7/m, z4.d" ] }, "vperm2i128 ymm0, ymm1, ymm2, 10000001b": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": [ "Map 3 0b01 0x46 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, z17.q[1]", - "mov z16.d, z2.d", - "mov z16.b, p6/m, z1.b" + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z1.q, z2.q[1]", + "mov z4.d, z3.d", + "mov z4.b, p6/m, z1.b", + "mov z16.d, p7/m, z4.d" ] }, "vperm2i128 ymm0, ymm1, ymm2, 10000010b": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": [ "Map 3 0b01 0x46 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, q18", - "mov z16.d, z2.d", - "mov z16.b, p6/m, z1.b" + "mov z2.d, p7/m, z18.d", + "movi v3.2d, #0x0", + "mov z1.q, q2", + "mov z4.d, z3.d", + "mov z4.b, p6/m, z1.b", + "mov z16.d, p7/m, z4.d" ] }, "vperm2i128 ymm0, ymm1, ymm2, 10000011b": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 6, "Comment": [ "Map 3 0b01 0x46 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "mov z1.q, z18.q[1]", - "mov z16.d, z2.d", - "mov z16.b, p6/m, z1.b" + "mov z2.d, p7/m, z18.d", + "movi v3.2d, #0x0", + "mov z1.q, z2.q[1]", + "mov z4.d, z3.d", + "mov z4.b, p6/m, z1.b", + "mov z16.d, p7/m, z4.d" ] }, "vblendvps xmm0, xmm1, xmm2, xmm3": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "Map 3 0b01 0x4a 128-bit" ], "ExpectedArm64ASM": [ - "sshr v2.4s, v19.4s, #31", - "mov v16.16b, v2.16b", - "bsl v16.16b, v18.16b, v17.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z4.d, p7/m, z19.d", + "sshr v5.4s, v4.4s, #31", + "mov v4.16b, v5.16b", + "bsl v4.16b, v3.16b, v2.16b", + "mov z16.d, p7/m, z4.d" ] }, "vblendvps ymm0, ymm1, ymm2, ymm3": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": [ "Map 3 0b01 0x4a 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z2, z19", - "asr z2.s, p7/m, z2.s, #31", - "movprfx z0, z18", - "bsl z0.d, z0.d, z17.d, z2.d", - "mov z16.d, z0.d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z4.d, p7/m, z19.d", + "movprfx z5, z4", + "asr z5.s, p7/m, z5.s, #31", + "movprfx z0, z3", + "bsl z0.d, z0.d, z2.d, z5.d", + "mov z4.d, z0.d", + "mov z16.d, p7/m, z4.d" ] }, "vblendvpd xmm0, xmm1, xmm2, xmm3": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "Map 3 0b01 0x4b 128-bit" ], "ExpectedArm64ASM": [ - "sshr v2.2d, v19.2d, #63", - "mov v16.16b, v2.16b", - "bsl v16.16b, v18.16b, v17.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z4.d, p7/m, z19.d", + "sshr v5.2d, v4.2d, #63", + "mov v4.16b, v5.16b", + "bsl v4.16b, v3.16b, v2.16b", + "mov z16.d, p7/m, z4.d" ] }, "vblendvpd ymm0, ymm1, ymm2, ymm3": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": [ "Map 3 0b01 0x4b 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z2, z19", - "asr z2.d, p7/m, z2.d, #63", - "movprfx z0, z18", - "bsl z0.d, z0.d, z17.d, z2.d", - "mov z16.d, z0.d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z4.d, p7/m, z19.d", + "movprfx z5, z4", + "asr z5.d, p7/m, z5.d, #63", + "movprfx z0, z3", + "bsl z0.d, z0.d, z2.d, z5.d", + "mov z4.d, z0.d", + "mov z16.d, p7/m, z4.d" ] }, "vpblendvb xmm0, xmm1, xmm2, xmm3": { - "ExpectedInstructionCount": 3, + "ExpectedInstructionCount": 7, "Comment": [ "Map 3 0b01 0x4c 128-bit" ], "ExpectedArm64ASM": [ - "sshr v2.16b, v19.16b, #7", - "mov v16.16b, v2.16b", - "bsl v16.16b, v18.16b, v17.16b" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z4.d, p7/m, z19.d", + "sshr v5.16b, v4.16b, #7", + "mov v4.16b, v5.16b", + "bsl v4.16b, v3.16b, v2.16b", + "mov z16.d, p7/m, z4.d" ] }, "vpblendvb ymm0, ymm1, ymm2, ymm3": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 9, "Comment": [ "Map 3 0b01 0x4c 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z2, z19", - "asr z2.b, p7/m, z2.b, #7", - "movprfx z0, z18", - "bsl z0.d, z0.d, z17.d, z2.d", - "mov z16.d, z0.d" + "mov z2.d, p7/m, z17.d", + "mov z3.d, p7/m, z18.d", + "mov z4.d, p7/m, z19.d", + "movprfx z5, z4", + "asr z5.b, p7/m, z5.b, #7", + "movprfx z0, z3", + "bsl z0.d, z0.d, z2.d, z5.d", + "mov z4.d, z0.d", + "mov z16.d, p7/m, z4.d" ] }, "vfmaddsubps xmm0, xmm1, xmm2, xmm3": { @@ -4794,86 +5672,101 @@ ] }, "vaeskeygenassist xmm0, xmm1, 0": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 7, "Comment": [ "Map 3 0b01 0xdf 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2160]", - "movi v3.2d, #0x0", - "mov v16.16b, v17.16b", + "mov z2.d, p7/m, z17.d", + "ldr q3, [x28, #2160]", + "movi v4.2d, #0x0", + "mov v5.16b, v2.16b", "unimplemented (Unimplemented)", - "tbl v16.16b, {v16.16b}, v2.16b" + "tbl v5.16b, {v5.16b}, v3.16b", + "mov z16.d, p7/m, z5.d" ] }, "vaeskeygenassist xmm0, xmm1, 0xFF": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 10, "Comment": [ "Map 3 0b01 0xdf 128-bit" ], "ExpectedArm64ASM": [ - "ldr q2, [x28, #2160]", - "movi v3.2d, #0x0", - "mov v16.16b, v17.16b", + "mov z2.d, p7/m, z17.d", + "ldr q3, [x28, #2160]", + "movi v4.2d, #0x0", + "mov v5.16b, v2.16b", "unimplemented (Unimplemented)", - "tbl v16.16b, {v16.16b}, v2.16b", + "tbl v5.16b, {v5.16b}, v3.16b", "mov x0, #0xff00000000", "dup v1.2d, x0", - "eor v16.16b, v16.16b, v1.16b" + "eor v5.16b, v5.16b, v1.16b", + "mov z16.d, p7/m, z5.d" ] }, "rorx eax, ebx, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b11 0xf0 32-bit" ], "ExpectedArm64ASM": [ - "mov w4, w7" + "mov x20, x7", + "mov w21, w20", + "mov x4, x21" ] }, "rorx eax, eax, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b11 0xf0 32-bit" ], "ExpectedArm64ASM": [ - "mov w4, w4" + "mov x20, x4", + "mov w21, w20", + "mov x4, x21" ] }, "rorx eax, ebx, 31": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b11 0xf0 32-bit" ], "ExpectedArm64ASM": [ - "ror w4, w7, #31" + "mov x20, x7", + "ror w21, w20, #31", + "mov x4, x21" ] }, "rorx eax, ebx, 32": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b11 0xf0 32-bit" ], "ExpectedArm64ASM": [ - "mov w4, w7" + "mov x20, x7", + "mov w21, w20", + "mov x4, x21" ] }, "rorx eax, eax, 32": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b11 0xf0 32-bit" ], "ExpectedArm64ASM": [ - "mov w4, w4" + "mov x20, x4", + "mov w21, w20", + "mov x4, x21" ] }, "rorx rax, rbx, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map 3 0b11 0xf0 64-bit" ], "ExpectedArm64ASM": [ - "mov x4, x7" + "mov x20, x7", + "mov x4, x20" ] }, "rorx rax, rax, 0": { @@ -4884,21 +5777,24 @@ "ExpectedArm64ASM": [] }, "rorx rax, rbx, 63": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map 3 0b11 0xf0 64-bit" ], "ExpectedArm64ASM": [ - "ror x4, x7, #63" + "mov x20, x7", + "ror x21, x20, #63", + "mov x4, x21" ] }, "rorx rax, rbx, 64": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map 3 0b11 0xf0 64-bit" ], "ExpectedArm64ASM": [ - "mov x4, x7" + "mov x20, x7", + "mov x4, x20" ] }, "rorx rax, rax, 64": { diff --git a/unittests/InstructionCountCI/VEX_map_group.json b/unittests/InstructionCountCI/VEX_map_group.json index 21859ec7f2..ac9b749e2c 100644 --- a/unittests/InstructionCountCI/VEX_map_group.json +++ b/unittests/InstructionCountCI/VEX_map_group.json @@ -11,579 +11,686 @@ }, "Instructions": { "vpsrlw xmm0, xmm1, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map group 12 0b010 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b" + "mov z2.d, p7/m, z17.d", + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vpsrlw xmm0, xmm1, 15": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map group 12 0b010 128-bit" ], "ExpectedArm64ASM": [ - "ushr v16.8h, v17.8h, #15" + "mov z2.d, p7/m, z17.d", + "ushr v3.8h, v2.8h, #15", + "mov z16.d, p7/m, z3.d" ] }, "vpsrlw xmm0, xmm1, 16": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map group 12 0b010 128-bit" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z16.d, p7/m, z3.d" ] }, "vpsrlw ymm0, ymm1, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map group 12 0b010 256-bit" ], "ExpectedArm64ASM": [ - "mov z16.d, p7/m, z17.d" + "mov z2.d, p7/m, z17.d", + "mov z16.d, p7/m, z2.d" ] }, "vpsrlw ymm0, ymm1, 15": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map group 12 0b010 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z16, z17", - "lsr z16.h, p7/m, z16.h, #15" + "mov z2.d, p7/m, z17.d", + "movprfx z3, z2", + "lsr z3.h, p7/m, z3.h, #15", + "mov z16.d, p7/m, z3.d" ] }, "vpsrlw ymm0, ymm1, 16": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map group 12 0b010 256-bit" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z16.d, p7/m, z3.d" ] }, "vpsraw xmm0, xmm1, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map group 12 0b100 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b" + "mov z2.d, p7/m, z17.d", + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vpsraw xmm0, xmm1, 15": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map group 12 0b100 128-bit" ], "ExpectedArm64ASM": [ - "sshr v16.8h, v17.8h, #15" + "mov z2.d, p7/m, z17.d", + "sshr v3.8h, v2.8h, #15", + "mov z16.d, p7/m, z3.d" ] }, "vpsraw xmm0, xmm1, 16": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map group 12 0b100 128-bit" ], "ExpectedArm64ASM": [ - "sshr v16.8h, v17.8h, #15" + "mov z2.d, p7/m, z17.d", + "sshr v3.8h, v2.8h, #15", + "mov z16.d, p7/m, z3.d" ] }, "vpsraw ymm0, ymm1, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map group 12 0b100 256-bit" ], "ExpectedArm64ASM": [ - "mov z16.d, p7/m, z17.d" + "mov z2.d, p7/m, z17.d", + "mov z16.d, p7/m, z2.d" ] }, "vpsraw ymm0, ymm1, 15": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map group 12 0b100 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z16, z17", - "asr z16.h, p7/m, z16.h, #15" + "mov z2.d, p7/m, z17.d", + "movprfx z3, z2", + "asr z3.h, p7/m, z3.h, #15", + "mov z16.d, p7/m, z3.d" ] }, "vpsraw ymm0, ymm1, 16": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map group 12 0b100 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z16, z17", - "asr z16.h, p7/m, z16.h, #15" + "mov z2.d, p7/m, z17.d", + "movprfx z3, z2", + "asr z3.h, p7/m, z3.h, #15", + "mov z16.d, p7/m, z3.d" ] }, "vpsllw xmm0, xmm1, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map group 12 0b110 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b" + "mov z2.d, p7/m, z17.d", + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vpsllw xmm0, xmm1, 15": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map group 12 0b110 128-bit" ], "ExpectedArm64ASM": [ - "shl v16.8h, v17.8h, #15" + "mov z2.d, p7/m, z17.d", + "shl v3.8h, v2.8h, #15", + "mov z16.d, p7/m, z3.d" ] }, "vpsllw xmm0, xmm1, 16": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map group 12 0b110 128-bit" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z16.d, p7/m, z3.d" ] }, "vpsllw ymm0, ymm1, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map group 12 0b110 256-bit" ], "ExpectedArm64ASM": [ - "mov z16.d, p7/m, z17.d" + "mov z2.d, p7/m, z17.d", + "mov z16.d, p7/m, z2.d" ] }, "vpsllw ymm0, ymm1, 15": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map group 12 0b110 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z16, z17", - "lsl z16.h, p7/m, z16.h, #15" + "mov z2.d, p7/m, z17.d", + "movprfx z3, z2", + "lsl z3.h, p7/m, z3.h, #15", + "mov z16.d, p7/m, z3.d" ] }, "vpsllw ymm0, ymm1, 16": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map group 12 0b110 256-bit" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z16.d, p7/m, z3.d" ] }, "vpsrld xmm0, xmm1, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map group 13 0b010 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b" + "mov z2.d, p7/m, z17.d", + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vpsrld xmm0, xmm1, 31": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map group 13 0b010 128-bit" ], "ExpectedArm64ASM": [ - "ushr v16.4s, v17.4s, #31" + "mov z2.d, p7/m, z17.d", + "ushr v3.4s, v2.4s, #31", + "mov z16.d, p7/m, z3.d" ] }, "vpsrld xmm0, xmm1, 32": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map group 13 0b010 128-bit" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z16.d, p7/m, z3.d" ] }, "vpsrld ymm0, ymm1, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map group 13 0b010 256-bit" ], "ExpectedArm64ASM": [ - "mov z16.d, p7/m, z17.d" + "mov z2.d, p7/m, z17.d", + "mov z16.d, p7/m, z2.d" ] }, "vpsrld ymm0, ymm1, 31": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map group 13 0b010 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z16, z17", - "lsr z16.s, p7/m, z16.s, #31" + "mov z2.d, p7/m, z17.d", + "movprfx z3, z2", + "lsr z3.s, p7/m, z3.s, #31", + "mov z16.d, p7/m, z3.d" ] }, "vpsrld ymm0, ymm1, 32": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map group 13 0b010 256-bit" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z16.d, p7/m, z3.d" ] }, "vpsrad xmm0, xmm1, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map group 13 0b100 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b" + "mov z2.d, p7/m, z17.d", + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vpsrad xmm0, xmm1, 31": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map group 13 0b100 128-bit" ], "ExpectedArm64ASM": [ - "sshr v16.4s, v17.4s, #31" + "mov z2.d, p7/m, z17.d", + "sshr v3.4s, v2.4s, #31", + "mov z16.d, p7/m, z3.d" ] }, "vpsrad xmm0, xmm1, 32": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map group 13 0b100 128-bit" ], "ExpectedArm64ASM": [ - "sshr v16.4s, v17.4s, #31" + "mov z2.d, p7/m, z17.d", + "sshr v3.4s, v2.4s, #31", + "mov z16.d, p7/m, z3.d" ] }, "vpsrad ymm0, ymm1, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map group 13 0b100 256-bit" ], "ExpectedArm64ASM": [ - "mov z16.d, p7/m, z17.d" + "mov z2.d, p7/m, z17.d", + "mov z16.d, p7/m, z2.d" ] }, "vpsrad ymm0, ymm1, 31": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map group 13 0b100 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z16, z17", - "asr z16.s, p7/m, z16.s, #31" + "mov z2.d, p7/m, z17.d", + "movprfx z3, z2", + "asr z3.s, p7/m, z3.s, #31", + "mov z16.d, p7/m, z3.d" ] }, "vpsrad ymm0, ymm1, 32": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map group 13 0b100 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z16, z17", - "asr z16.s, p7/m, z16.s, #31" + "mov z2.d, p7/m, z17.d", + "movprfx z3, z2", + "asr z3.s, p7/m, z3.s, #31", + "mov z16.d, p7/m, z3.d" ] }, "vpslld xmm0, xmm1, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map group 13 0b110 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b" + "mov z2.d, p7/m, z17.d", + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vpslld xmm0, xmm1, 31": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map group 13 0b110 128-bit" ], "ExpectedArm64ASM": [ - "shl v16.4s, v17.4s, #31" + "mov z2.d, p7/m, z17.d", + "shl v3.4s, v2.4s, #31", + "mov z16.d, p7/m, z3.d" ] }, "vpslld xmm0, xmm1, 32": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map group 13 0b110 128-bit" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z16.d, p7/m, z3.d" ] }, "vpslld ymm0, ymm1, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map group 13 0b110 256-bit" ], "ExpectedArm64ASM": [ - "mov z16.d, p7/m, z17.d" + "mov z2.d, p7/m, z17.d", + "mov z16.d, p7/m, z2.d" ] }, "vpslld ymm0, ymm1, 31": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map group 13 0b110 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z16, z17", - "lsl z16.s, p7/m, z16.s, #31" + "mov z2.d, p7/m, z17.d", + "movprfx z3, z2", + "lsl z3.s, p7/m, z3.s, #31", + "mov z16.d, p7/m, z3.d" ] }, "vpslld ymm0, ymm1, 32": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map group 13 0b110 256-bit" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z16.d, p7/m, z3.d" ] }, "vpsrlq xmm0, xmm1, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map group 14 0b010 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b" + "mov z2.d, p7/m, z17.d", + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vpsrlq xmm0, xmm1, 63": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map group 14 0b010 128-bit" ], "ExpectedArm64ASM": [ - "ushr v16.2d, v17.2d, #63" + "mov z2.d, p7/m, z17.d", + "ushr v3.2d, v2.2d, #63", + "mov z16.d, p7/m, z3.d" ] }, "vpsrlq xmm0, xmm1, 64": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map group 14 0b010 128-bit" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z16.d, p7/m, z3.d" ] }, "vpsrlq ymm0, ymm1, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map group 14 0b010 256-bit" ], "ExpectedArm64ASM": [ - "mov z16.d, p7/m, z17.d" + "mov z2.d, p7/m, z17.d", + "mov z16.d, p7/m, z2.d" ] }, "vpsrlq ymm0, ymm1, 63": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map group 14 0b010 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z16, z17", - "lsr z16.d, p7/m, z16.d, #63" + "mov z2.d, p7/m, z17.d", + "movprfx z3, z2", + "lsr z3.d, p7/m, z3.d, #63", + "mov z16.d, p7/m, z3.d" ] }, "vpsrlq ymm0, ymm1, 64": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map group 14 0b010 256-bit" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z16.d, p7/m, z3.d" ] }, "vpsrldq xmm0, xmm1, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map group 14 0b011 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b" + "mov z2.d, p7/m, z17.d", + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vpsrldq xmm0, xmm1, 15": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map group 14 0b011 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "ext v16.16b, v17.16b, v2.16b, #15" + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "ext v4.16b, v2.16b, v3.16b, #15", + "mov z16.d, p7/m, z4.d" ] }, "vpsrldq xmm0, xmm1, 16": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map group 14 0b011 128-bit" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "movi v2.2d, #0x0", + "mov z16.d, p7/m, z2.d" ] }, "vpsrldq ymm0, ymm1, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map group 14 0b011 256-bit" ], "ExpectedArm64ASM": [ - "mov z16.d, p7/m, z17.d" + "mov z2.d, p7/m, z17.d", + "mov z16.d, p7/m, z2.d" ] }, "vpsrldq ymm0, ymm1, 15": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 10, "Comment": [ "Map group 14 0b011 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "ext v3.16b, v17.16b, v2.16b, #15", - "movprfx z1, z17", - "ext z1.b, z1.b, z2.b, #31", - "mov z2.d, z1.d", - "mov z1.q, q2", - "mov z16.d, z3.d", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "ext v4.16b, v2.16b, v3.16b, #15", + "movprfx z5, z2", + "ext z5.b, z5.b, z3.b, #31", + "mov z1.q, q5", + "mov z2.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z2.b, p0/m, z1.b", + "mov z16.d, p7/m, z2.d" ] }, "vpsrldq ymm0, ymm1, 16": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map group 14 0b011 256-bit" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "movi v2.2d, #0x0", + "mov z16.d, p7/m, z2.d" ] }, "vpsllq xmm0, xmm1, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map group 14 0b110 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b" + "mov z2.d, p7/m, z17.d", + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vpsllq xmm0, xmm1, 63": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map group 14 0b110 128-bit" ], "ExpectedArm64ASM": [ - "shl v16.2d, v17.2d, #63" + "mov z2.d, p7/m, z17.d", + "shl v3.2d, v2.2d, #63", + "mov z16.d, p7/m, z3.d" ] }, "vpsllq xmm0, xmm1, 64": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map group 14 0b110 128-bit" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z16.d, p7/m, z3.d" ] }, "vpsllq ymm0, ymm1, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map group 14 0b110 256-bit" ], "ExpectedArm64ASM": [ - "mov z16.d, p7/m, z17.d" + "mov z2.d, p7/m, z17.d", + "mov z16.d, p7/m, z2.d" ] }, "vpsllq ymm0, ymm1, 63": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map group 14 0b110 256-bit" ], "ExpectedArm64ASM": [ - "movprfx z16, z17", - "lsl z16.d, p7/m, z16.d, #63" + "mov z2.d, p7/m, z17.d", + "movprfx z3, z2", + "lsl z3.d, p7/m, z3.d, #63", + "mov z16.d, p7/m, z3.d" ] }, "vpsllq ymm0, ymm1, 64": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map group 14 0b110 256-bit" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "mov z16.d, p7/m, z3.d" ] }, "vpslldq xmm0, xmm1, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 3, "Comment": [ "Map group 14 0b111 128-bit" ], "ExpectedArm64ASM": [ - "mov v16.16b, v17.16b" + "mov z2.d, p7/m, z17.d", + "mov v3.16b, v2.16b", + "mov z16.d, p7/m, z3.d" ] }, "vpslldq xmm0, xmm1, 15": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 4, "Comment": [ "Map group 14 0b111 128-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "ext v16.16b, v2.16b, v17.16b, #1" + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "ext v4.16b, v3.16b, v2.16b, #1", + "mov z16.d, p7/m, z4.d" ] }, "vpslldq xmm0, xmm1, 16": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map group 14 0b111 128-bit" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "movi v2.2d, #0x0", + "mov z16.d, p7/m, z2.d" ] }, "vpslldq ymm0, ymm1, 0": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map group 14 0b111 256-bit" ], "ExpectedArm64ASM": [ - "mov z16.d, p7/m, z17.d" + "mov z2.d, p7/m, z17.d", + "mov z16.d, p7/m, z2.d" ] }, "vpslldq ymm0, ymm1, 15": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 10, "Comment": [ "Map group 14 0b111 256-bit" ], "ExpectedArm64ASM": [ - "movi v2.2d, #0x0", - "ext v3.16b, v2.16b, v17.16b, #1", - "ext z2.b, z2.b, z17.b, #17", - "mov z1.q, q2", - "mov z16.d, z3.d", + "mov z2.d, p7/m, z17.d", + "movi v3.2d, #0x0", + "ext v4.16b, v3.16b, v2.16b, #1", + "movprfx z5, z3", + "ext z5.b, z5.b, z2.b, #17", + "mov z1.q, q5", + "mov z2.d, z4.d", "not p0.b, p7/z, p6.b", - "mov z16.b, p0/m, z1.b" + "mov z2.b, p0/m, z1.b", + "mov z16.d, p7/m, z2.d" ] }, "vpslldq ymm0, ymm1, 16": { - "ExpectedInstructionCount": 1, + "ExpectedInstructionCount": 2, "Comment": [ "Map group 14 0b111 256-bit" ], "ExpectedArm64ASM": [ - "movi v16.2d, #0x0" + "movi v2.2d, #0x0", + "mov z16.d, p7/m, z2.d" ] }, "vldmxcsr [rax]": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 10, "Comment": [ "Map group 15 0b010" ], "ExpectedArm64ASM": [ - "ldr w20, [x4]", - "ubfx w20, w20, #13, #3", + "mov x20, x4", + "ldr w21, [x20]", + "ubfx w20, w21, #13, #3", "rbit w1, w20", "lsr w1, w1, #30", "mrs x0, fpcr", @@ -594,7 +701,7 @@ ] }, "vstmxcsr [rax]": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": [ "Map group 15 0b011" ], @@ -604,102 +711,116 @@ "ubfx x21, x21, #22, #3", "rbit w0, w21", "bfi x21, x0, #30, #2", - "bfi w20, w21, #13, #3", - "str w20, [x4]" + "mov w22, w20", + "bfi w22, w21, #13, #3", + "mov x20, x4", + "str w22, [x20]" ] }, "blsr eax, ebx": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 10, "Comment": [ "Map group 17 0b001 32-bit" ], "ExpectedArm64ASM": [ - "sub w20, w7, #0x1 (1)", - "and w4, w20, w7", - "cmp x7, #0x0 (0)", - "cset x20, eq", - "tst w4, w4", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "msr nzcv, x20" + "mov x20, x7", + "sub w21, w20, #0x1 (1)", + "and w22, w21, w20", + "mov x4, x22", + "cmp x20, #0x0 (0)", + "cset x21, eq", + "tst w22, w22", + "mrs x20, nzcv", + "orr w22, w20, w21, lsl #29", + "msr nzcv, x22" ] }, "blsr rax, rbx": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 10, "Comment": [ "Map group 17 0b001 64-bit" ], "ExpectedArm64ASM": [ - "sub x20, x7, #0x1 (1)", - "and x4, x20, x7", - "cmp x7, #0x0 (0)", - "cset x20, eq", - "tst x4, x4", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "msr nzcv, x20" + "mov x20, x7", + "sub x21, x20, #0x1 (1)", + "and x22, x21, x20", + "mov x4, x22", + "cmp x20, #0x0 (0)", + "cset x21, eq", + "tst x22, x22", + "mrs x20, nzcv", + "orr w22, w20, w21, lsl #29", + "msr nzcv, x22" ] }, "blsmsk eax, ebx": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 10, "Comment": [ "Map group 17 0b010 32-bit" ], "ExpectedArm64ASM": [ - "sub w20, w7, #0x1 (1)", - "eor w4, w20, w7", - "cmp x7, #0x0 (0)", - "cset x20, eq", - "tst w4, w4", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "msr nzcv, x20" + "mov x20, x7", + "sub w21, w20, #0x1 (1)", + "eor w22, w21, w20", + "mov x4, x22", + "cmp x20, #0x0 (0)", + "cset x21, eq", + "tst w22, w22", + "mrs x20, nzcv", + "orr w22, w20, w21, lsl #29", + "msr nzcv, x22" ] }, "blsmsk rax, rbx": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 10, "Comment": [ "Map group 17 0b010 64-bit" ], "ExpectedArm64ASM": [ - "sub x20, x7, #0x1 (1)", - "eor x4, x20, x7", - "cmp x7, #0x0 (0)", - "cset x20, eq", - "tst x4, x4", - "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "msr nzcv, x20" + "mov x20, x7", + "sub x21, x20, #0x1 (1)", + "eor x22, x21, x20", + "mov x4, x22", + "cmp x20, #0x0 (0)", + "cset x21, eq", + "tst x22, x22", + "mrs x20, nzcv", + "orr w22, w20, w21, lsl #29", + "msr nzcv, x22" ] }, "blsi eax, ebx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": [ "Map group 17 0b011 32-bit" ], "ExpectedArm64ASM": [ - "neg w20, w7", - "and w4, w7, w20", - "tst w4, w4", + "mov x20, x7", + "neg w21, w20", + "and w22, w20, w21", + "mov x4, x22", + "tst w22, w22", "cset w20, ne", "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "msr nzcv, x20" + "orr w22, w21, w20, lsl #29", + "msr nzcv, x22" ] }, "blsi rax, rbx": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 9, "Comment": [ "Map group 17 0b011 64-bit" ], "ExpectedArm64ASM": [ - "neg x20, x7", - "and x4, x7, x20", - "tst x4, x4", + "mov x20, x7", + "neg x21, x20", + "and x22, x20, x21", + "mov x4, x22", + "tst x22, x22", "cset w20, ne", "mrs x21, nzcv", - "orr w20, w21, w20, lsl #29", - "msr nzcv, x20" + "orr w22, w21, w20, lsl #29", + "msr nzcv, x22" ] } } diff --git a/unittests/InstructionCountCI/x87.json b/unittests/InstructionCountCI/x87.json index 501a424251..1547052002 100644 --- a/unittests/InstructionCountCI/x87.json +++ b/unittests/InstructionCountCI/x87.json @@ -13,13 +13,14 @@ }, "Instructions": { "fadd dword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xd8 !11b /0" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", + "mov x21, x4", + "ldr s2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -44,11 +45,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "ldr q3, [x0, #768]", + "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -61,10 +62,10 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", - "mov x3, v2.d[0]", - "umov w4, v2.h[4]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "mov x3, v3.d[0]", + "umov w4, v3.h[4]", "ldr x5, [x28, #1408]", "blr x5", "ldr w4, [x28, #728]", @@ -76,21 +77,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fmul dword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xd8 !11b /1" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", + "mov x21, x4", + "ldr s2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -115,11 +117,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "ldr q3, [x0, #768]", + "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -132,10 +134,10 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", - "mov x3, v2.d[0]", - "umov w4, v2.h[4]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "mov x3, v3.d[0]", + "umov w4, v3.h[4]", "ldr x5, [x28, #1424]", "blr x5", "ldr w4, [x28, #728]", @@ -147,21 +149,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fcom dword [rax]": { - "ExpectedInstructionCount": 69, + "ExpectedInstructionCount": 70, "Comment": [ "0xd8 !11b /2" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", + "mov x21, x4", + "ldr s2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -186,11 +189,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "ldr q3, [x0, #768]", + "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -203,10 +206,10 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", - "mov x3, v2.d[0]", - "umov w4, v2.h[4]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "mov x3, v3.d[0]", + "umov w4, v3.h[4]", "ldr x5, [x28, #1304]", "blr x5", "ldr w4, [x28, #728]", @@ -221,24 +224,25 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "fcomp dword [rax]": { - "ExpectedInstructionCount": 77, + "ExpectedInstructionCount": 78, "Comment": [ "0xd8 !11b /3" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", + "mov x21, x4", + "ldr s2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -263,11 +267,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "ldr q3, [x0, #768]", + "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -280,10 +284,10 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", - "mov x3, v2.d[0]", - "umov w4, v2.h[4]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "mov x3, v3.d[0]", + "umov w4, v3.h[4]", "ldr x5, [x28, #1304]", "blr x5", "ldr w4, [x28, #728]", @@ -298,32 +302,33 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", - "mov w22, #0x0", - "strb w22, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", + "mov w21, #0x0", + "strb w21, [x28, #745]", + "strb w24, [x28, #746]", + "strb w22, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fsub dword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xd8 !11b /4" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", + "mov x21, x4", + "ldr s2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -348,11 +353,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "ldr q3, [x0, #768]", + "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -365,10 +370,10 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", - "mov x3, v2.d[0]", - "umov w4, v2.h[4]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "mov x3, v3.d[0]", + "umov w4, v3.h[4]", "ldr x5, [x28, #1416]", "blr x5", "ldr w4, [x28, #728]", @@ -380,21 +385,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fsubr dword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xd8 !11b /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", + "mov x21, x4", + "ldr s2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -419,11 +425,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "ldr q3, [x0, #768]", + "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -436,10 +442,10 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v2.d[0]", - "umov w2, v2.h[4]", - "mov x3, v3.d[0]", - "umov w4, v3.h[4]", + "mov x1, v3.d[0]", + "umov w2, v3.h[4]", + "mov x3, v2.d[0]", + "umov w4, v2.h[4]", "ldr x5, [x28, #1416]", "blr x5", "ldr w4, [x28, #728]", @@ -451,21 +457,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdiv dword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xd8 !11b /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", + "mov x21, x4", + "ldr s2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -490,11 +497,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "ldr q3, [x0, #768]", + "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -507,10 +514,10 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", - "mov x3, v2.d[0]", - "umov w4, v2.h[4]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "mov x3, v3.d[0]", + "umov w4, v3.h[4]", "ldr x5, [x28, #1432]", "blr x5", "ldr w4, [x28, #728]", @@ -522,21 +529,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdivr dword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xd8 !11b /7" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", + "mov x21, x4", + "ldr s2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -561,11 +569,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "ldr q3, [x0, #768]", + "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -578,10 +586,10 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v2.d[0]", - "umov w2, v2.h[4]", - "mov x3, v3.d[0]", - "umov w4, v3.h[4]", + "mov x1, v3.d[0]", + "umov w2, v3.h[4]", + "mov x3, v2.d[0]", + "umov w4, v2.h[4]", "ldr x5, [x28, #1432]", "blr x5", "ldr w4, [x28, #728]", @@ -593,11 +601,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fadd st0, st0": { @@ -608,8 +616,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -640,11 +648,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fadd st0, st1": { @@ -655,8 +663,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -687,11 +695,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fadd st0, st2": { @@ -702,8 +710,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -734,11 +742,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fadd st0, st3": { @@ -749,8 +757,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -781,11 +789,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fadd st0, st4": { @@ -796,8 +804,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -828,11 +836,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fadd st0, st5": { @@ -843,8 +851,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -875,11 +883,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fadd st0, st6": { @@ -890,8 +898,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -922,11 +930,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fadd st0, st7": { @@ -937,8 +945,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -969,11 +977,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fmul st0, st0": { @@ -984,8 +992,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1016,11 +1024,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fmul st0, st1": { @@ -1031,8 +1039,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1063,11 +1071,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fmul st0, st2": { @@ -1078,8 +1086,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1110,11 +1118,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fmul st0, st3": { @@ -1125,8 +1133,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1157,11 +1165,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fmul st0, st4": { @@ -1172,8 +1180,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1204,11 +1212,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fmul st0, st5": { @@ -1219,8 +1227,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1251,11 +1259,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fmul st0, st6": { @@ -1266,8 +1274,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1298,11 +1306,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fmul st0, st7": { @@ -1313,8 +1321,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1345,11 +1353,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fcom st0, st0": { @@ -1361,8 +1369,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x0", "add w22, w20, #0x0 (0)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1396,13 +1404,13 @@ "mov x20, x0", "ubfx x22, x20, #1, #1", "ubfx x23, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w22, w22, w20", - "orr w23, w23, w20", - "strb w22, [x28, #744]", + "ubfx x24, x20, #2, #1", + "orr w20, w22, w24", + "orr w22, w23, w24", + "strb w20, [x28, #744]", "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w23, [x28, #750]" + "strb w24, [x28, #746]", + "strb w22, [x28, #750]" ] }, "fcom st0, st1": { @@ -1413,8 +1421,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1448,14 +1456,14 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "fcom st0, st2": { @@ -1466,8 +1474,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1501,14 +1509,14 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "fcom st0, st3": { @@ -1519,8 +1527,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1554,14 +1562,14 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "fcom st0, st4": { @@ -1572,8 +1580,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1607,14 +1615,14 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "fcom st0, st5": { @@ -1625,8 +1633,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1660,14 +1668,14 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "fcom st0, st6": { @@ -1678,8 +1686,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1713,14 +1721,14 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "fcom st0, st7": { @@ -1731,8 +1739,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1766,14 +1774,14 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "fcomp st0, st0": { @@ -1785,8 +1793,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x0", "add w22, w20, #0x0 (0)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1820,20 +1828,20 @@ "mov x22, x0", "ubfx x23, x22, #1, #1", "ubfx x24, x22, #0, #1", - "ubfx x22, x22, #2, #1", - "orr w23, w23, w22", - "orr w24, w24, w22", - "strb w23, [x28, #744]", + "ubfx x25, x22, #2, #1", + "orr w22, w23, w25", + "orr w23, w24, w25", + "strb w22, [x28, #744]", "strb w21, [x28, #745]", - "strb w22, [x28, #746]", - "strb w24, [x28, #750]", + "strb w25, [x28, #746]", + "strb w23, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -1846,8 +1854,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1881,20 +1889,20 @@ "mov x22, x0", "ubfx x23, x22, #1, #1", "ubfx x24, x22, #0, #1", - "ubfx x22, x22, #2, #1", - "orr w23, w23, w22", - "orr w24, w24, w22", - "strb w23, [x28, #744]", - "mov w23, #0x0", - "strb w23, [x28, #745]", - "strb w22, [x28, #746]", - "strb w24, [x28, #750]", + "ubfx x25, x22, #2, #1", + "orr w22, w23, w25", + "orr w23, w24, w25", + "strb w22, [x28, #744]", + "mov w22, #0x0", + "strb w22, [x28, #745]", + "strb w25, [x28, #746]", + "strb w23, [x28, #750]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -1906,8 +1914,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -1941,21 +1949,21 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", - "mov w22, #0x0", - "strb w22, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", + "mov w21, #0x0", + "strb w21, [x28, #745]", + "strb w24, [x28, #746]", + "strb w22, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -1967,8 +1975,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2002,21 +2010,21 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", - "mov w22, #0x0", - "strb w22, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", + "mov w21, #0x0", + "strb w21, [x28, #745]", + "strb w24, [x28, #746]", + "strb w22, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -2028,8 +2036,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2063,21 +2071,21 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", - "mov w22, #0x0", - "strb w22, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", + "mov w21, #0x0", + "strb w21, [x28, #745]", + "strb w24, [x28, #746]", + "strb w22, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -2089,8 +2097,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2124,21 +2132,21 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", - "mov w22, #0x0", - "strb w22, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", + "mov w21, #0x0", + "strb w21, [x28, #745]", + "strb w24, [x28, #746]", + "strb w22, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -2150,8 +2158,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2185,21 +2193,21 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", - "mov w22, #0x0", - "strb w22, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", + "mov w21, #0x0", + "strb w21, [x28, #745]", + "strb w24, [x28, #746]", + "strb w22, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -2211,8 +2219,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2246,21 +2254,21 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", - "mov w22, #0x0", - "strb w22, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", + "mov w21, #0x0", + "strb w21, [x28, #745]", + "strb w24, [x28, #746]", + "strb w22, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -2272,8 +2280,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2304,11 +2312,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsub st0, st1": { @@ -2319,8 +2327,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2351,11 +2359,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fsub st0, st2": { @@ -2366,8 +2374,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2398,11 +2406,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fsub st0, st3": { @@ -2413,8 +2421,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2445,11 +2453,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fsub st0, st4": { @@ -2460,8 +2468,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2492,11 +2500,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fsub st0, st5": { @@ -2507,8 +2515,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2539,11 +2547,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fsub st0, st6": { @@ -2554,8 +2562,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2586,11 +2594,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fsub st0, st7": { @@ -2601,8 +2609,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2633,11 +2641,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fsubr st0, st0": { @@ -2648,8 +2656,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2680,11 +2688,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubr st0, st1": { @@ -2695,8 +2703,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2727,11 +2735,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fsubr st0, st2": { @@ -2742,8 +2750,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2774,11 +2782,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fsubr st0, st3": { @@ -2789,8 +2797,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2821,11 +2829,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fsubr st0, st4": { @@ -2836,8 +2844,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2868,11 +2876,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fsubr st0, st5": { @@ -2883,8 +2891,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2915,11 +2923,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fsubr st0, st6": { @@ -2930,8 +2938,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -2962,11 +2970,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fsubr st0, st7": { @@ -2977,8 +2985,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3009,11 +3017,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdiv st0, st0": { @@ -3024,8 +3032,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3056,11 +3064,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdiv st0, st1": { @@ -3071,8 +3079,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3103,11 +3111,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdiv st0, st2": { @@ -3118,8 +3126,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3150,11 +3158,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdiv st0, st3": { @@ -3165,8 +3173,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3197,11 +3205,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdiv st0, st4": { @@ -3212,8 +3220,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3244,11 +3252,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdiv st0, st5": { @@ -3259,8 +3267,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3291,11 +3299,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdiv st0, st6": { @@ -3306,8 +3314,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3338,11 +3346,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdiv st0, st7": { @@ -3353,8 +3361,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3385,11 +3393,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdivr st0, st0": { @@ -3400,8 +3408,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3432,11 +3440,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivr st0, st1": { @@ -3447,8 +3455,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3479,11 +3487,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdivr st0, st2": { @@ -3494,8 +3502,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3526,11 +3534,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdivr st0, st3": { @@ -3541,8 +3549,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3573,11 +3581,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdivr st0, st4": { @@ -3588,8 +3596,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3620,11 +3628,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdivr st0, st5": { @@ -3635,8 +3643,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3667,11 +3675,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdivr st0, st6": { @@ -3682,8 +3690,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3714,11 +3722,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdivr st0, st7": { @@ -3729,8 +3737,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -3761,21 +3769,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fld dword [rax]": { - "ExpectedInstructionCount": 39, + "ExpectedInstructionCount": 40, "Comment": [ "0xd9 !11b /0" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", + "mov x21, x4", + "ldr s2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -3800,23 +3809,23 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q3, [x0, #768]" ] }, "fst dword [rax]": { - "ExpectedInstructionCount": 30, + "ExpectedInstructionCount": 31, "Comment": [ "0xd9 !11b /2" ], @@ -3849,12 +3858,13 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "fmov s2, s0", - "str s2, [x4]" + "fmov s3, s0", + "mov x20, x4", + "str s3, [x20]" ] }, "fstp dword [rax]": { - "ExpectedInstructionCount": 38, + "ExpectedInstructionCount": 39, "Comment": [ "0xd9 !11b /3" ], @@ -3887,82 +3897,85 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "fmov s2, s0", - "str s2, [x4]", + "fmov s3, s0", + "mov x21, x4", + "str s3, [x21]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fldenv [rax]": { - "ExpectedInstructionCount": 48, + "ExpectedInstructionCount": 49, "Comment": [ "0xd9 !11b /4" ], "ExpectedArm64ASM": [ - "ldrh w20, [x4]", - "strh w20, [x28, #1024]", - "ldr w20, [x4, #4]", - "ubfx w21, w20, #11, #3", - "strb w21, [x28, #747]", - "ubfx w21, w20, #8, #1", - "ubfx w22, w20, #9, #1", - "ubfx w23, w20, #10, #1", - "ubfx w20, w20, #14, #1", - "strb w21, [x28, #744]", - "strb w22, [x28, #745]", - "strb w23, [x28, #746]", - "strb w20, [x28, #750]", - "ldr w20, [x4, #8]", - "ubfx w21, w20, #0, #2", + "mov x20, x4", + "ldrh w21, [x20]", + "strh w21, [x28, #1024]", + "ldr w21, [x20, #4]", + "ubfx w22, w21, #11, #3", + "strb w22, [x28, #747]", + "ubfx w22, w21, #8, #1", + "ubfx w23, w21, #9, #1", + "ubfx w24, w21, #10, #1", + "ubfx w25, w21, #14, #1", + "strb w22, [x28, #744]", + "strb w23, [x28, #745]", + "strb w24, [x28, #746]", + "strb w25, [x28, #750]", + "ldr w21, [x20, #8]", + "ubfx w20, w21, #0, #2", "mrs x22, nzcv", - "cmp x21, #0x3 (3)", - "cset x21, ne", - "ubfx w23, w20, #2, #2", - "cmp x23, #0x3 (3)", - "cset x23, ne", - "orr w21, w21, w23, lsl #1", - "ubfx w23, w20, #4, #2", - "cmp x23, #0x3 (3)", - "cset x23, ne", - "orr w21, w21, w23, lsl #2", - "ubfx w23, w20, #6, #2", - "cmp x23, #0x3 (3)", + "cmp x20, #0x3 (3)", "cset x23, ne", - "orr w21, w21, w23, lsl #3", - "ubfx w23, w20, #8, #2", + "ubfx w20, w21, #2, #2", + "cmp x20, #0x3 (3)", + "cset x24, ne", + "orr w20, w23, w24, lsl #1", + "ubfx w23, w21, #4, #2", "cmp x23, #0x3 (3)", - "cset x23, ne", - "orr w21, w21, w23, lsl #4", - "ubfx w23, w20, #10, #2", + "cset x24, ne", + "orr w23, w20, w24, lsl #2", + "ubfx w20, w21, #6, #2", + "cmp x20, #0x3 (3)", + "cset x24, ne", + "orr w20, w23, w24, lsl #3", + "ubfx w23, w21, #8, #2", "cmp x23, #0x3 (3)", - "cset x23, ne", - "orr w21, w21, w23, lsl #5", - "ubfx w23, w20, #12, #2", + "cset x24, ne", + "orr w23, w20, w24, lsl #4", + "ubfx w20, w21, #10, #2", + "cmp x20, #0x3 (3)", + "cset x24, ne", + "orr w20, w23, w24, lsl #5", + "ubfx w23, w21, #12, #2", "cmp x23, #0x3 (3)", - "cset x23, ne", - "orr w21, w21, w23, lsl #6", - "ubfx w20, w20, #14, #2", + "cset x24, ne", + "orr w23, w20, w24, lsl #6", + "ubfx w20, w21, #14, #2", "cmp x20, #0x3 (3)", - "cset x20, ne", - "orr w20, w21, w20, lsl #7", + "cset x21, ne", + "orr w20, w23, w21, lsl #7", "strb w20, [x28, #1026]", "msr nzcv, x22" ] }, "fldcw [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": [ "0xd9 !11b /5" ], "ExpectedArm64ASM": [ - "ldrh w20, [x4]", - "strh w20, [x28, #1024]" + "mov x20, x4", + "ldrh w21, [x20]", + "strh w21, [x28, #1024]" ] }, "fnstenv [rax]": { @@ -3971,80 +3984,81 @@ "0xd9 !11b /6" ], "ExpectedArm64ASM": [ - "ldrh w20, [x28, #1024]", - "str w20, [x4]", - "mov w20, #0x0", - "ldrb w21, [x28, #747]", - "mov x0, x20", - "bfi x0, x21, #11, #3", - "mov x21, x0", + "mov x20, x4", + "ldrh w21, [x28, #1024]", + "str w21, [x20]", + "mov w21, #0x0", + "ldrb w22, [x28, #747]", + "mov x23, x21", + "bfi x23, x22, #11, #3", "ldrb w22, [x28, #744]", - "ldrb w23, [x28, #745]", - "ldrb w24, [x28, #746]", - "ldrb w25, [x28, #750]", - "orr x21, x21, x22, lsl #8", - "orr x21, x21, x23, lsl #9", - "orr x21, x21, x24, lsl #10", - "orr x21, x21, x25, lsl #14", - "str w21, [x4, #4]", - "ldrb w21, [x28, #1026]", - "and w22, w21, #0x1", - "mov w23, #0x3", - "mrs x24, nzcv", + "ldrb w24, [x28, #745]", + "ldrb w25, [x28, #746]", + "ldrb w30, [x28, #750]", + "orr x18, x23, x22, lsl #8", + "orr x22, x18, x24, lsl #9", + "orr x23, x22, x25, lsl #10", + "orr x22, x23, x30, lsl #14", + "str w22, [x20, #4]", + "ldrb w22, [x28, #1026]", + "and w23, w22, #0x1", + "mov w24, #0x3", + "mrs x25, nzcv", + "cmp x23, #0x0 (0)", + "csel x30, x24, x21, eq", + "orr w23, w21, w30", + "lsr w30, w22, #1", + "and w18, w30, #0x1", + "cmp x18, #0x0 (0)", + "csel x30, x24, x21, eq", + "orr w18, w23, w30, lsl #2", + "lsr w23, w22, #2", + "and w30, w23, #0x1", + "cmp x30, #0x0 (0)", + "csel x23, x24, x21, eq", + "orr w30, w18, w23, lsl #4", + "lsr w23, w22, #3", + "and w18, w23, #0x1", + "cmp x18, #0x0 (0)", + "csel x23, x24, x21, eq", + "orr w18, w30, w23, lsl #6", + "lsr w23, w22, #4", + "and w30, w23, #0x1", + "cmp x30, #0x0 (0)", + "csel x23, x24, x21, eq", + "orr w30, w18, w23, lsl #8", + "lsr w23, w22, #5", + "and w18, w23, #0x1", + "cmp x18, #0x0 (0)", + "csel x23, x24, x21, eq", + "orr w18, w30, w23, lsl #10", + "lsr w23, w22, #6", + "and w30, w23, #0x1", + "cmp x30, #0x0 (0)", + "csel x23, x24, x21, eq", + "orr w30, w18, w23, lsl #12", + "lsr w23, w22, #7", + "and w22, w23, #0x1", "cmp x22, #0x0 (0)", - "csel x22, x23, x20, eq", - "orr w22, w20, w22", - "lsr w25, w21, #1", - "and w25, w25, #0x1", - "cmp x25, #0x0 (0)", - "csel x25, x23, x20, eq", - "orr w22, w22, w25, lsl #2", - "lsr w25, w21, #2", - "and w25, w25, #0x1", - "cmp x25, #0x0 (0)", - "csel x25, x23, x20, eq", - "orr w22, w22, w25, lsl #4", - "lsr w25, w21, #3", - "and w25, w25, #0x1", - "cmp x25, #0x0 (0)", - "csel x25, x23, x20, eq", - "orr w22, w22, w25, lsl #6", - "lsr w25, w21, #4", - "and w25, w25, #0x1", - "cmp x25, #0x0 (0)", - "csel x25, x23, x20, eq", - "orr w22, w22, w25, lsl #8", - "lsr w25, w21, #5", - "and w25, w25, #0x1", - "cmp x25, #0x0 (0)", - "csel x25, x23, x20, eq", - "orr w22, w22, w25, lsl #10", - "lsr w25, w21, #6", - "and w25, w25, #0x1", - "cmp x25, #0x0 (0)", - "csel x25, x23, x20, eq", - "orr w22, w22, w25, lsl #12", - "lsr w21, w21, #7", - "and w21, w21, #0x1", - "cmp x21, #0x0 (0)", - "csel x21, x23, x20, eq", - "orr w21, w22, w21, lsl #14", - "str w21, [x4, #8]", - "str w20, [x4, #12]", - "str w20, [x4, #16]", - "str w20, [x4, #20]", - "str w20, [x4, #24]", - "msr nzcv, x24" + "csel x23, x24, x21, eq", + "orr w22, w30, w23, lsl #14", + "str w22, [x20, #8]", + "str w21, [x20, #12]", + "str w21, [x20, #16]", + "str w21, [x20, #20]", + "str w21, [x20, #24]", + "msr nzcv, x25" ] }, "fnstcw [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": [ "0xd9 !11b /7" ], "ExpectedArm64ASM": [ "ldrh w20, [x28, #1024]", - "strh w20, [x4]" + "mov x21, x4", + "strh w20, [x21]" ] }, "fld st0": { @@ -4055,15 +4069,15 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -4079,14 +4093,14 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -4101,15 +4115,15 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -4124,15 +4138,15 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -4147,15 +4161,15 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -4170,15 +4184,15 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -4193,15 +4207,15 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -4216,15 +4230,15 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -4239,14 +4253,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -4258,14 +4272,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -4277,14 +4291,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -4296,14 +4310,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -4315,14 +4329,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -4334,14 +4348,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -4353,14 +4367,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -4372,14 +4386,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -4391,7 +4405,7 @@ "ExpectedArm64ASM": [] }, "fchs": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 11, "Comment": [ "0xd9 11b 0xe0 /4" ], @@ -4402,14 +4416,15 @@ "mov w21, #0x0", "mov w22, #0x8000", "fmov d3, x21", - "mov v3.d[1], x22", - "eor v2.16b, v2.16b, v3.16b", + "mov v4.16b, v3.16b", + "mov v4.d[1], x22", + "eor v3.16b, v2.16b, v4.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q3, [x0, #768]" ] }, "fabs": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 11, "Comment": [ "0xd9 11b 0xe1 /4" ], @@ -4420,10 +4435,11 @@ "mov x21, #0xffffffffffffffff", "mov w22, #0x7fff", "fmov d3, x21", - "mov v3.d[1], x22", - "and v2.16b, v2.16b, v3.16b", + "mov v4.16b, v3.16b", + "mov v4.d[1], x22", + "and v3.16b, v2.16b, v4.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q3, [x0, #768]" ] }, "ftst": { @@ -4467,13 +4483,13 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", "strb w20, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]" + "strb w24, [x28, #746]", + "strb w22, [x28, #750]" ] }, "fxam": { @@ -4486,11 +4502,11 @@ "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", "mov x21, v2.d[1]", - "ubfx x21, x21, #15, #1", - "strb w21, [x28, #745]", + "ubfx x22, x21, #15, #1", + "strb w22, [x28, #745]", "ldrb w21, [x28, #1026]", - "lsr w20, w21, w20", - "and w20, w20, #0x1", + "lsr w22, w21, w20", + "and w20, w22, #0x1", "mrs x21, nzcv", "cmp x20, #0x1 (1)", "cset x22, ne", @@ -4508,11 +4524,11 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "ldr q2, [x28, #2304]", @@ -4528,11 +4544,11 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "ldr q2, [x28, #2320]", @@ -4548,11 +4564,11 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "ldr q2, [x28, #2336]", @@ -4568,11 +4584,11 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "ldr q2, [x28, #2352]", @@ -4588,11 +4604,11 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "ldr q2, [x28, #2368]", @@ -4608,11 +4624,11 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "ldr q2, [x28, #2384]", @@ -4628,11 +4644,11 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "movi v2.2d, #0x0", @@ -4674,11 +4690,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q3, [x0, #768]" ] }, "fyl2x": { @@ -4690,15 +4706,15 @@ "ldrb w20, [x28, #747]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "strb w21, [x28, #747]", + "and w22, w21, #0x7", + "strb w22, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", @@ -4727,11 +4743,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fptan": { @@ -4743,12 +4759,12 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "sub w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w22", - "orr w21, w23, w21", + "and w23, w22, #0x7", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w23", + "orr w21, w22, w24", "strb w21, [x28, #1026]", - "strb w22, [x28, #747]", + "strb w23, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", "mrs x0, nzcv", @@ -4776,16 +4792,16 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldr q3, [x28, #2304]", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "ldr q2, [x28, #2304]", "mov w21, #0x0", "strb w21, [x28, #746]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "add x0, x28, x22, lsl #4", - "str q3, [x0, #768]" + "str q3, [x0, #768]", + "add x0, x28, x23, lsl #4", + "str q2, [x0, #768]" ] }, "fpatan": { @@ -4797,15 +4813,15 @@ "ldrb w20, [x28, #747]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "strb w21, [x28, #747]", + "and w22, w21, #0x7", + "strb w22, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", @@ -4834,11 +4850,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fxtract": { @@ -4850,12 +4866,12 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "sub w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w22", - "orr w21, w23, w21", + "and w23, w22, #0x7", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w23", + "orr w21, w22, w24", "strb w21, [x28, #1026]", - "strb w22, [x28, #747]", + "strb w23, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", "mrs x0, nzcv", @@ -4911,13 +4927,13 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x22, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x23, lsl #4", + "str q4, [x0, #768]" ] }, "fprem1": { @@ -4928,10 +4944,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", @@ -4960,13 +4976,13 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "mov w21, #0x0", "strb w21, [x28, #746]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdecstp": { @@ -4976,8 +4992,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -4988,8 +5004,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -5001,10 +5017,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", @@ -5033,13 +5049,13 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "mov w21, #0x0", "strb w21, [x28, #746]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fyl2xp1": { @@ -5051,15 +5067,15 @@ "ldrb w20, [x28, #747]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "strb w21, [x28, #747]", + "and w22, w21, #0x7", + "strb w22, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "ldr q4, [x28, #2304]", "mrs x0, nzcv", @@ -5089,9 +5105,9 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v5.16b, v5.16b, v5.16b", + "mov v5.d[0], x0", + "mov v5.h[4], w1", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -5104,8 +5120,8 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v2.d[0]", - "umov w2, v2.h[4]", + "mov x1, v5.d[0]", + "umov w2, v5.h[4]", "mov x3, v3.d[0]", "umov w4, v3.h[4]", "ldr x5, [x28, #1440]", @@ -5122,7 +5138,7 @@ "eor v2.16b, v2.16b, v2.16b", "mov v2.d[0], x0", "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -5160,11 +5176,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q3, [x0, #768]" ] }, "fsincos": { @@ -5176,12 +5192,12 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "sub w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w22", - "orr w21, w23, w21", + "and w23, w22, #0x7", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w23", + "orr w21, w22, w24", "strb w21, [x28, #1026]", - "strb w22, [x28, #747]", + "strb w23, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", "mrs x0, nzcv", @@ -5237,15 +5253,15 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "mov w21, #0x0", "strb w21, [x28, #746]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x22, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x23, lsl #4", + "str q4, [x0, #768]" ] }, "frndint": { @@ -5282,11 +5298,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q3, [x0, #768]" ] }, "fscale": { @@ -5297,10 +5313,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", @@ -5329,11 +5345,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fsin": { @@ -5370,13 +5386,13 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "mov w21, #0x0", "strb w21, [x28, #746]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q3, [x0, #768]" ] }, "fcos": { @@ -5413,23 +5429,24 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "mov w21, #0x0", "strb w21, [x28, #746]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q3, [x0, #768]" ] }, "fiadd dword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xda !11b /0" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr w21, [x4]", + "mov x21, x4", + "ldr w22, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -5441,7 +5458,7 @@ "st1 {v2.2d, v3.2d}, [x0], #32", "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", - "mov w1, w21", + "mov w1, w22", "ldrh w0, [x28, #1024]", "ldr x2, [x28, #1320]", "blr x2", @@ -5486,21 +5503,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fimul dword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xda !11b /1" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr w21, [x4]", + "mov x21, x4", + "ldr w22, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -5512,7 +5530,7 @@ "st1 {v2.2d, v3.2d}, [x0], #32", "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", - "mov w1, w21", + "mov w1, w22", "ldrh w0, [x28, #1024]", "ldr x2, [x28, #1320]", "blr x2", @@ -5557,21 +5575,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "ficom dword [rax]": { - "ExpectedInstructionCount": 69, + "ExpectedInstructionCount": 70, "Comment": [ "0xda !11b /2" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr w21, [x4]", + "mov x21, x4", + "ldr w22, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -5583,7 +5602,7 @@ "st1 {v2.2d, v3.2d}, [x0], #32", "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", - "mov w1, w21", + "mov w1, w22", "ldrh w0, [x28, #1024]", "ldr x2, [x28, #1320]", "blr x2", @@ -5631,24 +5650,25 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "ficomp dword [rax]": { - "ExpectedInstructionCount": 77, + "ExpectedInstructionCount": 78, "Comment": [ "0xda !11b /3" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr w21, [x4]", + "mov x21, x4", + "ldr w22, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -5660,7 +5680,7 @@ "st1 {v2.2d, v3.2d}, [x0], #32", "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", - "mov w1, w21", + "mov w1, w22", "ldrh w0, [x28, #1024]", "ldr x2, [x28, #1320]", "blr x2", @@ -5708,32 +5728,33 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", - "mov w22, #0x0", - "strb w22, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", + "mov w21, #0x0", + "strb w21, [x28, #745]", + "strb w24, [x28, #746]", + "strb w22, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fisub dword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xda !11b /4" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr w21, [x4]", + "mov x21, x4", + "ldr w22, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -5745,7 +5766,7 @@ "st1 {v2.2d, v3.2d}, [x0], #32", "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", - "mov w1, w21", + "mov w1, w22", "ldrh w0, [x28, #1024]", "ldr x2, [x28, #1320]", "blr x2", @@ -5790,21 +5811,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fisubr dword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xda !11b /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr w21, [x4]", + "mov x21, x4", + "ldr w22, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -5816,7 +5838,7 @@ "st1 {v2.2d, v3.2d}, [x0], #32", "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", - "mov w1, w21", + "mov w1, w22", "ldrh w0, [x28, #1024]", "ldr x2, [x28, #1320]", "blr x2", @@ -5861,21 +5883,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fidiv dword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xda !11b /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr w21, [x4]", + "mov x21, x4", + "ldr w22, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -5887,7 +5910,7 @@ "st1 {v2.2d, v3.2d}, [x0], #32", "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", - "mov w1, w21", + "mov w1, w22", "ldrh w0, [x28, #1024]", "ldr x2, [x28, #1320]", "blr x2", @@ -5932,21 +5955,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fidivr dword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xda !11b /7" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr w21, [x4]", + "mov x21, x4", + "ldr w22, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -5958,7 +5982,7 @@ "st1 {v2.2d, v3.2d}, [x0], #32", "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", - "mov w1, w21", + "mov w1, w22", "ldrh w0, [x28, #1024]", "ldr x2, [x28, #1320]", "blr x2", @@ -6003,15 +6027,15 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fcmovb st0, st0": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc0 /0" ], @@ -6020,18 +6044,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovb st0, st1": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc1 /0" ], @@ -6040,18 +6065,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovb st0, st2": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc2 /0" ], @@ -6060,18 +6086,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovb st0, st3": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc3 /0" ], @@ -6080,18 +6107,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovb st0, st4": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc4 /0" ], @@ -6100,18 +6128,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovb st0, st5": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc5 /0" ], @@ -6120,18 +6149,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovb st0, st6": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc6 /0" ], @@ -6140,18 +6170,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovb st0, st7": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc7 /0" ], @@ -6160,18 +6191,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmove st0, st0": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc8 /1" ], @@ -6180,18 +6212,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmove st0, st1": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc9 /1" ], @@ -6200,18 +6233,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmove st0, st2": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xca /1" ], @@ -6220,18 +6254,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmove st0, st3": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xcb /1" ], @@ -6240,18 +6275,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmove st0, st4": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xcc /1" ], @@ -6260,18 +6296,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmove st0, st5": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xcd /1" ], @@ -6280,18 +6317,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmove st0, st6": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xce /1" ], @@ -6300,18 +6338,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmove st0, st7": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xcf /1" ], @@ -6320,398 +6359,423 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovbe st0, st0": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xda 11b 0xd0 /0" ], "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "csetm x21, hs", - "csel x20, x20, x21, eq", - "dup v2.2d, x20", + "csel x22, x20, x21, eq", + "dup v2.2d, x22", "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovbe st0, st1": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xda 11b 0xd1 /0" ], "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "csetm x21, hs", - "csel x20, x20, x21, eq", - "dup v2.2d, x20", + "csel x22, x20, x21, eq", + "dup v2.2d, x22", "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovbe st0, st2": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xda 11b 0xd2 /0" ], "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "csetm x21, hs", - "csel x20, x20, x21, eq", - "dup v2.2d, x20", + "csel x22, x20, x21, eq", + "dup v2.2d, x22", "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovbe st0, st3": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xda 11b 0xd3 /0" ], "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "csetm x21, hs", - "csel x20, x20, x21, eq", - "dup v2.2d, x20", + "csel x22, x20, x21, eq", + "dup v2.2d, x22", "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovbe st0, st4": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xda 11b 0xd4 /0" ], "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "csetm x21, hs", - "csel x20, x20, x21, eq", - "dup v2.2d, x20", + "csel x22, x20, x21, eq", + "dup v2.2d, x22", "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovbe st0, st5": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xda 11b 0xd5 /0" ], "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "csetm x21, hs", - "csel x20, x20, x21, eq", - "dup v2.2d, x20", + "csel x22, x20, x21, eq", + "dup v2.2d, x22", "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovbe st0, st6": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xda 11b 0xd6 /0" ], "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "csetm x21, hs", - "csel x20, x20, x21, eq", - "dup v2.2d, x20", + "csel x22, x20, x21, eq", + "dup v2.2d, x22", "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovbe st0, st7": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xda 11b 0xd7 /0" ], "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "csetm x21, hs", - "csel x20, x20, x21, eq", - "dup v2.2d, x20", + "csel x22, x20, x21, eq", + "dup v2.2d, x22", "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovu st0, st0": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xda 11b 0xd8 /1" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x0 (0)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x0 (0)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovu st0, st1": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xda 11b 0xd9 /1" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x1 (1)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovu st0, st2": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xda 11b 0xda /1" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x2 (2)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x2 (2)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovu st0, st3": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xda 11b 0xdb /1" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x3 (3)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x3 (3)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovu st0, st4": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xda 11b 0xdc /1" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x4 (4)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x4 (4)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovu st0, st5": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xda 11b 0xdd /1" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x5 (5)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x5 (5)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovu st0, st6": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xda 11b 0xde /1" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x6 (6)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x6 (6)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovu st0, st7": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xda 11b 0xdf /1" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x7 (7)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x7 (7)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fucompp": { @@ -6723,8 +6787,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -6758,73 +6822,78 @@ "mov x22, x0", "ubfx x23, x22, #1, #1", "ubfx x24, x22, #0, #1", - "ubfx x22, x22, #2, #1", - "orr w23, w23, w22", - "orr w24, w24, w22", - "strb w23, [x28, #744]", - "mov w23, #0x0", - "strb w23, [x28, #745]", - "strb w22, [x28, #746]", - "strb w24, [x28, #750]", + "ubfx x25, x22, #2, #1", + "orr w22, w23, w25", + "orr w23, w24, w25", + "strb w22, [x28, #744]", + "mov w22, #0x0", + "strb w22, [x28, #745]", + "strb w25, [x28, #746]", + "strb w23, [x28, #750]", "ldrb w22, [x28, #1026]", "lsl w23, w21, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "bic w24, w22, w23", + "strb w24, [x28, #1026]", + "add w22, w20, #0x1 (1)", + "and w20, w22, #0x7", + "lsl w22, w21, w20", + "bic w21, w24, w22", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fild dword [rax]": { - "ExpectedInstructionCount": 35, + "ExpectedInstructionCount": 40, "Comment": [ "0xdf !11b /5" ], "ExpectedArm64ASM": [ + "sub sp, sp, #0x20 (32)", "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "ldr w21, [x4]", - "mov w22, #0x0", - "sxtw x21, w21", + "mov x21, x4", + "ldr w22, [x21]", + "mov w21, #0x0", + "sxtw x23, w22", "mrs x22, nzcv", - "cmp x21, #0x0 (0)", - "mov w23, #0x8000", - "csel x23, x23, xzr, lt", - "cneg x21, x21, mi", - "mov w24, #0x3f", + "cmp x23, #0x0 (0)", + "mov w24, #0x8000", + "csel x25, x24, xzr, lt", + "cneg x24, x23, mi", + "mov w23, #0x3f", "mov x0, #0x3f", - "clz x25, x21", - "sub x25, x0, x25", - "sub x24, x24, x25", - "lsl x25, x21, x24", + "clz x30, x24", + "sub x30, x0, x30", + "sub x18, x23, x30", + "lsl x23, x24, x18", "mov w30, #0x403e", - "sub x24, x30, x24", - "mov w30, #0x0", - "cmp x21, #0x0 (0)", - "csel x21, x30, x24, eq", - "orr x21, x23, x21", - "fmov d2, x25", + "str w22, [sp]", + "sub x22, x30, x18", + "cmp x24, #0x0 (0)", + "csel x30, x21, x22, eq", + "orr x21, x25, x30", + "fmov d2, x23", "fmov d3, x21", - "mov v2.d[1], v3.d[0]", + "mov v4.16b, v2.16b", + "mov v4.d[1], v3.d[0]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x22" + "str q4, [x0, #768]", + "ldr w20, [sp]", + "msr nzcv, x20", + "add sp, sp, #0x20 (32)" ] }, "fisttp dword [rax]": { - "ExpectedInstructionCount": 38, + "ExpectedInstructionCount": 39, "Comment": [ "0xdb !11b /1" ], @@ -6858,19 +6927,20 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", "mov w21, w0", - "str w21, [x4]", + "mov x22, x4", + "str w21, [x22]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fist dword [rax]": { - "ExpectedInstructionCount": 30, + "ExpectedInstructionCount": 31, "Comment": [ "0xdb !11b /2" ], @@ -6904,11 +6974,12 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", "mov w20, w0", - "str w20, [x4]" + "mov x21, x4", + "str w20, [x21]" ] }, "fistp dword [rax]": { - "ExpectedInstructionCount": 38, + "ExpectedInstructionCount": 39, "Comment": [ "0xdf !11b /7" ], @@ -6942,31 +7013,33 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", "mov w21, w0", - "str w21, [x4]", + "mov x22, x4", + "str w21, [x22]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fld tword [rax]": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb !11b /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr q2, [x4]", + "mov x21, x4", + "ldr q2, [x21]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -6974,7 +7047,7 @@ ] }, "fstp tword [rax]": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xdb !11b /7" ], @@ -6982,21 +7055,22 @@ "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "str d2, [x4]", - "mov x21, v2.d[1]", - "strh w21, [x4, #8]", + "mov x21, x4", + "str d2, [x21]", + "mov x22, v2.d[1]", + "strh w22, [x21, #8]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fcmovnb st0, st0": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc0 /0" ], @@ -7005,18 +7079,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnb st0, st1": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc1 /0" ], @@ -7025,18 +7100,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnb st0, st2": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc2 /0" ], @@ -7045,18 +7121,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnb st0, st3": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc3 /0" ], @@ -7065,18 +7142,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnb st0, st4": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc4 /0" ], @@ -7085,18 +7163,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnb st0, st5": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc5 /0" ], @@ -7105,18 +7184,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnb st0, st6": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc6 /0" ], @@ -7125,18 +7205,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnb st0, st7": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc7 /0" ], @@ -7145,18 +7226,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovne st0, st0": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc8 /1" ], @@ -7165,18 +7247,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovne st0, st1": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc9 /1" ], @@ -7185,18 +7268,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovne st0, st2": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xca /1" ], @@ -7205,18 +7289,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovne st0, st3": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xcb /1" ], @@ -7225,18 +7310,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovne st0, st4": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xcc /1" ], @@ -7245,18 +7331,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovne st0, st5": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xcd /1" ], @@ -7265,18 +7352,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovne st0, st6": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xce /1" ], @@ -7285,18 +7373,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovne st0, st7": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xcf /1" ], @@ -7305,390 +7394,415 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnbe st0, st0": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb 11b 0xd0 /2" ], "ExpectedArm64ASM": [ "csetm x20, lo", - "csel x20, x20, xzr, ne", - "dup v2.2d, x20", + "csel x21, x20, xzr, ne", + "dup v2.2d, x21", "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnbe st0, st1": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb 11b 0xd1 /2" ], "ExpectedArm64ASM": [ "csetm x20, lo", - "csel x20, x20, xzr, ne", - "dup v2.2d, x20", + "csel x21, x20, xzr, ne", + "dup v2.2d, x21", "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnbe st0, st2": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb 11b 0xd2 /2" ], "ExpectedArm64ASM": [ "csetm x20, lo", - "csel x20, x20, xzr, ne", - "dup v2.2d, x20", + "csel x21, x20, xzr, ne", + "dup v2.2d, x21", "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnbe st0, st3": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb 11b 0xd3 /2" ], "ExpectedArm64ASM": [ "csetm x20, lo", - "csel x20, x20, xzr, ne", - "dup v2.2d, x20", + "csel x21, x20, xzr, ne", + "dup v2.2d, x21", "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnbe st0, st4": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb 11b 0xd4 /2" ], "ExpectedArm64ASM": [ "csetm x20, lo", - "csel x20, x20, xzr, ne", - "dup v2.2d, x20", + "csel x21, x20, xzr, ne", + "dup v2.2d, x21", "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnbe st0, st5": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb 11b 0xd5 /2" ], "ExpectedArm64ASM": [ "csetm x20, lo", - "csel x20, x20, xzr, ne", - "dup v2.2d, x20", + "csel x21, x20, xzr, ne", + "dup v2.2d, x21", "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnbe st0, st6": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb 11b 0xd6 /2" ], "ExpectedArm64ASM": [ "csetm x20, lo", - "csel x20, x20, xzr, ne", - "dup v2.2d, x20", + "csel x21, x20, xzr, ne", + "dup v2.2d, x21", "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnbe st0, st7": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb 11b 0xd7 /2" ], "ExpectedArm64ASM": [ "csetm x20, lo", - "csel x20, x20, xzr, ne", - "dup v2.2d, x20", + "csel x21, x20, xzr, ne", + "dup v2.2d, x21", "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnu st0, st0": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xdb 11b 0xd8 /3" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x0 (0)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x0 (0)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovnu st0, st1": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xdb 11b 0xd9 /3" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x1 (1)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovnu st0, st2": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xdb 11b 0xda /3" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x2 (2)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x2 (2)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovnu st0, st3": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xdb 11b 0xdb /3" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x3 (3)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x3 (3)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovnu st0, st4": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xdb 11b 0xdc /3" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x4 (4)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x4 (4)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovnu st0, st5": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xdb 11b 0xdd /3" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x5 (5)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x5 (5)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovnu st0, st6": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xdb 11b 0xde /3" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x6 (6)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x6 (6)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovnu st0, st7": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xdb 11b 0xdf /3" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x7 (7)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x7 (7)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fnclex": { @@ -7716,15 +7830,15 @@ ] }, "fucomi st0, st0": { - "ExpectedInstructionCount": 44, + "ExpectedInstructionCount": 45, "Comment": [ "0xdb 11b 0xe8 /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -7758,25 +7872,26 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "lsl x21, x21, #29", - "orr w21, w21, w22, lsl #30", - "eor w26, w20, #0x1", - "msr nzcv, x21" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "lsl x22, x20, #29", + "orr w20, w22, w21, lsl #30", + "eor w21, w23, #0x1", + "mov x26, x21", + "msr nzcv, x20" ] }, "fucomi st0, st1": { - "ExpectedInstructionCount": 44, + "ExpectedInstructionCount": 45, "Comment": [ "0xdb 11b 0xe9 /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -7810,25 +7925,26 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "lsl x21, x21, #29", - "orr w21, w21, w22, lsl #30", - "eor w26, w20, #0x1", - "msr nzcv, x21" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "lsl x22, x20, #29", + "orr w20, w22, w21, lsl #30", + "eor w21, w23, #0x1", + "mov x26, x21", + "msr nzcv, x20" ] }, "fucomi st0, st2": { - "ExpectedInstructionCount": 44, + "ExpectedInstructionCount": 45, "Comment": [ "0xdb 11b 0xea /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -7862,25 +7978,26 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "lsl x21, x21, #29", - "orr w21, w21, w22, lsl #30", - "eor w26, w20, #0x1", - "msr nzcv, x21" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "lsl x22, x20, #29", + "orr w20, w22, w21, lsl #30", + "eor w21, w23, #0x1", + "mov x26, x21", + "msr nzcv, x20" ] }, "fucomi st0, st3": { - "ExpectedInstructionCount": 44, + "ExpectedInstructionCount": 45, "Comment": [ "0xdb 11b 0xeb /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -7914,25 +8031,26 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "lsl x21, x21, #29", - "orr w21, w21, w22, lsl #30", - "eor w26, w20, #0x1", - "msr nzcv, x21" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "lsl x22, x20, #29", + "orr w20, w22, w21, lsl #30", + "eor w21, w23, #0x1", + "mov x26, x21", + "msr nzcv, x20" ] }, "fucomi st0, st4": { - "ExpectedInstructionCount": 44, + "ExpectedInstructionCount": 45, "Comment": [ "0xdb 11b 0xec /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -7966,25 +8084,26 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "lsl x21, x21, #29", - "orr w21, w21, w22, lsl #30", - "eor w26, w20, #0x1", - "msr nzcv, x21" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "lsl x22, x20, #29", + "orr w20, w22, w21, lsl #30", + "eor w21, w23, #0x1", + "mov x26, x21", + "msr nzcv, x20" ] }, "fucomi st0, st5": { - "ExpectedInstructionCount": 44, + "ExpectedInstructionCount": 45, "Comment": [ "0xdb 11b 0xed /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -8018,25 +8137,26 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "lsl x21, x21, #29", - "orr w21, w21, w22, lsl #30", - "eor w26, w20, #0x1", - "msr nzcv, x21" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "lsl x22, x20, #29", + "orr w20, w22, w21, lsl #30", + "eor w21, w23, #0x1", + "mov x26, x21", + "msr nzcv, x20" ] }, "fucomi st0, st6": { - "ExpectedInstructionCount": 44, + "ExpectedInstructionCount": 45, "Comment": [ "0xdb 11b 0xee /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -8070,25 +8190,26 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "lsl x21, x21, #29", - "orr w21, w21, w22, lsl #30", - "eor w26, w20, #0x1", - "msr nzcv, x21" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "lsl x22, x20, #29", + "orr w20, w22, w21, lsl #30", + "eor w21, w23, #0x1", + "mov x26, x21", + "msr nzcv, x20" ] }, "fucomi st0, st7": { - "ExpectedInstructionCount": 44, + "ExpectedInstructionCount": 45, "Comment": [ "0xdb 11b 0xef /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -8122,25 +8243,26 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "lsl x21, x21, #29", - "orr w21, w21, w22, lsl #30", - "eor w26, w20, #0x1", - "msr nzcv, x21" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "lsl x22, x20, #29", + "orr w20, w22, w21, lsl #30", + "eor w21, w23, #0x1", + "mov x26, x21", + "msr nzcv, x20" ] }, "fcomi st0, st0": { - "ExpectedInstructionCount": 44, + "ExpectedInstructionCount": 45, "Comment": [ "0xdb 11b 0xf0 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -8174,25 +8296,26 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "lsl x21, x21, #29", - "orr w21, w21, w22, lsl #30", - "eor w26, w20, #0x1", - "msr nzcv, x21" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "lsl x22, x20, #29", + "orr w20, w22, w21, lsl #30", + "eor w21, w23, #0x1", + "mov x26, x21", + "msr nzcv, x20" ] }, "fcomi st0, st1": { - "ExpectedInstructionCount": 44, + "ExpectedInstructionCount": 45, "Comment": [ "0xdb 11b 0xf1 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -8226,25 +8349,26 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "lsl x21, x21, #29", - "orr w21, w21, w22, lsl #30", - "eor w26, w20, #0x1", - "msr nzcv, x21" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "lsl x22, x20, #29", + "orr w20, w22, w21, lsl #30", + "eor w21, w23, #0x1", + "mov x26, x21", + "msr nzcv, x20" ] }, "fcomi st0, st2": { - "ExpectedInstructionCount": 44, + "ExpectedInstructionCount": 45, "Comment": [ "0xdb 11b 0xf2 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -8278,25 +8402,26 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "lsl x21, x21, #29", - "orr w21, w21, w22, lsl #30", - "eor w26, w20, #0x1", - "msr nzcv, x21" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "lsl x22, x20, #29", + "orr w20, w22, w21, lsl #30", + "eor w21, w23, #0x1", + "mov x26, x21", + "msr nzcv, x20" ] }, "fcomi st0, st3": { - "ExpectedInstructionCount": 44, + "ExpectedInstructionCount": 45, "Comment": [ "0xdb 11b 0xf3 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -8330,25 +8455,26 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "lsl x21, x21, #29", - "orr w21, w21, w22, lsl #30", - "eor w26, w20, #0x1", - "msr nzcv, x21" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "lsl x22, x20, #29", + "orr w20, w22, w21, lsl #30", + "eor w21, w23, #0x1", + "mov x26, x21", + "msr nzcv, x20" ] }, "fcomi st0, st4": { - "ExpectedInstructionCount": 44, + "ExpectedInstructionCount": 45, "Comment": [ "0xdb 11b 0xf4 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -8382,25 +8508,26 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "lsl x21, x21, #29", - "orr w21, w21, w22, lsl #30", - "eor w26, w20, #0x1", - "msr nzcv, x21" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "lsl x22, x20, #29", + "orr w20, w22, w21, lsl #30", + "eor w21, w23, #0x1", + "mov x26, x21", + "msr nzcv, x20" ] }, "fcomi st0, st5": { - "ExpectedInstructionCount": 44, + "ExpectedInstructionCount": 45, "Comment": [ "0xdb 11b 0xf5 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -8434,25 +8561,26 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "lsl x21, x21, #29", - "orr w21, w21, w22, lsl #30", - "eor w26, w20, #0x1", - "msr nzcv, x21" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "lsl x22, x20, #29", + "orr w20, w22, w21, lsl #30", + "eor w21, w23, #0x1", + "mov x26, x21", + "msr nzcv, x20" ] }, "fcomi st0, st6": { - "ExpectedInstructionCount": 44, + "ExpectedInstructionCount": 45, "Comment": [ "0xdb 11b 0xf6 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -8486,25 +8614,26 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "lsl x21, x21, #29", - "orr w21, w21, w22, lsl #30", - "eor w26, w20, #0x1", - "msr nzcv, x21" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "lsl x22, x20, #29", + "orr w20, w22, w21, lsl #30", + "eor w21, w23, #0x1", + "mov x26, x21", + "msr nzcv, x20" ] }, "fcomi st0, st7": { - "ExpectedInstructionCount": 44, + "ExpectedInstructionCount": 45, "Comment": [ "0xdb 11b 0xf7 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -8538,23 +8667,25 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "lsl x21, x21, #29", - "orr w21, w21, w22, lsl #30", - "eor w26, w20, #0x1", - "msr nzcv, x21" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "lsl x22, x20, #29", + "orr w20, w22, w21, lsl #30", + "eor w21, w23, #0x1", + "mov x26, x21", + "msr nzcv, x20" ] }, "fadd qword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xdc !11b /0" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -8579,11 +8710,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "ldr q3, [x0, #768]", + "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -8596,10 +8727,10 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", - "mov x3, v2.d[0]", - "umov w4, v2.h[4]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "mov x3, v3.d[0]", + "umov w4, v3.h[4]", "ldr x5, [x28, #1408]", "blr x5", "ldr w4, [x28, #728]", @@ -8611,21 +8742,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fmul qword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xdc !11b /1" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -8650,11 +8782,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "ldr q3, [x0, #768]", + "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -8667,10 +8799,10 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", - "mov x3, v2.d[0]", - "umov w4, v2.h[4]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "mov x3, v3.d[0]", + "umov w4, v3.h[4]", "ldr x5, [x28, #1424]", "blr x5", "ldr w4, [x28, #728]", @@ -8682,21 +8814,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fcom qword [rax]": { - "ExpectedInstructionCount": 69, + "ExpectedInstructionCount": 70, "Comment": [ "0xdc !11b /2" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -8721,11 +8854,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "ldr q3, [x0, #768]", + "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -8738,10 +8871,10 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", - "mov x3, v2.d[0]", - "umov w4, v2.h[4]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "mov x3, v3.d[0]", + "umov w4, v3.h[4]", "ldr x5, [x28, #1304]", "blr x5", "ldr w4, [x28, #728]", @@ -8756,24 +8889,25 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "fcomp qword [rax]": { - "ExpectedInstructionCount": 77, + "ExpectedInstructionCount": 78, "Comment": [ "0xdc !11b /3" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -8798,11 +8932,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "ldr q3, [x0, #768]", + "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -8815,10 +8949,10 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", - "mov x3, v2.d[0]", - "umov w4, v2.h[4]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "mov x3, v3.d[0]", + "umov w4, v3.h[4]", "ldr x5, [x28, #1304]", "blr x5", "ldr w4, [x28, #728]", @@ -8833,32 +8967,33 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", - "mov w22, #0x0", - "strb w22, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", + "mov w21, #0x0", + "strb w21, [x28, #745]", + "strb w24, [x28, #746]", + "strb w22, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fsub qword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xdc !11b /4" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -8883,11 +9018,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "ldr q3, [x0, #768]", + "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -8900,10 +9035,10 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", - "mov x3, v2.d[0]", - "umov w4, v2.h[4]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "mov x3, v3.d[0]", + "umov w4, v3.h[4]", "ldr x5, [x28, #1416]", "blr x5", "ldr w4, [x28, #728]", @@ -8915,21 +9050,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fsubr qword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xdc !11b /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -8954,11 +9090,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "ldr q3, [x0, #768]", + "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -8971,10 +9107,10 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v2.d[0]", - "umov w2, v2.h[4]", - "mov x3, v3.d[0]", - "umov w4, v3.h[4]", + "mov x1, v3.d[0]", + "umov w2, v3.h[4]", + "mov x3, v2.d[0]", + "umov w4, v2.h[4]", "ldr x5, [x28, #1416]", "blr x5", "ldr w4, [x28, #728]", @@ -8986,21 +9122,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdiv qword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xdc !11b /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -9025,11 +9162,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "ldr q3, [x0, #768]", + "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -9042,10 +9179,10 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", - "mov x3, v2.d[0]", - "umov w4, v2.h[4]", + "mov x1, v2.d[0]", + "umov w2, v2.h[4]", + "mov x3, v3.d[0]", + "umov w4, v3.h[4]", "ldr x5, [x28, #1432]", "blr x5", "ldr w4, [x28, #728]", @@ -9057,21 +9194,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fdivr qword [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xdc !11b /7" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -9096,11 +9234,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "ldr q3, [x0, #768]", + "ldr q2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -9113,10 +9251,10 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v2.d[0]", - "umov w2, v2.h[4]", - "mov x3, v3.d[0]", - "umov w4, v3.h[4]", + "mov x1, v3.d[0]", + "umov w2, v3.h[4]", + "mov x3, v2.d[0]", + "umov w4, v2.h[4]", "ldr x5, [x28, #1432]", "blr x5", "ldr w4, [x28, #728]", @@ -9128,11 +9266,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "db 0xdc, 0xc0": { @@ -9145,8 +9283,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9177,11 +9315,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fadd st1, st0": { @@ -9192,8 +9330,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9224,11 +9362,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fadd st2, st0": { @@ -9239,8 +9377,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9271,11 +9409,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fadd st3, st0": { @@ -9286,8 +9424,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9318,11 +9456,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fadd st4, st0": { @@ -9333,8 +9471,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9365,11 +9503,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fadd st5, st0": { @@ -9380,8 +9518,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9412,11 +9550,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fadd st6, st0": { @@ -9427,8 +9565,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9459,11 +9597,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fadd st7, st0": { @@ -9474,8 +9612,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9506,11 +9644,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "db 0xdc, 0xc8": { @@ -9523,8 +9661,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9555,11 +9693,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fmul st1, st0": { @@ -9570,8 +9708,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9602,11 +9740,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fmul st2, st0": { @@ -9617,8 +9755,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9649,11 +9787,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fmul st3, st0": { @@ -9664,8 +9802,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9696,11 +9834,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fmul st4, st0": { @@ -9711,8 +9849,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9743,11 +9881,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fmul st5, st0": { @@ -9758,8 +9896,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9790,11 +9928,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fmul st6, st0": { @@ -9805,8 +9943,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9837,11 +9975,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fmul st7, st0": { @@ -9852,8 +9990,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9884,11 +10022,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "db 0xdc, 0xe0": { @@ -9901,8 +10039,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9933,11 +10071,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubr st1, st0": { @@ -9948,8 +10086,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -9980,11 +10118,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubr st2, st0": { @@ -9995,8 +10133,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10027,11 +10165,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubr st3, st0": { @@ -10042,8 +10180,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10074,11 +10212,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubr st4, st0": { @@ -10089,8 +10227,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10121,11 +10259,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubr st5, st0": { @@ -10136,8 +10274,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10168,11 +10306,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubr st6, st0": { @@ -10183,8 +10321,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10215,11 +10353,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubr st7, st0": { @@ -10230,8 +10368,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10262,11 +10400,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "db 0xdc, 0xe8": { @@ -10279,8 +10417,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10311,11 +10449,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsub st1, st0": { @@ -10326,8 +10464,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10358,11 +10496,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsub st2, st0": { @@ -10373,8 +10511,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10405,11 +10543,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsub st3, st0": { @@ -10420,8 +10558,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10452,11 +10590,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsub st4, st0": { @@ -10467,8 +10605,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10499,11 +10637,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsub st5, st0": { @@ -10514,8 +10652,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10546,11 +10684,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsub st6, st0": { @@ -10561,8 +10699,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10593,11 +10731,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsub st7, st0": { @@ -10608,8 +10746,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10640,11 +10778,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "db 0xdc, 0xf0": { @@ -10657,8 +10795,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10689,11 +10827,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivr st1, st0": { @@ -10704,8 +10842,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10736,11 +10874,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivr st2, st0": { @@ -10751,8 +10889,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10783,11 +10921,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivr st3, st0": { @@ -10798,8 +10936,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10830,11 +10968,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivr st4, st0": { @@ -10845,8 +10983,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10877,11 +11015,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivr st5, st0": { @@ -10892,8 +11030,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10924,11 +11062,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivr st6, st0": { @@ -10939,8 +11077,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -10971,11 +11109,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivr st7, st0": { @@ -10986,8 +11124,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -11018,11 +11156,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "db 0xdc, 0xf8": { @@ -11035,8 +11173,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -11067,11 +11205,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdiv st1, st0": { @@ -11082,8 +11220,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -11114,11 +11252,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdiv st2, st0": { @@ -11129,8 +11267,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -11159,13 +11297,13 @@ "ldr x8, [x28, #40]", "ldp x16, x17, [x28, #104]", "ld1 {v2.2d, v3.2d}, [sp], #32", - "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", - "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", + "ldr x30, [sp], #16", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdiv st3, st0": { @@ -11176,8 +11314,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -11208,11 +11346,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdiv st4, st0": { @@ -11223,8 +11361,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -11255,11 +11393,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdiv st5, st0": { @@ -11270,8 +11408,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -11302,11 +11440,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdiv st6, st0": { @@ -11317,8 +11455,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -11349,11 +11487,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdiv st7, st0": { @@ -11364,8 +11502,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -11396,21 +11534,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fld qword [rax]": { - "ExpectedInstructionCount": 39, + "ExpectedInstructionCount": 40, "Comment": [ "0xdd !11b /0" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -11435,23 +11574,23 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q3, [x0, #768]" ] }, "fisttp qword [rax]": { - "ExpectedInstructionCount": 38, + "ExpectedInstructionCount": 39, "Comment": [ "0xdd !11b /1" ], @@ -11485,19 +11624,20 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", "mov x21, x0", - "str x21, [x4]", + "mov x22, x4", + "str x21, [x22]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fst qword [rax]": { - "ExpectedInstructionCount": 30, + "ExpectedInstructionCount": 31, "Comment": [ "0xdd !11b /2" ], @@ -11530,12 +11670,13 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "mov v2.8b, v0.8b", - "str d2, [x4]" + "mov v3.8b, v0.8b", + "mov x20, x4", + "str d3, [x20]" ] }, "fstp qword [rax]": { - "ExpectedInstructionCount": 38, + "ExpectedInstructionCount": 39, "Comment": [ "0xdd !11b /3" ], @@ -11568,278 +11709,289 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "mov v2.8b, v0.8b", - "str d2, [x4]", + "mov v3.8b, v0.8b", + "mov x21, x4", + "str d3, [x21]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "frstor [rax]": { - "ExpectedInstructionCount": 107, + "ExpectedInstructionCount": 110, "Comment": [ "0xdd !11b /4" ], "ExpectedArm64ASM": [ - "ldrh w20, [x4]", - "strh w20, [x28, #1024]", - "ldr w20, [x4, #4]", - "ubfx w21, w20, #11, #3", - "strb w21, [x28, #747]", - "ubfx w22, w20, #8, #1", - "ubfx w23, w20, #9, #1", - "ubfx w24, w20, #10, #1", - "ubfx w20, w20, #14, #1", - "strb w22, [x28, #744]", - "strb w23, [x28, #745]", - "strb w24, [x28, #746]", - "strb w20, [x28, #750]", - "ldr w20, [x4, #8]", - "ubfx w22, w20, #0, #2", - "mrs x23, nzcv", - "cmp x22, #0x3 (3)", - "cset x22, ne", - "ubfx w24, w20, #2, #2", - "cmp x24, #0x3 (3)", - "cset x24, ne", - "orr w22, w22, w24, lsl #1", - "ubfx w24, w20, #4, #2", - "cmp x24, #0x3 (3)", - "cset x24, ne", - "orr w22, w22, w24, lsl #2", - "ubfx w24, w20, #6, #2", - "cmp x24, #0x3 (3)", - "cset x24, ne", - "orr w22, w22, w24, lsl #3", - "ubfx w24, w20, #8, #2", - "cmp x24, #0x3 (3)", - "cset x24, ne", - "orr w22, w22, w24, lsl #4", - "ubfx w24, w20, #10, #2", - "cmp x24, #0x3 (3)", - "cset x24, ne", - "orr w22, w22, w24, lsl #5", - "ubfx w24, w20, #12, #2", - "cmp x24, #0x3 (3)", - "cset x24, ne", - "orr w22, w22, w24, lsl #6", - "ubfx w20, w20, #14, #2", - "cmp x20, #0x3 (3)", - "cset x20, ne", - "orr w20, w22, w20, lsl #7", - "strb w20, [x28, #1026]", - "add x20, x4, #0x1c (28)", - "mov x22, #0xffffffffffffffff", - "mov w24, #0xffff", - "fmov d2, x22", - "mov v2.d[1], x24", - "ldur q3, [x4, #28]", - "and v3.16b, v3.16b, v2.16b", - "add x0, x28, x21, lsl #4", - "str q3, [x0, #768]", - "add x22, x20, #0xa (10)", - "add w21, w21, #0x1 (1)", - "and w21, w21, #0x7", - "ldur q3, [x20, #10]", - "and v3.16b, v3.16b, v2.16b", - "add x0, x28, x21, lsl #4", - "str q3, [x0, #768]", - "add x20, x22, #0xa (10)", - "add w21, w21, #0x1 (1)", - "and w21, w21, #0x7", - "ldur q3, [x22, #10]", - "and v3.16b, v3.16b, v2.16b", - "add x0, x28, x21, lsl #4", - "str q3, [x0, #768]", - "add x22, x20, #0xa (10)", - "add w21, w21, #0x1 (1)", - "and w21, w21, #0x7", - "ldur q3, [x20, #10]", - "and v3.16b, v3.16b, v2.16b", - "add x0, x28, x21, lsl #4", - "str q3, [x0, #768]", - "add x20, x22, #0xa (10)", - "add w21, w21, #0x1 (1)", - "and w21, w21, #0x7", - "ldur q3, [x22, #10]", - "and v3.16b, v3.16b, v2.16b", - "add x0, x28, x21, lsl #4", - "str q3, [x0, #768]", - "add x22, x20, #0xa (10)", - "add w21, w21, #0x1 (1)", - "and w21, w21, #0x7", - "ldur q3, [x20, #10]", - "and v3.16b, v3.16b, v2.16b", - "add x0, x28, x21, lsl #4", - "str q3, [x0, #768]", - "add x20, x22, #0xa (10)", - "add w21, w21, #0x1 (1)", - "and w21, w21, #0x7", - "ldur q3, [x22, #10]", - "and v2.16b, v3.16b, v2.16b", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]", - "add x22, x20, #0xa (10)", - "add w21, w21, #0x1 (1)", - "and w21, w21, #0x7", - "ldur d2, [x20, #10]", - "ldr h3, [x22, #8]", - "mov v2.h[4], v3.h[0]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x23" + "mov x20, x4", + "ldrh w21, [x20]", + "strh w21, [x28, #1024]", + "ldr w21, [x20, #4]", + "ubfx w22, w21, #11, #3", + "strb w22, [x28, #747]", + "ubfx w23, w21, #8, #1", + "ubfx w24, w21, #9, #1", + "ubfx w25, w21, #10, #1", + "ubfx w30, w21, #14, #1", + "strb w23, [x28, #744]", + "strb w24, [x28, #745]", + "strb w25, [x28, #746]", + "strb w30, [x28, #750]", + "ldr w21, [x20, #8]", + "ubfx w23, w21, #0, #2", + "mrs x24, nzcv", + "cmp x23, #0x3 (3)", + "cset x25, ne", + "ubfx w23, w21, #2, #2", + "cmp x23, #0x3 (3)", + "cset x30, ne", + "orr w23, w25, w30, lsl #1", + "ubfx w25, w21, #4, #2", + "cmp x25, #0x3 (3)", + "cset x30, ne", + "orr w25, w23, w30, lsl #2", + "ubfx w23, w21, #6, #2", + "cmp x23, #0x3 (3)", + "cset x30, ne", + "orr w23, w25, w30, lsl #3", + "ubfx w25, w21, #8, #2", + "cmp x25, #0x3 (3)", + "cset x30, ne", + "orr w25, w23, w30, lsl #4", + "ubfx w23, w21, #10, #2", + "cmp x23, #0x3 (3)", + "cset x30, ne", + "orr w23, w25, w30, lsl #5", + "ubfx w25, w21, #12, #2", + "cmp x25, #0x3 (3)", + "cset x30, ne", + "orr w25, w23, w30, lsl #6", + "ubfx w23, w21, #14, #2", + "cmp x23, #0x3 (3)", + "cset x21, ne", + "orr w23, w25, w21, lsl #7", + "strb w23, [x28, #1026]", + "add x21, x20, #0x1c (28)", + "mov x23, #0xffffffffffffffff", + "mov w25, #0xffff", + "fmov d2, x23", + "mov v3.16b, v2.16b", + "mov v3.d[1], x25", + "ldur q2, [x20, #28]", + "and v4.16b, v2.16b, v3.16b", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]", + "add x20, x21, #0xa (10)", + "add w23, w22, #0x1 (1)", + "and w22, w23, #0x7", + "ldur q2, [x21, #10]", + "and v4.16b, v2.16b, v3.16b", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]", + "add x21, x20, #0xa (10)", + "add w23, w22, #0x1 (1)", + "and w22, w23, #0x7", + "ldur q2, [x20, #10]", + "and v4.16b, v2.16b, v3.16b", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]", + "add x20, x21, #0xa (10)", + "add w23, w22, #0x1 (1)", + "and w22, w23, #0x7", + "ldur q2, [x21, #10]", + "and v4.16b, v2.16b, v3.16b", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]", + "add x21, x20, #0xa (10)", + "add w23, w22, #0x1 (1)", + "and w22, w23, #0x7", + "ldur q2, [x20, #10]", + "and v4.16b, v2.16b, v3.16b", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]", + "add x20, x21, #0xa (10)", + "add w23, w22, #0x1 (1)", + "and w22, w23, #0x7", + "ldur q2, [x21, #10]", + "and v4.16b, v2.16b, v3.16b", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]", + "add x21, x20, #0xa (10)", + "add w23, w22, #0x1 (1)", + "and w22, w23, #0x7", + "ldur q2, [x20, #10]", + "and v4.16b, v2.16b, v3.16b", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]", + "add x20, x21, #0xa (10)", + "add w23, w22, #0x1 (1)", + "and w22, w23, #0x7", + "ldur d2, [x21, #10]", + "ldr h3, [x20, #8]", + "mov v4.16b, v2.16b", + "mov v4.h[4], v3.h[0]", + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]", + "msr nzcv, x24" ] }, "fnsave [rax]": { - "ExpectedInstructionCount": 119, + "ExpectedInstructionCount": 124, "Comment": [ "0xdd !11b /6" ], "ExpectedArm64ASM": [ - "ldrb w20, [x28, #747]", - "ldrh w21, [x28, #1024]", - "str w21, [x4]", - "mov w21, #0x0", - "mov x22, x21", - "bfi x22, x20, #11, #3", - "ldrb w23, [x28, #744]", - "ldrb w24, [x28, #745]", - "ldrb w25, [x28, #746]", - "ldrb w30, [x28, #750]", - "orr x22, x22, x23, lsl #8", - "orr x22, x22, x24, lsl #9", - "orr x22, x22, x25, lsl #10", - "orr x22, x22, x30, lsl #14", - "str w22, [x4, #4]", - "ldrb w22, [x28, #1026]", - "and w23, w22, #0x1", + "sub sp, sp, #0x20 (32)", + "mov x20, x4", + "ldrb w21, [x28, #747]", + "ldrh w22, [x28, #1024]", + "str w22, [x20]", + "mov w22, #0x0", + "mov x23, x22", + "bfi x23, x21, #11, #3", + "ldrb w24, [x28, #744]", + "ldrb w25, [x28, #745]", + "ldrb w30, [x28, #746]", + "ldrb w18, [x28, #750]", + "strb w21, [sp]", + "orr x21, x23, x24, lsl #8", + "orr x23, x21, x25, lsl #9", + "orr x21, x23, x30, lsl #10", + "orr x23, x21, x18, lsl #14", + "str w23, [x20, #4]", + "ldrb w21, [x28, #1026]", + "and w23, w21, #0x1", "mov w24, #0x3", "mrs x25, nzcv", "cmp x23, #0x0 (0)", - "csel x23, x24, x21, eq", - "orr w23, w21, w23", - "lsr w30, w22, #1", - "and w30, w30, #0x1", - "cmp x30, #0x0 (0)", - "csel x30, x24, x21, eq", - "orr w23, w23, w30, lsl #2", - "lsr w30, w22, #2", - "and w30, w30, #0x1", + "csel x30, x24, x22, eq", + "orr w23, w22, w30", + "lsr w30, w21, #1", + "and w18, w30, #0x1", + "cmp x18, #0x0 (0)", + "csel x30, x24, x22, eq", + "orr w18, w23, w30, lsl #2", + "lsr w23, w21, #2", + "and w30, w23, #0x1", "cmp x30, #0x0 (0)", - "csel x30, x24, x21, eq", - "orr w23, w23, w30, lsl #4", - "lsr w30, w22, #3", - "and w30, w30, #0x1", - "cmp x30, #0x0 (0)", - "csel x30, x24, x21, eq", - "orr w23, w23, w30, lsl #6", - "lsr w30, w22, #4", - "and w30, w30, #0x1", - "cmp x30, #0x0 (0)", - "csel x30, x24, x21, eq", - "orr w23, w23, w30, lsl #8", - "lsr w30, w22, #5", - "and w30, w30, #0x1", + "csel x23, x24, x22, eq", + "orr w30, w18, w23, lsl #4", + "lsr w23, w21, #3", + "and w18, w23, #0x1", + "cmp x18, #0x0 (0)", + "csel x23, x24, x22, eq", + "orr w18, w30, w23, lsl #6", + "lsr w23, w21, #4", + "and w30, w23, #0x1", "cmp x30, #0x0 (0)", - "csel x30, x24, x21, eq", - "orr w23, w23, w30, lsl #10", - "lsr w30, w22, #6", - "and w30, w30, #0x1", + "csel x23, x24, x22, eq", + "orr w30, w18, w23, lsl #8", + "lsr w23, w21, #5", + "and w18, w23, #0x1", + "cmp x18, #0x0 (0)", + "csel x23, x24, x22, eq", + "orr w18, w30, w23, lsl #10", + "lsr w23, w21, #6", + "and w30, w23, #0x1", "cmp x30, #0x0 (0)", - "csel x30, x24, x21, eq", - "orr w23, w23, w30, lsl #12", - "lsr w22, w22, #7", - "and w22, w22, #0x1", - "cmp x22, #0x0 (0)", - "csel x22, x24, x21, eq", - "orr w22, w23, w22, lsl #14", - "str w22, [x4, #8]", - "str w21, [x4, #12]", - "str w21, [x4, #16]", - "str w21, [x4, #20]", - "str w21, [x4, #24]", - "add x22, x4, #0x1c (28)", - "add x0, x28, x20, lsl #4", - "ldr q2, [x0, #768]", - "stur q2, [x4, #28]", - "add x23, x22, #0xa (10)", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "add x0, x28, x20, lsl #4", - "ldr q2, [x0, #768]", - "stur q2, [x22, #10]", - "add x22, x23, #0xa (10)", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "add x0, x28, x20, lsl #4", - "ldr q2, [x0, #768]", - "stur q2, [x23, #10]", - "add x23, x22, #0xa (10)", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "add x0, x28, x20, lsl #4", - "ldr q2, [x0, #768]", - "stur q2, [x22, #10]", - "add x22, x23, #0xa (10)", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "add x0, x28, x20, lsl #4", - "ldr q2, [x0, #768]", - "stur q2, [x23, #10]", - "add x23, x22, #0xa (10)", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "add x0, x28, x20, lsl #4", - "ldr q2, [x0, #768]", - "stur q2, [x22, #10]", - "add x22, x23, #0xa (10)", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "add x0, x28, x20, lsl #4", - "ldr q2, [x0, #768]", - "stur q2, [x23, #10]", - "add x23, x22, #0xa (10)", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "add x0, x28, x20, lsl #4", - "ldr q2, [x0, #768]", - "stur d2, [x22, #10]", - "dup v2.8h, v2.h[4]", - "str h2, [x23, #8]", + "csel x23, x24, x22, eq", + "orr w30, w18, w23, lsl #12", + "lsr w23, w21, #7", + "and w21, w23, #0x1", + "cmp x21, #0x0 (0)", + "csel x23, x24, x22, eq", + "orr w21, w30, w23, lsl #14", + "str w21, [x20, #8]", + "str w22, [x20, #12]", + "str w22, [x20, #16]", + "str w22, [x20, #20]", + "str w22, [x20, #24]", + "add x21, x20, #0x1c (28)", + "ldrb w23, [sp]", + "add x0, x28, x23, lsl #4", + "ldr q2, [x0, #768]", + "stur q2, [x20, #28]", + "add x20, x21, #0xa (10)", + "add w24, w23, #0x1 (1)", + "and w23, w24, #0x7", + "add x0, x28, x23, lsl #4", + "ldr q2, [x0, #768]", + "stur q2, [x21, #10]", + "add x21, x20, #0xa (10)", + "add w24, w23, #0x1 (1)", + "and w23, w24, #0x7", + "add x0, x28, x23, lsl #4", + "ldr q2, [x0, #768]", + "stur q2, [x20, #10]", + "add x20, x21, #0xa (10)", + "add w24, w23, #0x1 (1)", + "and w23, w24, #0x7", + "add x0, x28, x23, lsl #4", + "ldr q2, [x0, #768]", + "stur q2, [x21, #10]", + "add x21, x20, #0xa (10)", + "add w24, w23, #0x1 (1)", + "and w23, w24, #0x7", + "add x0, x28, x23, lsl #4", + "ldr q2, [x0, #768]", + "stur q2, [x20, #10]", + "add x20, x21, #0xa (10)", + "add w24, w23, #0x1 (1)", + "and w23, w24, #0x7", + "add x0, x28, x23, lsl #4", + "ldr q2, [x0, #768]", + "stur q2, [x21, #10]", + "add x21, x20, #0xa (10)", + "add w24, w23, #0x1 (1)", + "and w23, w24, #0x7", + "add x0, x28, x23, lsl #4", + "ldr q2, [x0, #768]", + "stur q2, [x20, #10]", + "add x20, x21, #0xa (10)", + "add w24, w23, #0x1 (1)", + "and w23, w24, #0x7", + "add x0, x28, x23, lsl #4", + "ldr q2, [x0, #768]", + "stur d2, [x21, #10]", + "dup v3.8h, v2.h[4]", + "str h3, [x20, #8]", "mov w20, #0x37f", "strh w20, [x28, #1024]", - "strb w21, [x28, #747]", - "strb w21, [x28, #744]", - "strb w21, [x28, #745]", - "strb w21, [x28, #746]", - "strb w21, [x28, #750]", - "strb w21, [x28, #1026]", - "msr nzcv, x25" + "strb w22, [x28, #747]", + "strb w22, [x28, #744]", + "strb w22, [x28, #745]", + "strb w22, [x28, #746]", + "strb w22, [x28, #750]", + "strb w22, [x28, #1026]", + "msr nzcv, x25", + "add sp, sp, #0x20 (32)" ] }, "fnstsw [rax]": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 14, "Comment": [ "0xdd !11b /7" ], "ExpectedArm64ASM": [ "mov w20, #0x0", "ldrb w21, [x28, #747]", - "bfi x20, x21, #11, #3", - "ldrb w21, [x28, #744]", - "ldrb w22, [x28, #745]", + "mov x22, x20", + "bfi x22, x21, #11, #3", + "ldrb w20, [x28, #744]", + "ldrb w21, [x28, #745]", "ldrb w23, [x28, #746]", "ldrb w24, [x28, #750]", - "orr x20, x20, x21, lsl #8", - "orr x20, x20, x22, lsl #9", - "orr x20, x20, x23, lsl #10", - "orr x20, x20, x24, lsl #14", - "strh w20, [x4]" + "orr x25, x22, x20, lsl #8", + "orr x20, x25, x21, lsl #9", + "orr x21, x20, x23, lsl #10", + "orr x20, x21, x24, lsl #14", + "mov x21, x4", + "strh w20, [x21]" ] }, "ffree st0": { @@ -11849,12 +12001,12 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x0 (0)", - "and w20, w20, #0x7", + "add w21, w20, #0x0 (0)", + "and w20, w21, #0x7", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w20, w22, w20", - "bic w20, w21, w20", + "lsl w23, w22, w20", + "bic w20, w21, w23", "strb w20, [x28, #1026]" ] }, @@ -11866,11 +12018,11 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w20, w21, w20", - "bic w20, w22, w20", + "lsl w23, w21, w20", + "bic w20, w22, w23", "strb w20, [x28, #1026]" ] }, @@ -11881,12 +12033,12 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x2 (2)", - "and w20, w20, #0x7", + "add w21, w20, #0x2 (2)", + "and w20, w21, #0x7", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w20, w22, w20", - "bic w20, w21, w20", + "lsl w23, w22, w20", + "bic w20, w21, w23", "strb w20, [x28, #1026]" ] }, @@ -11897,12 +12049,12 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x3 (3)", - "and w20, w20, #0x7", + "add w21, w20, #0x3 (3)", + "and w20, w21, #0x7", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w20, w22, w20", - "bic w20, w21, w20", + "lsl w23, w22, w20", + "bic w20, w21, w23", "strb w20, [x28, #1026]" ] }, @@ -11913,12 +12065,12 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x4 (4)", - "and w20, w20, #0x7", + "add w21, w20, #0x4 (4)", + "and w20, w21, #0x7", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w20, w22, w20", - "bic w20, w21, w20", + "lsl w23, w22, w20", + "bic w20, w21, w23", "strb w20, [x28, #1026]" ] }, @@ -11929,12 +12081,12 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x5 (5)", - "and w20, w20, #0x7", + "add w21, w20, #0x5 (5)", + "and w20, w21, #0x7", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w20, w22, w20", - "bic w20, w21, w20", + "lsl w23, w22, w20", + "bic w20, w21, w23", "strb w20, [x28, #1026]" ] }, @@ -11945,12 +12097,12 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x6 (6)", - "and w20, w20, #0x7", + "add w21, w20, #0x6 (6)", + "and w20, w21, #0x7", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w20, w22, w20", - "bic w20, w21, w20", + "lsl w23, w22, w20", + "bic w20, w21, w23", "strb w20, [x28, #1026]" ] }, @@ -11961,12 +12113,12 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x7 (7)", - "and w20, w20, #0x7", + "add w21, w20, #0x7 (7)", + "and w20, w21, #0x7", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w20, w22, w20", - "bic w20, w21, w20", + "lsl w23, w22, w20", + "bic w20, w21, w23", "strb w20, [x28, #1026]" ] }, @@ -11978,10 +12130,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -11993,10 +12145,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -12008,10 +12160,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -12023,10 +12175,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -12038,10 +12190,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -12053,10 +12205,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -12068,10 +12220,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -12083,10 +12235,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -12098,18 +12250,18 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -12122,17 +12274,17 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", + "and w23, w22, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "str q2, [x0, #768]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -12144,18 +12296,18 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -12167,18 +12319,18 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -12190,18 +12342,18 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -12213,18 +12365,18 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -12236,18 +12388,18 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -12259,18 +12411,18 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -12283,8 +12435,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x0", "add w22, w20, #0x0 (0)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -12318,13 +12470,13 @@ "mov x20, x0", "ubfx x22, x20, #1, #1", "ubfx x23, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w22, w22, w20", - "orr w23, w23, w20", - "strb w22, [x28, #744]", + "ubfx x24, x20, #2, #1", + "orr w20, w22, w24", + "orr w22, w23, w24", + "strb w20, [x28, #744]", "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w23, [x28, #750]" + "strb w24, [x28, #746]", + "strb w22, [x28, #750]" ] }, "fucom st1": { @@ -12335,8 +12487,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -12370,14 +12522,14 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "fucom st2": { @@ -12388,8 +12540,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -12420,17 +12572,17 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "mov x20, x0", - "ubfx x21, x20, #1, #1", - "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "mov x20, x0", + "ubfx x21, x20, #1, #1", + "ubfx x22, x20, #0, #1", + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "fucom st3": { @@ -12441,8 +12593,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -12476,14 +12628,14 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "fucom st4": { @@ -12494,8 +12646,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -12529,14 +12681,14 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "fucom st5": { @@ -12547,8 +12699,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -12582,14 +12734,14 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "fucom st6": { @@ -12600,8 +12752,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -12635,14 +12787,14 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "fucom st7": { @@ -12653,8 +12805,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -12688,14 +12840,14 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "fucomp st0": { @@ -12707,8 +12859,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x0", "add w22, w20, #0x0 (0)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -12742,20 +12894,20 @@ "mov x22, x0", "ubfx x23, x22, #1, #1", "ubfx x24, x22, #0, #1", - "ubfx x22, x22, #2, #1", - "orr w23, w23, w22", - "orr w24, w24, w22", - "strb w23, [x28, #744]", + "ubfx x25, x22, #2, #1", + "orr w22, w23, w25", + "orr w23, w24, w25", + "strb w22, [x28, #744]", "strb w21, [x28, #745]", - "strb w22, [x28, #746]", - "strb w24, [x28, #750]", + "strb w25, [x28, #746]", + "strb w23, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -12768,8 +12920,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -12803,20 +12955,20 @@ "mov x22, x0", "ubfx x23, x22, #1, #1", "ubfx x24, x22, #0, #1", - "ubfx x22, x22, #2, #1", - "orr w23, w23, w22", - "orr w24, w24, w22", - "strb w23, [x28, #744]", - "mov w23, #0x0", - "strb w23, [x28, #745]", - "strb w22, [x28, #746]", - "strb w24, [x28, #750]", + "ubfx x25, x22, #2, #1", + "orr w22, w23, w25", + "orr w23, w24, w25", + "strb w22, [x28, #744]", + "mov w22, #0x0", + "strb w22, [x28, #745]", + "strb w25, [x28, #746]", + "strb w23, [x28, #750]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -12828,8 +12980,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -12863,21 +13015,21 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", - "mov w22, #0x0", - "strb w22, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", + "mov w21, #0x0", + "strb w21, [x28, #745]", + "strb w24, [x28, #746]", + "strb w22, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -12889,8 +13041,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -12924,21 +13076,21 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", - "mov w22, #0x0", - "strb w22, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", + "mov w21, #0x0", + "strb w21, [x28, #745]", + "strb w24, [x28, #746]", + "strb w22, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -12950,8 +13102,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -12985,21 +13137,21 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", - "mov w22, #0x0", - "strb w22, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", + "mov w21, #0x0", + "strb w21, [x28, #745]", + "strb w24, [x28, #746]", + "strb w22, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -13011,8 +13163,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -13046,21 +13198,21 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", - "mov w22, #0x0", - "strb w22, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", + "mov w21, #0x0", + "strb w21, [x28, #745]", + "strb w24, [x28, #746]", + "strb w22, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -13072,8 +13224,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -13107,21 +13259,21 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", - "mov w22, #0x0", - "strb w22, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", + "mov w21, #0x0", + "strb w21, [x28, #745]", + "strb w24, [x28, #746]", + "strb w22, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -13133,8 +13285,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -13168,32 +13320,33 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", - "mov w22, #0x0", - "strb w22, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", + "mov w21, #0x0", + "strb w21, [x28, #745]", + "strb w24, [x28, #746]", + "strb w22, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fiadd word [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xde !11b /0" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldrh w21, [x4]", + "mov x21, x4", + "ldrh w22, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -13205,7 +13358,7 @@ "st1 {v2.2d, v3.2d}, [x0], #32", "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", - "sxth w1, w21", + "sxth w1, w22", "ldrh w0, [x28, #1024]", "ldr x2, [x28, #1312]", "blr x2", @@ -13250,21 +13403,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fimul word [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xde !11b /1" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldrh w21, [x4]", + "mov x21, x4", + "ldrh w22, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -13276,7 +13430,7 @@ "st1 {v2.2d, v3.2d}, [x0], #32", "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", - "sxth w1, w21", + "sxth w1, w22", "ldrh w0, [x28, #1024]", "ldr x2, [x28, #1312]", "blr x2", @@ -13321,21 +13475,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "ficom word [rax]": { - "ExpectedInstructionCount": 69, + "ExpectedInstructionCount": 70, "Comment": [ "0xde !11b /2" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldrh w21, [x4]", + "mov x21, x4", + "ldrh w22, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -13347,7 +13502,7 @@ "st1 {v2.2d, v3.2d}, [x0], #32", "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", - "sxth w1, w21", + "sxth w1, w22", "ldrh w0, [x28, #1024]", "ldr x2, [x28, #1312]", "blr x2", @@ -13395,24 +13550,25 @@ "mov x20, x0", "ubfx x21, x20, #1, #1", "ubfx x22, x20, #0, #1", - "ubfx x20, x20, #2, #1", - "orr w21, w21, w20", - "orr w22, w22, w20", - "strb w21, [x28, #744]", - "mov w21, #0x0", - "strb w21, [x28, #745]", - "strb w20, [x28, #746]", - "strb w22, [x28, #750]" + "ubfx x23, x20, #2, #1", + "orr w20, w21, w23", + "orr w21, w22, w23", + "strb w20, [x28, #744]", + "mov w20, #0x0", + "strb w20, [x28, #745]", + "strb w23, [x28, #746]", + "strb w21, [x28, #750]" ] }, "ficomp word [rax]": { - "ExpectedInstructionCount": 77, + "ExpectedInstructionCount": 78, "Comment": [ "0xde !11b /3" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldrh w21, [x4]", + "mov x21, x4", + "ldrh w22, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -13424,7 +13580,7 @@ "st1 {v2.2d, v3.2d}, [x0], #32", "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", - "sxth w1, w21", + "sxth w1, w22", "ldrh w0, [x28, #1024]", "ldr x2, [x28, #1312]", "blr x2", @@ -13472,32 +13628,33 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "strb w22, [x28, #744]", - "mov w22, #0x0", - "strb w22, [x28, #745]", - "strb w21, [x28, #746]", - "strb w23, [x28, #750]", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "strb w21, [x28, #744]", + "mov w21, #0x0", + "strb w21, [x28, #745]", + "strb w24, [x28, #746]", + "strb w22, [x28, #750]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fisub word [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xde !11b /4" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldrh w21, [x4]", + "mov x21, x4", + "ldrh w22, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -13509,7 +13666,7 @@ "st1 {v2.2d, v3.2d}, [x0], #32", "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", - "sxth w1, w21", + "sxth w1, w22", "ldrh w0, [x28, #1024]", "ldr x2, [x28, #1312]", "blr x2", @@ -13554,21 +13711,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fisubr word [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xde !11b /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldrh w21, [x4]", + "mov x21, x4", + "ldrh w22, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -13580,7 +13738,7 @@ "st1 {v2.2d, v3.2d}, [x0], #32", "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", - "sxth w1, w21", + "sxth w1, w22", "ldrh w0, [x28, #1024]", "ldr x2, [x28, #1312]", "blr x2", @@ -13625,21 +13783,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fidiv word [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xde !11b /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldrh w21, [x4]", + "mov x21, x4", + "ldrh w22, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -13651,7 +13810,7 @@ "st1 {v2.2d, v3.2d}, [x0], #32", "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", - "sxth w1, w21", + "sxth w1, w22", "ldrh w0, [x28, #1024]", "ldr x2, [x28, #1312]", "blr x2", @@ -13696,21 +13855,22 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "fidivr word [rax]": { - "ExpectedInstructionCount": 63, + "ExpectedInstructionCount": 64, "Comment": [ "0xde !11b /7" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldrh w21, [x4]", + "mov x21, x4", + "ldrh w22, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -13722,7 +13882,7 @@ "st1 {v2.2d, v3.2d}, [x0], #32", "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", - "sxth w1, w21", + "sxth w1, w22", "ldrh w0, [x28, #1024]", "ldr x2, [x28, #1312]", "blr x2", @@ -13767,11 +13927,11 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q4, [x0, #768]" ] }, "faddp st0": { @@ -13782,8 +13942,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -13814,19 +13974,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "faddp st1": { @@ -13838,8 +13998,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -13870,18 +14030,18 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w20", + "bic w21, w22, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x22, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x23, lsl #4", + "str q4, [x0, #768]" ] }, "faddp st2": { @@ -13892,8 +14052,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -13924,19 +14084,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "faddp st3": { @@ -13947,8 +14107,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -13979,19 +14139,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "faddp st4": { @@ -14002,8 +14162,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14034,19 +14194,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "faddp st5": { @@ -14057,8 +14217,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14089,19 +14249,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "faddp st6": { @@ -14112,8 +14272,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14144,19 +14304,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "faddp st7": { @@ -14167,8 +14327,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14199,19 +14359,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fmulp st0": { @@ -14222,8 +14382,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14254,19 +14414,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fmulp st1": { @@ -14278,8 +14438,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14310,18 +14470,18 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w20", + "bic w21, w22, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x22, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x23, lsl #4", + "str q4, [x0, #768]" ] }, "fmulp st2": { @@ -14332,8 +14492,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14364,19 +14524,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fmulp st3": { @@ -14387,8 +14547,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14419,19 +14579,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fmulp st4": { @@ -14442,8 +14602,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14474,19 +14634,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fmulp st5": { @@ -14497,8 +14657,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14529,19 +14689,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fmulp st6": { @@ -14552,8 +14712,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14584,19 +14744,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fmulp st7": { @@ -14607,8 +14767,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14639,19 +14799,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fcompp": { @@ -14663,8 +14823,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14698,25 +14858,25 @@ "mov x22, x0", "ubfx x23, x22, #1, #1", "ubfx x24, x22, #0, #1", - "ubfx x22, x22, #2, #1", - "orr w23, w23, w22", - "orr w24, w24, w22", - "strb w23, [x28, #744]", - "mov w23, #0x0", - "strb w23, [x28, #745]", - "strb w22, [x28, #746]", - "strb w24, [x28, #750]", + "ubfx x25, x22, #2, #1", + "orr w22, w23, w25", + "orr w23, w24, w25", + "strb w22, [x28, #744]", + "mov w22, #0x0", + "strb w22, [x28, #745]", + "strb w25, [x28, #746]", + "strb w23, [x28, #750]", "ldrb w22, [x28, #1026]", "lsl w23, w21, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "bic w24, w22, w23", + "strb w24, [x28, #1026]", + "add w22, w20, #0x1 (1)", + "and w20, w22, #0x7", + "lsl w22, w21, w20", + "bic w21, w24, w22", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -14730,8 +14890,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14762,19 +14922,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubrp st1, st0": { @@ -14786,8 +14946,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14818,18 +14978,18 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w20", + "bic w21, w22, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x22, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x23, lsl #4", + "str q4, [x0, #768]" ] }, "fsubrp st2, st0": { @@ -14840,8 +15000,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14872,19 +15032,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubrp st3, st0": { @@ -14895,8 +15055,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14927,19 +15087,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubrp st4, st0": { @@ -14950,8 +15110,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -14982,19 +15142,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubrp st5, st0": { @@ -15005,8 +15165,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15037,19 +15197,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubrp st6, st0": { @@ -15060,8 +15220,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15092,19 +15252,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubrp st7, st0": { @@ -15115,8 +15275,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15147,19 +15307,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "db 0xde, 0xe8": { @@ -15172,8 +15332,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15204,19 +15364,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubp st1, st0": { @@ -15228,8 +15388,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15260,18 +15420,18 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w20", + "bic w21, w22, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x22, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x23, lsl #4", + "str q4, [x0, #768]" ] }, "fsubp st2, st0": { @@ -15282,8 +15442,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15314,19 +15474,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubp st3, st0": { @@ -15337,8 +15497,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15369,19 +15529,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubp st4, st0": { @@ -15392,8 +15552,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15424,19 +15584,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubp st5, st0": { @@ -15447,8 +15607,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15479,19 +15639,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubp st6, st0": { @@ -15502,8 +15662,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15534,19 +15694,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fsubp st7, st0": { @@ -15557,8 +15717,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15589,19 +15749,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "db 0xde, 0xf0": { @@ -15614,8 +15774,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15646,19 +15806,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivrp st1, st0": { @@ -15670,8 +15830,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15702,18 +15862,18 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w20", + "bic w21, w22, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x22, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x23, lsl #4", + "str q4, [x0, #768]" ] }, "fdivrp st2, st0": { @@ -15724,8 +15884,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15756,19 +15916,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivrp st3, st0": { @@ -15779,8 +15939,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15811,19 +15971,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivrp st4, st0": { @@ -15834,8 +15994,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15866,19 +16026,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivrp st5, st0": { @@ -15889,8 +16049,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15921,19 +16081,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivrp st6, st0": { @@ -15944,8 +16104,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -15976,19 +16136,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivrp st7, st0": { @@ -15999,8 +16159,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -16031,19 +16191,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "db 0xde, 0xf8": { @@ -16056,8 +16216,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -16088,19 +16248,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivp st1, st0": { @@ -16112,8 +16272,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -16144,18 +16304,18 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w20", + "bic w21, w22, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x22, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x23, lsl #4", + "str q4, [x0, #768]" ] }, "fdivp st2, st0": { @@ -16166,8 +16326,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -16198,19 +16358,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivp st3, st0": { @@ -16221,8 +16381,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -16253,19 +16413,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivp st4, st0": { @@ -16276,8 +16436,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -16308,19 +16468,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivp st5, st0": { @@ -16331,8 +16491,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -16363,19 +16523,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivp st6, st0": { @@ -16386,8 +16546,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -16418,19 +16578,19 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fdivp st7, st0": { @@ -16441,8 +16601,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -16473,66 +16633,71 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "ldrb w22, [x28, #1026]", + "eor v4.16b, v4.16b, v4.16b", + "mov v4.d[0], x0", + "mov v4.h[4], w1", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str q2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str q4, [x0, #768]" ] }, "fild word [rax]": { - "ExpectedInstructionCount": 35, + "ExpectedInstructionCount": 40, "Comment": [ "0xdf !11b /0" ], "ExpectedArm64ASM": [ + "sub sp, sp, #0x20 (32)", "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "ldrh w21, [x4]", - "mov w22, #0x0", - "sxth x21, w21", + "mov x21, x4", + "ldrh w22, [x21]", + "mov w21, #0x0", + "sxth x23, w22", "mrs x22, nzcv", - "cmp x21, #0x0 (0)", - "mov w23, #0x8000", - "csel x23, x23, xzr, lt", - "cneg x21, x21, mi", - "mov w24, #0x3f", + "cmp x23, #0x0 (0)", + "mov w24, #0x8000", + "csel x25, x24, xzr, lt", + "cneg x24, x23, mi", + "mov w23, #0x3f", "mov x0, #0x3f", - "clz x25, x21", - "sub x25, x0, x25", - "sub x24, x24, x25", - "lsl x25, x21, x24", + "clz x30, x24", + "sub x30, x0, x30", + "sub x18, x23, x30", + "lsl x23, x24, x18", "mov w30, #0x403e", - "sub x24, x30, x24", - "mov w30, #0x0", - "cmp x21, #0x0 (0)", - "csel x21, x30, x24, eq", - "orr x21, x23, x21", - "fmov d2, x25", + "str w22, [sp]", + "sub x22, x30, x18", + "cmp x24, #0x0 (0)", + "csel x30, x21, x22, eq", + "orr x21, x25, x30", + "fmov d2, x23", "fmov d3, x21", - "mov v2.d[1], v3.d[0]", + "mov v4.16b, v2.16b", + "mov v4.d[1], v3.d[0]", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x22" + "str q4, [x0, #768]", + "ldr w20, [sp]", + "msr nzcv, x20", + "add sp, sp, #0x20 (32)" ] }, "fisttp word [rax]": { - "ExpectedInstructionCount": 38, + "ExpectedInstructionCount": 39, "Comment": [ "0xdf !11b /1" ], @@ -16566,19 +16731,20 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", "sxth x21, w0", - "strh w21, [x4]", + "mov x22, x4", + "strh w21, [x22]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fist word [rax]": { - "ExpectedInstructionCount": 30, + "ExpectedInstructionCount": 31, "Comment": [ "0xdf !11b /2" ], @@ -16612,11 +16778,12 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", "sxth x20, w0", - "strh w20, [x4]" + "mov x21, x4", + "strh w20, [x21]" ] }, "fistp word [rax]": { - "ExpectedInstructionCount": 38, + "ExpectedInstructionCount": 39, "Comment": [ "0xdf !11b /3" ], @@ -16650,33 +16817,35 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", "sxth x21, w0", - "strh w21, [x4]", + "mov x22, x4", + "strh w21, [x22]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fbld tword [rax]": { - "ExpectedInstructionCount": 40, + "ExpectedInstructionCount": 41, "Comment": [ "0xdf !11b /4" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "ldr q2, [x4]", + "mov x21, x4", + "ldr q2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -16702,15 +16871,15 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q3, [x0, #768]" ] }, "fbstp tword [rax]": { - "ExpectedInstructionCount": 42, + "ExpectedInstructionCount": 43, "Comment": [ "0xdf !11b /6" ], @@ -16743,19 +16912,20 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "str d2, [x4]", - "mov x21, v2.d[1]", - "strh w21, [x4, #8]", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "mov x21, x4", + "str d3, [x21]", + "mov x22, v3.d[1]", + "strh w22, [x21, #8]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -16766,8 +16936,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -16778,8 +16948,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -16790,8 +16960,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -16802,8 +16972,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -16814,8 +16984,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -16826,8 +16996,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -16838,8 +17008,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -16850,41 +17020,45 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fnstsw ax": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 16, "Comment": [ "0xdf 11b 0xe0 /4" ], "ExpectedArm64ASM": [ "mov w20, #0x0", "ldrb w21, [x28, #747]", - "bfi x20, x21, #11, #3", - "ldrb w21, [x28, #744]", - "ldrb w22, [x28, #745]", + "mov x22, x20", + "bfi x22, x21, #11, #3", + "ldrb w20, [x28, #744]", + "ldrb w21, [x28, #745]", "ldrb w23, [x28, #746]", "ldrb w24, [x28, #750]", - "orr x20, x20, x21, lsl #8", - "orr x20, x20, x22, lsl #9", - "orr x20, x20, x23, lsl #10", - "orr x20, x20, x24, lsl #14", - "bfxil x4, x20, #0, #16" + "orr x25, x22, x20, lsl #8", + "orr x20, x25, x21, lsl #9", + "orr x21, x20, x23, lsl #10", + "orr x20, x21, x24, lsl #14", + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x4, x22" ] }, "fucomip st0": { - "ExpectedInstructionCount": 52, + "ExpectedInstructionCount": 53, "Comment": [ "0xdf 11b 0xe8 /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -16918,25 +17092,26 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "lsl x22, x22, #29", - "orr w22, w22, w23, lsl #30", - "mov w23, #0x1", - "eor w26, w21, #0x1", - "ldrb w21, [x28, #1026]", - "lsl w23, w23, w20", - "bic w21, w21, w23", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "lsl x23, x21, #29", + "orr w21, w23, w22, lsl #30", + "mov w22, #0x1", + "eor w23, w24, #0x1", + "mov x26, x23", + "ldrb w23, [x28, #1026]", + "lsl w24, w22, w20", + "bic w22, w23, w24", + "strb w22, [x28, #1026]", + "add w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "strb w20, [x28, #747]", - "msr nzcv, x22" + "msr nzcv, x21" ] }, "fucomip st1": { - "ExpectedInstructionCount": 52, + "ExpectedInstructionCount": 53, "Comment": [ "0xdf 11b 0xe9 /5" ], @@ -16944,8 +17119,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -16979,32 +17154,33 @@ "mov x22, x0", "ubfx x23, x22, #1, #1", "ubfx x24, x22, #0, #1", - "ubfx x22, x22, #2, #1", - "orr w23, w23, w22", - "orr w24, w24, w22", - "lsl x23, x23, #29", - "orr w23, w23, w24, lsl #30", - "eor w26, w22, #0x1", - "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "ubfx x25, x22, #2, #1", + "orr w22, w23, w25", + "orr w23, w24, w25", + "lsl x24, x22, #29", + "orr w22, w24, w23, lsl #30", + "eor w23, w25, #0x1", + "mov x26, x23", + "ldrb w23, [x28, #1026]", + "lsl w24, w21, w20", + "bic w21, w23, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "msr nzcv, x23" + "msr nzcv, x22" ] }, "fucomip st2": { - "ExpectedInstructionCount": 52, + "ExpectedInstructionCount": 53, "Comment": [ "0xdf 11b 0xea /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -17038,33 +17214,34 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "lsl x22, x22, #29", - "orr w22, w22, w23, lsl #30", - "mov w23, #0x1", - "eor w26, w21, #0x1", - "ldrb w21, [x28, #1026]", - "lsl w23, w23, w20", - "bic w21, w21, w23", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "lsl x23, x21, #29", + "orr w21, w23, w22, lsl #30", + "mov w22, #0x1", + "eor w23, w24, #0x1", + "mov x26, x23", + "ldrb w23, [x28, #1026]", + "lsl w24, w22, w20", + "bic w22, w23, w24", + "strb w22, [x28, #1026]", + "add w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "strb w20, [x28, #747]", - "msr nzcv, x22" + "msr nzcv, x21" ] }, "fucomip st3": { - "ExpectedInstructionCount": 52, + "ExpectedInstructionCount": 53, "Comment": [ "0xdf 11b 0xeb /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -17098,33 +17275,34 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "lsl x22, x22, #29", - "orr w22, w22, w23, lsl #30", - "mov w23, #0x1", - "eor w26, w21, #0x1", - "ldrb w21, [x28, #1026]", - "lsl w23, w23, w20", - "bic w21, w21, w23", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "lsl x23, x21, #29", + "orr w21, w23, w22, lsl #30", + "mov w22, #0x1", + "eor w23, w24, #0x1", + "mov x26, x23", + "ldrb w23, [x28, #1026]", + "lsl w24, w22, w20", + "bic w22, w23, w24", + "strb w22, [x28, #1026]", + "add w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "strb w20, [x28, #747]", - "msr nzcv, x22" + "msr nzcv, x21" ] }, "fucomip st4": { - "ExpectedInstructionCount": 52, + "ExpectedInstructionCount": 53, "Comment": [ "0xdf 11b 0xec /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -17158,33 +17336,34 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "lsl x22, x22, #29", - "orr w22, w22, w23, lsl #30", - "mov w23, #0x1", - "eor w26, w21, #0x1", - "ldrb w21, [x28, #1026]", - "lsl w23, w23, w20", - "bic w21, w21, w23", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "lsl x23, x21, #29", + "orr w21, w23, w22, lsl #30", + "mov w22, #0x1", + "eor w23, w24, #0x1", + "mov x26, x23", + "ldrb w23, [x28, #1026]", + "lsl w24, w22, w20", + "bic w22, w23, w24", + "strb w22, [x28, #1026]", + "add w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "strb w20, [x28, #747]", - "msr nzcv, x22" + "msr nzcv, x21" ] }, "fucomip st5": { - "ExpectedInstructionCount": 52, + "ExpectedInstructionCount": 53, "Comment": [ "0xdf 11b 0xed /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -17218,33 +17397,34 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "lsl x22, x22, #29", - "orr w22, w22, w23, lsl #30", - "mov w23, #0x1", - "eor w26, w21, #0x1", - "ldrb w21, [x28, #1026]", - "lsl w23, w23, w20", - "bic w21, w21, w23", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "lsl x23, x21, #29", + "orr w21, w23, w22, lsl #30", + "mov w22, #0x1", + "eor w23, w24, #0x1", + "mov x26, x23", + "ldrb w23, [x28, #1026]", + "lsl w24, w22, w20", + "bic w22, w23, w24", + "strb w22, [x28, #1026]", + "add w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "strb w20, [x28, #747]", - "msr nzcv, x22" + "msr nzcv, x21" ] }, "fucomip st6": { - "ExpectedInstructionCount": 52, + "ExpectedInstructionCount": 53, "Comment": [ "0xdf 11b 0xee /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -17278,33 +17458,34 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "lsl x22, x22, #29", - "orr w22, w22, w23, lsl #30", - "mov w23, #0x1", - "eor w26, w21, #0x1", - "ldrb w21, [x28, #1026]", - "lsl w23, w23, w20", - "bic w21, w21, w23", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "lsl x23, x21, #29", + "orr w21, w23, w22, lsl #30", + "mov w22, #0x1", + "eor w23, w24, #0x1", + "mov x26, x23", + "ldrb w23, [x28, #1026]", + "lsl w24, w22, w20", + "bic w22, w23, w24", + "strb w22, [x28, #1026]", + "add w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "strb w20, [x28, #747]", - "msr nzcv, x22" + "msr nzcv, x21" ] }, "fucomip st7": { - "ExpectedInstructionCount": 52, + "ExpectedInstructionCount": 53, "Comment": [ "0xdf 11b 0xef /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -17338,33 +17519,34 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "lsl x22, x22, #29", - "orr w22, w22, w23, lsl #30", - "mov w23, #0x1", - "eor w26, w21, #0x1", - "ldrb w21, [x28, #1026]", - "lsl w23, w23, w20", - "bic w21, w21, w23", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "lsl x23, x21, #29", + "orr w21, w23, w22, lsl #30", + "mov w22, #0x1", + "eor w23, w24, #0x1", + "mov x26, x23", + "ldrb w23, [x28, #1026]", + "lsl w24, w22, w20", + "bic w22, w23, w24", + "strb w22, [x28, #1026]", + "add w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "strb w20, [x28, #747]", - "msr nzcv, x22" + "msr nzcv, x21" ] }, "fcomip st0": { - "ExpectedInstructionCount": 52, + "ExpectedInstructionCount": 53, "Comment": [ "0xdf 11b 0xf0 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -17398,25 +17580,26 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "lsl x22, x22, #29", - "orr w22, w22, w23, lsl #30", - "mov w23, #0x1", - "eor w26, w21, #0x1", - "ldrb w21, [x28, #1026]", - "lsl w23, w23, w20", - "bic w21, w21, w23", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "lsl x23, x21, #29", + "orr w21, w23, w22, lsl #30", + "mov w22, #0x1", + "eor w23, w24, #0x1", + "mov x26, x23", + "ldrb w23, [x28, #1026]", + "lsl w24, w22, w20", + "bic w22, w23, w24", + "strb w22, [x28, #1026]", + "add w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "strb w20, [x28, #747]", - "msr nzcv, x22" + "msr nzcv, x21" ] }, "fcomip st1": { - "ExpectedInstructionCount": 52, + "ExpectedInstructionCount": 53, "Comment": [ "0xdf 11b 0xf1 /6" ], @@ -17424,8 +17607,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -17459,32 +17642,33 @@ "mov x22, x0", "ubfx x23, x22, #1, #1", "ubfx x24, x22, #0, #1", - "ubfx x22, x22, #2, #1", - "orr w23, w23, w22", - "orr w24, w24, w22", - "lsl x23, x23, #29", - "orr w23, w23, w24, lsl #30", - "eor w26, w22, #0x1", - "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "ubfx x25, x22, #2, #1", + "orr w22, w23, w25", + "orr w23, w24, w25", + "lsl x24, x22, #29", + "orr w22, w24, w23, lsl #30", + "eor w23, w25, #0x1", + "mov x26, x23", + "ldrb w23, [x28, #1026]", + "lsl w24, w21, w20", + "bic w21, w23, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "msr nzcv, x23" + "msr nzcv, x22" ] }, "fcomip st2": { - "ExpectedInstructionCount": 52, + "ExpectedInstructionCount": 53, "Comment": [ "0xdf 11b 0xf2 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -17518,33 +17702,34 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "lsl x22, x22, #29", - "orr w22, w22, w23, lsl #30", - "mov w23, #0x1", - "eor w26, w21, #0x1", - "ldrb w21, [x28, #1026]", - "lsl w23, w23, w20", - "bic w21, w21, w23", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "lsl x23, x21, #29", + "orr w21, w23, w22, lsl #30", + "mov w22, #0x1", + "eor w23, w24, #0x1", + "mov x26, x23", + "ldrb w23, [x28, #1026]", + "lsl w24, w22, w20", + "bic w22, w23, w24", + "strb w22, [x28, #1026]", + "add w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "strb w20, [x28, #747]", - "msr nzcv, x22" + "msr nzcv, x21" ] }, "fcomip st3": { - "ExpectedInstructionCount": 52, + "ExpectedInstructionCount": 53, "Comment": [ "0xdf 11b 0xf3 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -17578,33 +17763,34 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "lsl x22, x22, #29", - "orr w22, w22, w23, lsl #30", - "mov w23, #0x1", - "eor w26, w21, #0x1", - "ldrb w21, [x28, #1026]", - "lsl w23, w23, w20", - "bic w21, w21, w23", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "lsl x23, x21, #29", + "orr w21, w23, w22, lsl #30", + "mov w22, #0x1", + "eor w23, w24, #0x1", + "mov x26, x23", + "ldrb w23, [x28, #1026]", + "lsl w24, w22, w20", + "bic w22, w23, w24", + "strb w22, [x28, #1026]", + "add w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "strb w20, [x28, #747]", - "msr nzcv, x22" + "msr nzcv, x21" ] }, "fcomip st4": { - "ExpectedInstructionCount": 52, + "ExpectedInstructionCount": 53, "Comment": [ "0xdf 11b 0xf4 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -17638,33 +17824,34 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "lsl x22, x22, #29", - "orr w22, w22, w23, lsl #30", - "mov w23, #0x1", - "eor w26, w21, #0x1", - "ldrb w21, [x28, #1026]", - "lsl w23, w23, w20", - "bic w21, w21, w23", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "lsl x23, x21, #29", + "orr w21, w23, w22, lsl #30", + "mov w22, #0x1", + "eor w23, w24, #0x1", + "mov x26, x23", + "ldrb w23, [x28, #1026]", + "lsl w24, w22, w20", + "bic w22, w23, w24", + "strb w22, [x28, #1026]", + "add w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "strb w20, [x28, #747]", - "msr nzcv, x22" + "msr nzcv, x21" ] }, "fcomip st5": { - "ExpectedInstructionCount": 52, + "ExpectedInstructionCount": 53, "Comment": [ "0xdf 11b 0xf5 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -17698,33 +17885,34 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "lsl x22, x22, #29", - "orr w22, w22, w23, lsl #30", - "mov w23, #0x1", - "eor w26, w21, #0x1", - "ldrb w21, [x28, #1026]", - "lsl w23, w23, w20", - "bic w21, w21, w23", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "lsl x23, x21, #29", + "orr w21, w23, w22, lsl #30", + "mov w22, #0x1", + "eor w23, w24, #0x1", + "mov x26, x23", + "ldrb w23, [x28, #1026]", + "lsl w24, w22, w20", + "bic w22, w23, w24", + "strb w22, [x28, #1026]", + "add w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "strb w20, [x28, #747]", - "msr nzcv, x22" + "msr nzcv, x21" ] }, "fcomip st6": { - "ExpectedInstructionCount": 52, + "ExpectedInstructionCount": 53, "Comment": [ "0xdf 11b 0xf6 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -17758,33 +17946,34 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "lsl x22, x22, #29", - "orr w22, w22, w23, lsl #30", - "mov w23, #0x1", - "eor w26, w21, #0x1", - "ldrb w21, [x28, #1026]", - "lsl w23, w23, w20", - "bic w21, w21, w23", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "lsl x23, x21, #29", + "orr w21, w23, w22, lsl #30", + "mov w22, #0x1", + "eor w23, w24, #0x1", + "mov x26, x23", + "ldrb w23, [x28, #1026]", + "lsl w24, w22, w20", + "bic w22, w23, w24", + "strb w22, [x28, #1026]", + "add w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "strb w20, [x28, #747]", - "msr nzcv, x22" + "msr nzcv, x21" ] }, "fcomip st7": { - "ExpectedInstructionCount": 52, + "ExpectedInstructionCount": 53, "Comment": [ "0xdf 11b 0xf7 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr q2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", @@ -17818,21 +18007,22 @@ "mov x21, x0", "ubfx x22, x21, #1, #1", "ubfx x23, x21, #0, #1", - "ubfx x21, x21, #2, #1", - "orr w22, w22, w21", - "orr w23, w23, w21", - "lsl x22, x22, #29", - "orr w22, w22, w23, lsl #30", - "mov w23, #0x1", - "eor w26, w21, #0x1", - "ldrb w21, [x28, #1026]", - "lsl w23, w23, w20", - "bic w21, w21, w23", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "ubfx x24, x21, #2, #1", + "orr w21, w22, w24", + "orr w22, w23, w24", + "lsl x23, x21, #29", + "orr w21, w23, w22, lsl #30", + "mov w22, #0x1", + "eor w23, w24, #0x1", + "mov x26, x23", + "ldrb w23, [x28, #1026]", + "lsl w24, w22, w20", + "bic w22, w23, w24", + "strb w22, [x28, #1026]", + "add w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "strb w20, [x28, #747]", - "msr nzcv, x22" + "msr nzcv, x21" ] } } diff --git a/unittests/InstructionCountCI/x87_f64.json b/unittests/InstructionCountCI/x87_f64.json index 5dbd134b69..791ce6fc35 100644 --- a/unittests/InstructionCountCI/x87_f64.json +++ b/unittests/InstructionCountCI/x87_f64.json @@ -15,155 +15,163 @@ }, "Instructions": { "fadd dword [rax]": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 9, "Comment": [ "0xd8 !11b /0" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", - "fcvt d2, s2", + "mov x21, x4", + "ldr s2, [x21]", + "fcvt d3, s2", "add x0, x28, x20, lsl #4", - "ldr d3, [x0, #768]", - "fadd d2, d3, d2", + "ldr d2, [x0, #768]", + "fadd d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fmul dword [rax]": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 9, "Comment": [ "0xd8 !11b /1" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", - "fcvt d2, s2", + "mov x21, x4", + "ldr s2, [x21]", + "fcvt d3, s2", "add x0, x28, x20, lsl #4", - "ldr d3, [x0, #768]", - "fmul d2, d3, d2", + "ldr d2, [x0, #768]", + "fmul d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fcom dword [rax]": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 17, "Comment": [ "0xd8 !11b /2" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", - "fcvt d2, s2", + "mov x21, x4", + "ldr s2, [x21]", + "fcvt d3, s2", "add x0, x28, x20, lsl #4", - "ldr d3, [x0, #768]", - "fcmp d3, d2", + "ldr d2, [x0, #768]", + "fcmp d2, d3", "mov w20, #0x0", "cset w21, vs", "cset w22, eq", "cset w23, mi", - "orr w23, w23, w21", - "strb w23, [x28, #744]", - "orr w22, w22, w21", - "strb w22, [x28, #750]", + "orr w24, w23, w21", + "strb w24, [x28, #744]", + "orr w23, w22, w21", + "strb w23, [x28, #750]", "strb w20, [x28, #745]", "strb w21, [x28, #746]" ] }, "fcomp dword [rax]": { - "ExpectedInstructionCount": 24, + "ExpectedInstructionCount": 25, "Comment": [ "0xd8 !11b /3" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", - "fcvt d2, s2", + "mov x21, x4", + "ldr s2, [x21]", + "fcvt d3, s2", "add x0, x28, x20, lsl #4", - "ldr d3, [x0, #768]", - "fcmp d3, d2", + "ldr d2, [x0, #768]", + "fcmp d2, d3", "mov w21, #0x1", "mov w22, #0x0", "cset w23, vs", "cset w24, eq", "cset w25, mi", - "orr w25, w25, w23", - "strb w25, [x28, #744]", - "orr w24, w24, w23", - "strb w24, [x28, #750]", + "orr w30, w25, w23", + "strb w30, [x28, #744]", + "orr w25, w24, w23", + "strb w25, [x28, #750]", "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fsub dword [rax]": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 9, "Comment": [ "0xd8 !11b /4" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", - "fcvt d2, s2", + "mov x21, x4", + "ldr s2, [x21]", + "fcvt d3, s2", "add x0, x28, x20, lsl #4", - "ldr d3, [x0, #768]", - "fsub d2, d3, d2", + "ldr d2, [x0, #768]", + "fsub d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fsubr dword [rax]": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 9, "Comment": [ "0xd8 !11b /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", - "fcvt d2, s2", + "mov x21, x4", + "ldr s2, [x21]", + "fcvt d3, s2", "add x0, x28, x20, lsl #4", - "ldr d3, [x0, #768]", - "fsub d2, d2, d3", + "ldr d2, [x0, #768]", + "fsub d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdiv dword [rax]": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 9, "Comment": [ "0xd8 !11b /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", - "fcvt d2, s2", + "mov x21, x4", + "ldr s2, [x21]", + "fcvt d3, s2", "add x0, x28, x20, lsl #4", - "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", + "ldr d2, [x0, #768]", + "fdiv d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdivr dword [rax]": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 9, "Comment": [ "0xd8 !11b /7" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", - "fcvt d2, s2", + "mov x21, x4", + "ldr s2, [x21]", + "fcvt d3, s2", "add x0, x28, x20, lsl #4", - "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", + "ldr d2, [x0, #768]", + "fdiv d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fadd st0, st0": { @@ -174,14 +182,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fadd d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fadd st0, st1": { @@ -192,14 +200,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", + "fadd d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fadd st0, st2": { @@ -210,14 +218,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", + "fadd d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fadd st0, st3": { @@ -228,14 +236,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", + "fadd d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fadd st0, st4": { @@ -246,14 +254,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", + "fadd d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fadd st0, st5": { @@ -264,14 +272,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", + "fadd d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fadd st0, st6": { @@ -282,14 +290,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", + "fadd d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fadd st0, st7": { @@ -300,14 +308,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", + "fadd d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fmul st0, st0": { @@ -318,14 +326,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fmul d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fmul st0, st1": { @@ -336,14 +344,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", + "fmul d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fmul st0, st2": { @@ -354,14 +362,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", + "fmul d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fmul st0, st3": { @@ -372,14 +380,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", + "fmul d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fmul st0, st4": { @@ -390,14 +398,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", + "fmul d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fmul st0, st5": { @@ -408,14 +416,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", + "fmul d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fmul st0, st6": { @@ -426,14 +434,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", + "fmul d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fmul st0, st7": { @@ -444,14 +452,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", + "fmul d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fcom st0, st0": { @@ -463,8 +471,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x0", "add w22, w20, #0x0 (0)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -472,10 +480,10 @@ "cset w20, vs", "cset w22, eq", "cset w23, mi", - "orr w23, w23, w20", - "strb w23, [x28, #744]", - "orr w22, w22, w20", - "strb w22, [x28, #750]", + "orr w24, w23, w20", + "strb w24, [x28, #744]", + "orr w23, w22, w20", + "strb w23, [x28, #750]", "strb w21, [x28, #745]", "strb w20, [x28, #746]" ] @@ -488,8 +496,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -498,10 +506,10 @@ "cset w21, vs", "cset w22, eq", "cset w23, mi", - "orr w23, w23, w21", - "strb w23, [x28, #744]", - "orr w22, w22, w21", - "strb w22, [x28, #750]", + "orr w24, w23, w21", + "strb w24, [x28, #744]", + "orr w23, w22, w21", + "strb w23, [x28, #750]", "strb w20, [x28, #745]", "strb w21, [x28, #746]" ] @@ -514,8 +522,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -524,10 +532,10 @@ "cset w21, vs", "cset w22, eq", "cset w23, mi", - "orr w23, w23, w21", - "strb w23, [x28, #744]", - "orr w22, w22, w21", - "strb w22, [x28, #750]", + "orr w24, w23, w21", + "strb w24, [x28, #744]", + "orr w23, w22, w21", + "strb w23, [x28, #750]", "strb w20, [x28, #745]", "strb w21, [x28, #746]" ] @@ -540,8 +548,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -550,10 +558,10 @@ "cset w21, vs", "cset w22, eq", "cset w23, mi", - "orr w23, w23, w21", - "strb w23, [x28, #744]", - "orr w22, w22, w21", - "strb w22, [x28, #750]", + "orr w24, w23, w21", + "strb w24, [x28, #744]", + "orr w23, w22, w21", + "strb w23, [x28, #750]", "strb w20, [x28, #745]", "strb w21, [x28, #746]" ] @@ -566,8 +574,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -576,10 +584,10 @@ "cset w21, vs", "cset w22, eq", "cset w23, mi", - "orr w23, w23, w21", - "strb w23, [x28, #744]", - "orr w22, w22, w21", - "strb w22, [x28, #750]", + "orr w24, w23, w21", + "strb w24, [x28, #744]", + "orr w23, w22, w21", + "strb w23, [x28, #750]", "strb w20, [x28, #745]", "strb w21, [x28, #746]" ] @@ -592,8 +600,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -602,10 +610,10 @@ "cset w21, vs", "cset w22, eq", "cset w23, mi", - "orr w23, w23, w21", - "strb w23, [x28, #744]", - "orr w22, w22, w21", - "strb w22, [x28, #750]", + "orr w24, w23, w21", + "strb w24, [x28, #744]", + "orr w23, w22, w21", + "strb w23, [x28, #750]", "strb w20, [x28, #745]", "strb w21, [x28, #746]" ] @@ -618,8 +626,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -628,10 +636,10 @@ "cset w21, vs", "cset w22, eq", "cset w23, mi", - "orr w23, w23, w21", - "strb w23, [x28, #744]", - "orr w22, w22, w21", - "strb w22, [x28, #750]", + "orr w24, w23, w21", + "strb w24, [x28, #744]", + "orr w23, w22, w21", + "strb w23, [x28, #750]", "strb w20, [x28, #745]", "strb w21, [x28, #746]" ] @@ -644,8 +652,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -654,10 +662,10 @@ "cset w21, vs", "cset w22, eq", "cset w23, mi", - "orr w23, w23, w21", - "strb w23, [x28, #744]", - "orr w22, w22, w21", - "strb w22, [x28, #750]", + "orr w24, w23, w21", + "strb w24, [x28, #744]", + "orr w23, w22, w21", + "strb w23, [x28, #750]", "strb w20, [x28, #745]", "strb w21, [x28, #746]" ] @@ -671,8 +679,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x0", "add w22, w20, #0x0 (0)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -681,18 +689,18 @@ "cset w23, vs", "cset w24, eq", "cset w25, mi", - "orr w25, w25, w23", - "strb w25, [x28, #744]", - "orr w24, w24, w23", - "strb w24, [x28, #750]", + "orr w30, w25, w23", + "strb w30, [x28, #744]", + "orr w25, w24, w23", + "strb w25, [x28, #750]", "strb w21, [x28, #745]", "strb w23, [x28, #746]", "ldrb w21, [x28, #1026]", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -705,8 +713,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -715,18 +723,18 @@ "cset w23, vs", "cset w24, eq", "cset w25, mi", - "orr w25, w25, w23", - "strb w25, [x28, #744]", - "orr w24, w24, w23", - "strb w24, [x28, #750]", + "orr w30, w25, w23", + "strb w30, [x28, #744]", + "orr w25, w24, w23", + "strb w25, [x28, #750]", "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -738,8 +746,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -749,18 +757,18 @@ "cset w23, vs", "cset w24, eq", "cset w25, mi", - "orr w25, w25, w23", - "strb w25, [x28, #744]", - "orr w24, w24, w23", - "strb w24, [x28, #750]", + "orr w30, w25, w23", + "strb w30, [x28, #744]", + "orr w25, w24, w23", + "strb w25, [x28, #750]", "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -772,8 +780,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -783,18 +791,18 @@ "cset w23, vs", "cset w24, eq", "cset w25, mi", - "orr w25, w25, w23", - "strb w25, [x28, #744]", - "orr w24, w24, w23", - "strb w24, [x28, #750]", + "orr w30, w25, w23", + "strb w30, [x28, #744]", + "orr w25, w24, w23", + "strb w25, [x28, #750]", "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -806,8 +814,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -817,18 +825,18 @@ "cset w23, vs", "cset w24, eq", "cset w25, mi", - "orr w25, w25, w23", - "strb w25, [x28, #744]", - "orr w24, w24, w23", - "strb w24, [x28, #750]", + "orr w30, w25, w23", + "strb w30, [x28, #744]", + "orr w25, w24, w23", + "strb w25, [x28, #750]", "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -840,8 +848,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -851,18 +859,18 @@ "cset w23, vs", "cset w24, eq", "cset w25, mi", - "orr w25, w25, w23", - "strb w25, [x28, #744]", - "orr w24, w24, w23", - "strb w24, [x28, #750]", + "orr w30, w25, w23", + "strb w30, [x28, #744]", + "orr w25, w24, w23", + "strb w25, [x28, #750]", "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -874,8 +882,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -885,18 +893,18 @@ "cset w23, vs", "cset w24, eq", "cset w25, mi", - "orr w25, w25, w23", - "strb w25, [x28, #744]", - "orr w24, w24, w23", - "strb w24, [x28, #750]", + "orr w30, w25, w23", + "strb w30, [x28, #744]", + "orr w25, w24, w23", + "strb w25, [x28, #750]", "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -908,8 +916,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -919,18 +927,18 @@ "cset w23, vs", "cset w24, eq", "cset w25, mi", - "orr w25, w25, w23", - "strb w25, [x28, #744]", - "orr w24, w24, w23", - "strb w24, [x28, #750]", + "orr w30, w25, w23", + "strb w30, [x28, #744]", + "orr w25, w24, w23", + "strb w25, [x28, #750]", "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -942,14 +950,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsub st0, st1": { @@ -960,14 +968,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", + "fsub d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fsub st0, st2": { @@ -978,14 +986,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", + "fsub d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fsub st0, st3": { @@ -996,14 +1004,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", + "fsub d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fsub st0, st4": { @@ -1014,14 +1022,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", + "fsub d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fsub st0, st5": { @@ -1032,14 +1040,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", + "fsub d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fsub st0, st6": { @@ -1050,14 +1058,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", + "fsub d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fsub st0, st7": { @@ -1068,14 +1076,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", + "fsub d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fsubr st0, st0": { @@ -1086,14 +1094,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubr st0, st1": { @@ -1104,14 +1112,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", + "fsub d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fsubr st0, st2": { @@ -1122,14 +1130,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", + "fsub d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fsubr st0, st3": { @@ -1140,14 +1148,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", + "fsub d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fsubr st0, st4": { @@ -1158,14 +1166,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", + "fsub d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fsubr st0, st5": { @@ -1176,14 +1184,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", + "fsub d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fsubr st0, st6": { @@ -1194,14 +1202,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", + "fsub d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fsubr st0, st7": { @@ -1212,14 +1220,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", + "fsub d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdiv st0, st0": { @@ -1230,14 +1238,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdiv st0, st1": { @@ -1248,14 +1256,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", + "fdiv d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdiv st0, st2": { @@ -1266,14 +1274,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", + "fdiv d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdiv st0, st3": { @@ -1284,14 +1292,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", + "fdiv d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdiv st0, st4": { @@ -1302,14 +1310,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", + "fdiv d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdiv st0, st5": { @@ -1320,14 +1328,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", + "fdiv d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdiv st0, st6": { @@ -1338,14 +1346,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", + "fdiv d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdiv st0, st7": { @@ -1356,14 +1364,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", + "fdiv d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdivr st0, st0": { @@ -1374,14 +1382,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivr st0, st1": { @@ -1392,14 +1400,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", + "fdiv d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdivr st0, st2": { @@ -1410,14 +1418,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", + "fdiv d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdivr st0, st3": { @@ -1428,14 +1436,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", + "fdiv d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdivr st0, st4": { @@ -1446,14 +1454,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", + "fdiv d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdivr st0, st5": { @@ -1464,14 +1472,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", + "fdiv d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdivr st0, st6": { @@ -1482,14 +1490,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", + "fdiv d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdivr st0, st7": { @@ -1500,39 +1508,40 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", + "fdiv d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fld dword [rax]": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xd9 !11b /0" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr s2, [x4]", - "fcvt d2, s2", + "mov x21, x4", + "ldr s2, [x21]", + "fcvt d3, s2", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d3, [x0, #768]" ] }, "fst dword [rax]": { - "ExpectedInstructionCount": 5, + "ExpectedInstructionCount": 6, "Comment": [ "0xd9 !11b /2" ], @@ -1540,12 +1549,13 @@ "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", - "fcvt s2, d2", - "str s2, [x4]" + "fcvt s3, d2", + "mov x20, x4", + "str s3, [x20]" ] }, "fstp dword [rax]": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xd9 !11b /3" ], @@ -1553,98 +1563,101 @@ "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", - "fcvt s2, d2", - "str s2, [x4]", + "fcvt s3, d2", + "mov x21, x4", + "str s3, [x21]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fldenv [rax]": { - "ExpectedInstructionCount": 56, + "ExpectedInstructionCount": 57, "Comment": [ "0xd9 !11b /4" ], "ExpectedArm64ASM": [ - "ldrh w20, [x4]", - "ubfx w21, w20, #10, #3", - "rbit w1, w21", + "mov x20, x4", + "ldrh w21, [x20]", + "ubfx w22, w21, #10, #3", + "rbit w1, w22", "lsr w1, w1, #30", "mrs x0, fpcr", "bfi x0, x1, #22, #2", - "lsr x1, x21, #2", + "lsr x1, x22, #2", "bfi x0, x1, #24, #1", "msr fpcr, x0", - "strh w20, [x28, #1024]", - "ldr w20, [x4, #4]", - "ubfx w21, w20, #11, #3", - "strb w21, [x28, #747]", - "ubfx w21, w20, #8, #1", - "ubfx w22, w20, #9, #1", - "ubfx w23, w20, #10, #1", - "ubfx w20, w20, #14, #1", - "strb w21, [x28, #744]", - "strb w22, [x28, #745]", - "strb w23, [x28, #746]", - "strb w20, [x28, #750]", - "ldr w20, [x4, #8]", - "ubfx w21, w20, #0, #2", + "strh w21, [x28, #1024]", + "ldr w21, [x20, #4]", + "ubfx w22, w21, #11, #3", + "strb w22, [x28, #747]", + "ubfx w22, w21, #8, #1", + "ubfx w23, w21, #9, #1", + "ubfx w24, w21, #10, #1", + "ubfx w25, w21, #14, #1", + "strb w22, [x28, #744]", + "strb w23, [x28, #745]", + "strb w24, [x28, #746]", + "strb w25, [x28, #750]", + "ldr w21, [x20, #8]", + "ubfx w20, w21, #0, #2", "mrs x22, nzcv", - "cmp x21, #0x3 (3)", - "cset x21, ne", - "ubfx w23, w20, #2, #2", - "cmp x23, #0x3 (3)", - "cset x23, ne", - "orr w21, w21, w23, lsl #1", - "ubfx w23, w20, #4, #2", - "cmp x23, #0x3 (3)", - "cset x23, ne", - "orr w21, w21, w23, lsl #2", - "ubfx w23, w20, #6, #2", - "cmp x23, #0x3 (3)", + "cmp x20, #0x3 (3)", "cset x23, ne", - "orr w21, w21, w23, lsl #3", - "ubfx w23, w20, #8, #2", + "ubfx w20, w21, #2, #2", + "cmp x20, #0x3 (3)", + "cset x24, ne", + "orr w20, w23, w24, lsl #1", + "ubfx w23, w21, #4, #2", "cmp x23, #0x3 (3)", - "cset x23, ne", - "orr w21, w21, w23, lsl #4", - "ubfx w23, w20, #10, #2", + "cset x24, ne", + "orr w23, w20, w24, lsl #2", + "ubfx w20, w21, #6, #2", + "cmp x20, #0x3 (3)", + "cset x24, ne", + "orr w20, w23, w24, lsl #3", + "ubfx w23, w21, #8, #2", "cmp x23, #0x3 (3)", - "cset x23, ne", - "orr w21, w21, w23, lsl #5", - "ubfx w23, w20, #12, #2", + "cset x24, ne", + "orr w23, w20, w24, lsl #4", + "ubfx w20, w21, #10, #2", + "cmp x20, #0x3 (3)", + "cset x24, ne", + "orr w20, w23, w24, lsl #5", + "ubfx w23, w21, #12, #2", "cmp x23, #0x3 (3)", - "cset x23, ne", - "orr w21, w21, w23, lsl #6", - "ubfx w20, w20, #14, #2", + "cset x24, ne", + "orr w23, w20, w24, lsl #6", + "ubfx w20, w21, #14, #2", "cmp x20, #0x3 (3)", - "cset x20, ne", - "orr w20, w21, w20, lsl #7", + "cset x21, ne", + "orr w20, w23, w21, lsl #7", "strb w20, [x28, #1026]", "msr nzcv, x22" ] }, "fldcw [rax]": { - "ExpectedInstructionCount": 10, + "ExpectedInstructionCount": 11, "Comment": [ "0xd9 !11b /5" ], "ExpectedArm64ASM": [ - "ldrh w20, [x4]", - "ubfx w21, w20, #10, #3", - "rbit w1, w21", + "mov x20, x4", + "ldrh w21, [x20]", + "ubfx w20, w21, #10, #3", + "rbit w1, w20", "lsr w1, w1, #30", "mrs x0, fpcr", "bfi x0, x1, #22, #2", - "lsr x1, x21, #2", + "lsr x1, x20, #2", "bfi x0, x1, #24, #1", "msr fpcr, x0", - "strh w20, [x28, #1024]" + "strh w21, [x28, #1024]" ] }, "fnstenv [rax]": { @@ -1653,80 +1666,81 @@ "0xd9 !11b /6" ], "ExpectedArm64ASM": [ - "ldrh w20, [x28, #1024]", - "str w20, [x4]", - "mov w20, #0x0", - "ldrb w21, [x28, #747]", - "mov x0, x20", - "bfi x0, x21, #11, #3", - "mov x21, x0", + "mov x20, x4", + "ldrh w21, [x28, #1024]", + "str w21, [x20]", + "mov w21, #0x0", + "ldrb w22, [x28, #747]", + "mov x23, x21", + "bfi x23, x22, #11, #3", "ldrb w22, [x28, #744]", - "ldrb w23, [x28, #745]", - "ldrb w24, [x28, #746]", - "ldrb w25, [x28, #750]", - "orr x21, x21, x22, lsl #8", - "orr x21, x21, x23, lsl #9", - "orr x21, x21, x24, lsl #10", - "orr x21, x21, x25, lsl #14", - "str w21, [x4, #4]", - "ldrb w21, [x28, #1026]", - "and w22, w21, #0x1", - "mov w23, #0x3", - "mrs x24, nzcv", + "ldrb w24, [x28, #745]", + "ldrb w25, [x28, #746]", + "ldrb w30, [x28, #750]", + "orr x18, x23, x22, lsl #8", + "orr x22, x18, x24, lsl #9", + "orr x23, x22, x25, lsl #10", + "orr x22, x23, x30, lsl #14", + "str w22, [x20, #4]", + "ldrb w22, [x28, #1026]", + "and w23, w22, #0x1", + "mov w24, #0x3", + "mrs x25, nzcv", + "cmp x23, #0x0 (0)", + "csel x30, x24, x21, eq", + "orr w23, w21, w30", + "lsr w30, w22, #1", + "and w18, w30, #0x1", + "cmp x18, #0x0 (0)", + "csel x30, x24, x21, eq", + "orr w18, w23, w30, lsl #2", + "lsr w23, w22, #2", + "and w30, w23, #0x1", + "cmp x30, #0x0 (0)", + "csel x23, x24, x21, eq", + "orr w30, w18, w23, lsl #4", + "lsr w23, w22, #3", + "and w18, w23, #0x1", + "cmp x18, #0x0 (0)", + "csel x23, x24, x21, eq", + "orr w18, w30, w23, lsl #6", + "lsr w23, w22, #4", + "and w30, w23, #0x1", + "cmp x30, #0x0 (0)", + "csel x23, x24, x21, eq", + "orr w30, w18, w23, lsl #8", + "lsr w23, w22, #5", + "and w18, w23, #0x1", + "cmp x18, #0x0 (0)", + "csel x23, x24, x21, eq", + "orr w18, w30, w23, lsl #10", + "lsr w23, w22, #6", + "and w30, w23, #0x1", + "cmp x30, #0x0 (0)", + "csel x23, x24, x21, eq", + "orr w30, w18, w23, lsl #12", + "lsr w23, w22, #7", + "and w22, w23, #0x1", "cmp x22, #0x0 (0)", - "csel x22, x23, x20, eq", - "orr w22, w20, w22", - "lsr w25, w21, #1", - "and w25, w25, #0x1", - "cmp x25, #0x0 (0)", - "csel x25, x23, x20, eq", - "orr w22, w22, w25, lsl #2", - "lsr w25, w21, #2", - "and w25, w25, #0x1", - "cmp x25, #0x0 (0)", - "csel x25, x23, x20, eq", - "orr w22, w22, w25, lsl #4", - "lsr w25, w21, #3", - "and w25, w25, #0x1", - "cmp x25, #0x0 (0)", - "csel x25, x23, x20, eq", - "orr w22, w22, w25, lsl #6", - "lsr w25, w21, #4", - "and w25, w25, #0x1", - "cmp x25, #0x0 (0)", - "csel x25, x23, x20, eq", - "orr w22, w22, w25, lsl #8", - "lsr w25, w21, #5", - "and w25, w25, #0x1", - "cmp x25, #0x0 (0)", - "csel x25, x23, x20, eq", - "orr w22, w22, w25, lsl #10", - "lsr w25, w21, #6", - "and w25, w25, #0x1", - "cmp x25, #0x0 (0)", - "csel x25, x23, x20, eq", - "orr w22, w22, w25, lsl #12", - "lsr w21, w21, #7", - "and w21, w21, #0x1", - "cmp x21, #0x0 (0)", - "csel x21, x23, x20, eq", - "orr w21, w22, w21, lsl #14", - "str w21, [x4, #8]", - "str w20, [x4, #12]", - "str w20, [x4, #16]", - "str w20, [x4, #20]", - "str w20, [x4, #24]", - "msr nzcv, x24" + "csel x23, x24, x21, eq", + "orr w22, w30, w23, lsl #14", + "str w22, [x20, #8]", + "str w21, [x20, #12]", + "str w21, [x20, #16]", + "str w21, [x20, #20]", + "str w21, [x20, #24]", + "msr nzcv, x25" ] }, "fnstcw [rax]": { - "ExpectedInstructionCount": 2, + "ExpectedInstructionCount": 3, "Comment": [ "0xd9 !11b /7" ], "ExpectedArm64ASM": [ "ldrh w20, [x28, #1024]", - "strh w20, [x4]" + "mov x21, x4", + "strh w20, [x21]" ] }, "fld st0": { @@ -1737,15 +1751,15 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -1761,14 +1775,14 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -1783,15 +1797,15 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -1806,15 +1820,15 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -1829,15 +1843,15 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -1852,15 +1866,15 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -1875,15 +1889,15 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -1898,15 +1912,15 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -1921,14 +1935,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -1940,14 +1954,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -1959,14 +1973,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -1978,14 +1992,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -1997,14 +2011,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -2016,14 +2030,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -2035,14 +2049,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -2054,14 +2068,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q3, [x0, #768]", "add x0, x28, x20, lsl #4", "str q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -2081,9 +2095,9 @@ "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", - "fneg v2.2d, v2.2d", + "fneg v3.2d, v2.2d", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d3, [x0, #768]" ] }, "fabs": { @@ -2095,9 +2109,9 @@ "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", - "fabs d2, d2", + "fabs d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d3, [x0, #768]" ] }, "ftst": { @@ -2115,10 +2129,10 @@ "cset w21, vs", "cset w22, eq", "cset w23, mi", - "orr w23, w23, w21", - "strb w23, [x28, #744]", - "orr w22, w22, w21", - "strb w22, [x28, #750]", + "orr w24, w23, w21", + "strb w24, [x28, #744]", + "orr w23, w22, w21", + "strb w23, [x28, #750]", "strb w20, [x28, #745]", "strb w21, [x28, #746]" ] @@ -2133,19 +2147,19 @@ "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", "mov x21, v2.d[0]", - "lsr x21, x21, #63", - "strb w21, [x28, #745]", + "lsr x22, x21, #63", + "strb w22, [x28, #745]", "ldrb w21, [x28, #1026]", - "lsr w20, w21, w20", - "mov w21, #0x1", - "and w20, w20, #0x1", + "lsr w22, w21, w20", + "mov w20, #0x1", + "and w21, w22, #0x1", "mov w22, #0x0", "mrs x23, nzcv", - "cmp x20, #0x1 (1)", - "csel x21, x22, x21, eq", - "strb w21, [x28, #744]", - "strb w20, [x28, #746]", - "strb w21, [x28, #750]", + "cmp x21, #0x1 (1)", + "csel x24, x22, x20, eq", + "strb w24, [x28, #744]", + "strb w21, [x28, #746]", + "strb w24, [x28, #750]", "msr nzcv, x23" ] }, @@ -2157,11 +2171,11 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "mov x21, #0x3ff0000000000000", @@ -2178,11 +2192,11 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "mov x21, #0xa372", @@ -2202,11 +2216,11 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "mov x21, #0x82fe", @@ -2226,11 +2240,11 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "mov x21, #0x2d18", @@ -2250,11 +2264,11 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "mov x21, #0x79ff", @@ -2274,11 +2288,11 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "mov x21, #0x39ef", @@ -2298,11 +2312,11 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "mov w21, #0x0", @@ -2370,9 +2384,9 @@ "ldp x19, x29, [x28, #120]", "ldr x26, [x28, #752]", "ldr x27, [x28, #760]", - "mov v2.8b, v0.8b", + "mov v3.8b, v0.8b", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d3, [x0, #768]" ] }, "fyl2x": { @@ -2384,15 +2398,15 @@ "ldrb w20, [x28, #747]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "strb w21, [x28, #747]", + "and w22, w21, #0x7", + "strb w22, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr d3, [x0, #768]", "mov v0.8b, v2.8b", "mov v1.8b, v3.8b", @@ -2445,9 +2459,9 @@ "ldp x19, x29, [x28, #120]", "ldr x26, [x28, #752]", "ldr x27, [x28, #760]", - "mov v2.8b, v0.8b", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "mov v4.8b, v0.8b", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fptan": { @@ -2459,12 +2473,12 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "sub w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w22", - "orr w21, w23, w21", + "and w23, w22, #0x7", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w23", + "orr w21, w22, w24", "strb w21, [x28, #1026]", - "strb w22, [x28, #747]", + "strb w23, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", "mrs x0, nzcv", @@ -2517,15 +2531,15 @@ "ldp x19, x29, [x28, #120]", "ldr x26, [x28, #752]", "ldr x27, [x28, #760]", - "mov v2.8b, v0.8b", + "mov v3.8b, v0.8b", "mov x21, #0x3ff0000000000000", - "fmov d3, x21", + "fmov d2, x21", "mov w21, #0x0", "strb w21, [x28, #746]", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]", - "add x0, x28, x22, lsl #4", - "str d3, [x0, #768]" + "str d3, [x0, #768]", + "add x0, x28, x23, lsl #4", + "str d2, [x0, #768]" ] }, "fpatan": { @@ -2537,15 +2551,15 @@ "ldrb w20, [x28, #747]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "strb w21, [x28, #747]", + "and w22, w21, #0x7", + "strb w22, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr d3, [x0, #768]", "mov v0.8b, v3.8b", "mov v1.8b, v2.8b", @@ -2598,9 +2612,9 @@ "ldp x19, x29, [x28, #120]", "ldr x26, [x28, #752]", "ldr x27, [x28, #760]", - "mov v2.8b, v0.8b", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "mov v4.8b, v0.8b", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fxtract": { @@ -2612,25 +2626,25 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "sub w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w22", - "orr w21, w23, w21", + "and w23, w22, #0x7", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w23", + "orr w21, w22, w24", "strb w21, [x28, #1026]", - "strb w22, [x28, #747]", + "strb w23, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", "mov x21, v2.d[0]", - "and x23, x21, #0x7ff0000000000000", - "lsr x23, x23, #52", - "sub x23, x23, #0x3ff (1023)", - "scvtf d2, x23", - "and x21, x21, #0x800fffffffffffff", - "orr x21, x21, #0x3ff0000000000000", + "and x22, x21, #0x7ff0000000000000", + "lsr x24, x22, #52", + "sub x22, x24, #0x3ff (1023)", + "scvtf d2, x22", + "and x22, x21, #0x800fffffffffffff", + "orr x21, x22, #0x3ff0000000000000", "fmov d3, x21", "add x0, x28, x20, lsl #4", "str d2, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "str d3, [x0, #768]" ] }, @@ -2642,10 +2656,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr d3, [x0, #768]", "mov v0.8b, v2.8b", "mov v1.8b, v3.8b", @@ -2698,11 +2712,11 @@ "ldp x19, x29, [x28, #120]", "ldr x26, [x28, #752]", "ldr x27, [x28, #760]", - "mov v2.8b, v0.8b", + "mov v4.8b, v0.8b", "mov w21, #0x0", "strb w21, [x28, #746]", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdecstp": { @@ -2712,8 +2726,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -2724,8 +2738,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -2737,10 +2751,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr d3, [x0, #768]", "mov v0.8b, v2.8b", "mov v1.8b, v3.8b", @@ -2793,11 +2807,11 @@ "ldp x19, x29, [x28, #120]", "ldr x26, [x28, #752]", "ldr x27, [x28, #760]", - "mov v2.8b, v0.8b", + "mov v4.8b, v0.8b", "mov w21, #0x0", "strb w21, [x28, #746]", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fyl2xp1": { @@ -2809,20 +2823,20 @@ "ldrb w20, [x28, #747]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "strb w21, [x28, #747]", + "and w22, w21, #0x7", + "strb w22, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr d3, [x0, #768]", "mov x20, #0x3ff0000000000000", "fmov d4, x20", - "fadd d2, d2, d4", - "mov v0.8b, v2.8b", + "fadd d5, d2, d4", + "mov v0.8b, v5.8b", "mov v1.8b, v3.8b", "mrs x0, nzcv", "str w0, [x28, #728]", @@ -2874,7 +2888,7 @@ "ldr x26, [x28, #752]", "ldr x27, [x28, #760]", "mov v2.8b, v0.8b", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str d2, [x0, #768]" ] }, @@ -2887,9 +2901,9 @@ "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", - "fsqrt d2, d2", + "fsqrt d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d3, [x0, #768]" ] }, "fsincos": { @@ -2901,12 +2915,12 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "sub w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w22", - "orr w21, w23, w21", + "and w23, w22, #0x7", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w23", + "orr w21, w22, w24", "strb w21, [x28, #1026]", - "strb w22, [x28, #747]", + "strb w23, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", "mrs x0, nzcv", @@ -3010,13 +3024,13 @@ "ldp x19, x29, [x28, #120]", "ldr x26, [x28, #752]", "ldr x27, [x28, #760]", - "mov v2.8b, v0.8b", + "mov v4.8b, v0.8b", "mov w21, #0x0", "strb w21, [x28, #746]", "add x0, x28, x20, lsl #4", "str d3, [x0, #768]", - "add x0, x28, x22, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x23, lsl #4", + "str d4, [x0, #768]" ] }, "frndint": { @@ -3028,9 +3042,9 @@ "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", - "frinti d2, d2", + "frinti d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d3, [x0, #768]" ] }, "fscale": { @@ -3041,10 +3055,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr d3, [x0, #768]", "mov v0.8b, v2.8b", "mov v1.8b, v3.8b", @@ -3097,9 +3111,9 @@ "ldp x19, x29, [x28, #120]", "ldr x26, [x28, #752]", "ldr x27, [x28, #760]", - "mov v2.8b, v0.8b", + "mov v4.8b, v0.8b", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fsin": { @@ -3161,11 +3175,11 @@ "ldp x19, x29, [x28, #120]", "ldr x26, [x28, #752]", "ldr x27, [x28, #760]", - "mov v2.8b, v0.8b", + "mov v3.8b, v0.8b", "mov w21, #0x0", "strb w21, [x28, #746]", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d3, [x0, #768]" ] }, "fcos": { @@ -3227,54 +3241,57 @@ "ldp x19, x29, [x28, #120]", "ldr x26, [x28, #752]", "ldr x27, [x28, #760]", - "mov v2.8b, v0.8b", + "mov v3.8b, v0.8b", "mov w21, #0x0", "strb w21, [x28, #746]", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d3, [x0, #768]" ] }, "fiadd dword [rax]": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 9, "Comment": [ "0xda !11b /0" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr w21, [x4]", - "scvtf d2, w21", + "mov x21, x4", + "ldr w22, [x21]", + "scvtf d2, w22", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", + "fadd d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fimul dword [rax]": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 9, "Comment": [ "0xda !11b /1" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr w21, [x4]", - "scvtf d2, w21", + "mov x21, x4", + "ldr w22, [x21]", + "scvtf d2, w22", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", + "fmul d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "ficom dword [rax]": { - "ExpectedInstructionCount": 16, + "ExpectedInstructionCount": 17, "Comment": [ "0xda !11b /2" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr w21, [x4]", - "scvtf d2, w21", + "mov x21, x4", + "ldr w22, [x21]", + "scvtf d2, w22", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", @@ -3282,23 +3299,24 @@ "cset w21, vs", "cset w22, eq", "cset w23, mi", - "orr w23, w23, w21", - "strb w23, [x28, #744]", - "orr w22, w22, w21", - "strb w22, [x28, #750]", + "orr w24, w23, w21", + "strb w24, [x28, #744]", + "orr w23, w22, w21", + "strb w23, [x28, #750]", "strb w20, [x28, #745]", "strb w21, [x28, #746]" ] }, "ficomp dword [rax]": { - "ExpectedInstructionCount": 24, + "ExpectedInstructionCount": 25, "Comment": [ "0xda !11b /3" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr w21, [x4]", - "scvtf d2, w21", + "mov x21, x4", + "ldr w22, [x21]", + "scvtf d2, w22", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", @@ -3307,87 +3325,91 @@ "cset w23, vs", "cset w24, eq", "cset w25, mi", - "orr w25, w25, w23", - "strb w25, [x28, #744]", - "orr w24, w24, w23", - "strb w24, [x28, #750]", + "orr w30, w25, w23", + "strb w30, [x28, #744]", + "orr w25, w24, w23", + "strb w25, [x28, #750]", "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fisub dword [rax]": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 9, "Comment": [ "0xda !11b /4" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr w21, [x4]", - "scvtf d2, w21", + "mov x21, x4", + "ldr w22, [x21]", + "scvtf d2, w22", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", + "fsub d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fisubr dword [rax]": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 9, "Comment": [ "0xda !11b /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr w21, [x4]", - "scvtf d2, w21", + "mov x21, x4", + "ldr w22, [x21]", + "scvtf d2, w22", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", + "fsub d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fidiv dword [rax]": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 9, "Comment": [ "0xda !11b /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr w21, [x4]", - "scvtf d2, w21", + "mov x21, x4", + "ldr w22, [x21]", + "scvtf d2, w22", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", + "fdiv d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fidivr dword [rax]": { - "ExpectedInstructionCount": 8, + "ExpectedInstructionCount": 9, "Comment": [ "0xda !11b /7" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr w21, [x4]", - "scvtf d2, w21", + "mov x21, x4", + "ldr w22, [x21]", + "scvtf d2, w22", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", + "fdiv d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fcmovb st0, st0": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc0 /0" ], @@ -3396,18 +3418,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovb st0, st1": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc1 /0" ], @@ -3416,18 +3439,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovb st0, st2": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc2 /0" ], @@ -3436,18 +3460,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovb st0, st3": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc3 /0" ], @@ -3456,18 +3481,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovb st0, st4": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc4 /0" ], @@ -3476,18 +3502,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovb st0, st5": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc5 /0" ], @@ -3496,18 +3523,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovb st0, st6": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc6 /0" ], @@ -3516,18 +3544,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovb st0, st7": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc7 /0" ], @@ -3536,18 +3565,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmove st0, st0": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc8 /1" ], @@ -3556,18 +3586,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmove st0, st1": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xc9 /1" ], @@ -3576,18 +3607,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmove st0, st2": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xca /1" ], @@ -3596,18 +3628,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmove st0, st3": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xcb /1" ], @@ -3616,18 +3649,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmove st0, st4": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xcc /1" ], @@ -3636,18 +3670,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmove st0, st5": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xcd /1" ], @@ -3656,18 +3691,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmove st0, st6": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xce /1" ], @@ -3676,18 +3712,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmove st0, st7": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xda 11b 0xcf /1" ], @@ -3696,398 +3733,423 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovbe st0, st0": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xda 11b 0xd0 /0" ], "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "csetm x21, hs", - "csel x20, x20, x21, eq", - "dup v2.2d, x20", + "csel x22, x20, x21, eq", + "dup v2.2d, x22", "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovbe st0, st1": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xda 11b 0xd1 /0" ], "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "csetm x21, hs", - "csel x20, x20, x21, eq", - "dup v2.2d, x20", + "csel x22, x20, x21, eq", + "dup v2.2d, x22", "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovbe st0, st2": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xda 11b 0xd2 /0" ], "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "csetm x21, hs", - "csel x20, x20, x21, eq", - "dup v2.2d, x20", + "csel x22, x20, x21, eq", + "dup v2.2d, x22", "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovbe st0, st3": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xda 11b 0xd3 /0" ], "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "csetm x21, hs", - "csel x20, x20, x21, eq", - "dup v2.2d, x20", + "csel x22, x20, x21, eq", + "dup v2.2d, x22", "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovbe st0, st4": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xda 11b 0xd4 /0" ], "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "csetm x21, hs", - "csel x20, x20, x21, eq", - "dup v2.2d, x20", + "csel x22, x20, x21, eq", + "dup v2.2d, x22", "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovbe st0, st5": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xda 11b 0xd5 /0" ], "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "csetm x21, hs", - "csel x20, x20, x21, eq", - "dup v2.2d, x20", + "csel x22, x20, x21, eq", + "dup v2.2d, x22", "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovbe st0, st6": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xda 11b 0xd6 /0" ], "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "csetm x21, hs", - "csel x20, x20, x21, eq", - "dup v2.2d, x20", + "csel x22, x20, x21, eq", + "dup v2.2d, x22", "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovbe st0, st7": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xda 11b 0xd7 /0" ], "ExpectedArm64ASM": [ "mov x20, #0xffffffffffffffff", "csetm x21, hs", - "csel x20, x20, x21, eq", - "dup v2.2d, x20", + "csel x22, x20, x21, eq", + "dup v2.2d, x22", "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovu st0, st0": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xda 11b 0xd8 /1" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x0 (0)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x0 (0)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovu st0, st1": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xda 11b 0xd9 /1" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x1 (1)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovu st0, st2": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xda 11b 0xda /1" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x2 (2)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x2 (2)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovu st0, st3": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xda 11b 0xdb /1" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x3 (3)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x3 (3)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovu st0, st4": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xda 11b 0xdc /1" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x4 (4)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x4 (4)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovu st0, st5": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xda 11b 0xdd /1" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x5 (5)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x5 (5)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovu st0, st6": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xda 11b 0xde /1" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x6 (6)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x6 (6)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovu st0, st7": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xda 11b 0xdf /1" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eon w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x7 (7)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eon w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x7 (7)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fucompp": { @@ -4099,8 +4161,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -4109,49 +4171,50 @@ "cset w23, vs", "cset w24, eq", "cset w25, mi", - "orr w25, w25, w23", - "strb w25, [x28, #744]", - "orr w24, w24, w23", - "strb w24, [x28, #750]", + "orr w30, w25, w23", + "strb w30, [x28, #744]", + "orr w25, w24, w23", + "strb w25, [x28, #750]", "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", "lsl w23, w21, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "bic w24, w22, w23", + "strb w24, [x28, #1026]", + "add w22, w20, #0x1 (1)", + "and w20, w22, #0x7", + "lsl w22, w21, w20", + "bic w21, w24, w22", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fild dword [rax]": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdf !11b /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "ldr w21, [x4]", - "scvtf d2, w21", + "mov x21, x4", + "ldr w22, [x21]", + "scvtf d2, w22", "add x0, x28, x20, lsl #4", "str d2, [x0, #768]" ] }, "fisttp dword [rax]": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb !11b /1" ], @@ -4160,19 +4223,20 @@ "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", "fcvtzs w21, d2", - "str w21, [x4]", + "mov x22, x4", + "str w21, [x22]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fist dword [rax]": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 7, "Comment": [ "0xdb !11b /2" ], @@ -4182,11 +4246,12 @@ "ldr d2, [x0, #768]", "frinti d0, d2", "fcvtzs w20, d0", - "str w20, [x4]" + "mov x21, x4", + "str w20, [x21]" ] }, "fistp dword [rax]": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xdf !11b /7" ], @@ -4196,25 +4261,27 @@ "ldr d2, [x0, #768]", "frinti d0, d2", "fcvtzs w21, d0", - "str w21, [x4]", + "mov x22, x4", + "str w21, [x22]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fld tword [rax]": { - "ExpectedInstructionCount": 38, + "ExpectedInstructionCount": 39, "Comment": [ "0xdb !11b /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr q2, [x4]", + "mov x21, x4", + "ldr q2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -4240,21 +4307,21 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "mov v2.8b, v0.8b", + "mov v3.8b, v0.8b", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d3, [x0, #768]" ] }, "fstp tword [rax]": { - "ExpectedInstructionCount": 41, + "ExpectedInstructionCount": 42, "Comment": [ "0xdb !11b /7" ], @@ -4286,24 +4353,25 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "str d2, [x4]", - "mov x21, v2.d[1]", - "strh w21, [x4, #8]", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "mov x21, x4", + "str d3, [x21]", + "mov x22, v3.d[1]", + "strh w22, [x21, #8]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fcmovnb st0, st0": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc0 /0" ], @@ -4312,18 +4380,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnb st0, st1": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc1 /0" ], @@ -4332,18 +4401,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnb st0, st2": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc2 /0" ], @@ -4352,18 +4422,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnb st0, st3": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc3 /0" ], @@ -4372,18 +4443,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnb st0, st4": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc4 /0" ], @@ -4392,18 +4464,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnb st0, st5": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc5 /0" ], @@ -4412,18 +4485,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnb st0, st6": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc6 /0" ], @@ -4432,18 +4506,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnb st0, st7": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc7 /0" ], @@ -4452,18 +4527,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovne st0, st0": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc8 /1" ], @@ -4472,18 +4548,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovne st0, st1": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xc9 /1" ], @@ -4492,18 +4569,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovne st0, st2": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xca /1" ], @@ -4512,18 +4590,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovne st0, st3": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xcb /1" ], @@ -4532,18 +4611,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovne st0, st4": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xcc /1" ], @@ -4552,18 +4632,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovne st0, st5": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xcd /1" ], @@ -4572,18 +4653,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovne st0, st6": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xce /1" ], @@ -4592,18 +4674,19 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovne st0, st7": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdb 11b 0xcf /1" ], @@ -4612,390 +4695,415 @@ "dup v2.2d, x20", "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnbe st0, st0": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb 11b 0xd0 /2" ], "ExpectedArm64ASM": [ "csetm x20, lo", - "csel x20, x20, xzr, ne", - "dup v2.2d, x20", + "csel x21, x20, xzr, ne", + "dup v2.2d, x21", "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnbe st0, st1": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb 11b 0xd1 /2" ], "ExpectedArm64ASM": [ "csetm x20, lo", - "csel x20, x20, xzr, ne", - "dup v2.2d, x20", + "csel x21, x20, xzr, ne", + "dup v2.2d, x21", "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnbe st0, st2": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb 11b 0xd2 /2" ], "ExpectedArm64ASM": [ "csetm x20, lo", - "csel x20, x20, xzr, ne", - "dup v2.2d, x20", + "csel x21, x20, xzr, ne", + "dup v2.2d, x21", "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnbe st0, st3": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb 11b 0xd3 /2" ], "ExpectedArm64ASM": [ "csetm x20, lo", - "csel x20, x20, xzr, ne", - "dup v2.2d, x20", + "csel x21, x20, xzr, ne", + "dup v2.2d, x21", "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnbe st0, st4": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb 11b 0xd4 /2" ], "ExpectedArm64ASM": [ "csetm x20, lo", - "csel x20, x20, xzr, ne", - "dup v2.2d, x20", + "csel x21, x20, xzr, ne", + "dup v2.2d, x21", "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnbe st0, st5": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb 11b 0xd5 /2" ], "ExpectedArm64ASM": [ "csetm x20, lo", - "csel x20, x20, xzr, ne", - "dup v2.2d, x20", + "csel x21, x20, xzr, ne", + "dup v2.2d, x21", "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnbe st0, st6": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb 11b 0xd6 /2" ], "ExpectedArm64ASM": [ "csetm x20, lo", - "csel x20, x20, xzr, ne", - "dup v2.2d, x20", + "csel x21, x20, xzr, ne", + "dup v2.2d, x21", "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnbe st0, st7": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdb 11b 0xd7 /2" ], "ExpectedArm64ASM": [ "csetm x20, lo", - "csel x20, x20, xzr, ne", - "dup v2.2d, x20", + "csel x21, x20, xzr, ne", + "dup v2.2d, x21", "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]" + "str q5, [x0, #768]" ] }, "fcmovnu st0, st0": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xdb 11b 0xd8 /3" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x0 (0)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x0 (0)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovnu st0, st1": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xdb 11b 0xd9 /3" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x1 (1)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovnu st0, st2": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xdb 11b 0xda /3" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x2 (2)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x2 (2)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovnu st0, st3": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xdb 11b 0xdb /3" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x3 (3)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x3 (3)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovnu st0, st4": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xdb 11b 0xdc /3" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x4 (4)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x4 (4)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovnu st0, st5": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xdb 11b 0xdd /3" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x5 (5)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x5 (5)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovnu st0, st6": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xdb 11b 0xde /3" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x6 (6)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x6 (6)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fcmovnu st0, st7": { - "ExpectedInstructionCount": 18, + "ExpectedInstructionCount": 20, "Comment": [ "0xdb 11b 0xdf /3" ], "ExpectedArm64ASM": [ - "eor w20, w26, w26, lsr #4", - "eor w20, w20, w20, lsr #2", - "eor w20, w20, w20, lsr #1", - "mrs x21, nzcv", - "tst w20, #0x1", - "csetm x20, ne", - "dup v2.2d, x20", - "ldrb w20, [x28, #747]", - "add w22, w20, #0x7 (7)", - "and w22, w22, #0x7", - "add x0, x28, x20, lsl #4", + "mov x20, x26", + "eor w21, w20, w20, lsr #4", + "eor w20, w21, w21, lsr #2", + "eor w21, w20, w20, lsr #1", + "mrs x20, nzcv", + "tst w21, #0x1", + "csetm x21, ne", + "dup v2.2d, x21", + "ldrb w21, [x28, #747]", + "add w22, w21, #0x7 (7)", + "and w23, w22, #0x7", + "add x0, x28, x21, lsl #4", "ldr q3, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "ldr q4, [x0, #768]", - "bsl v2.16b, v4.16b, v3.16b", - "add x0, x28, x20, lsl #4", - "str q2, [x0, #768]", - "msr nzcv, x21" + "mov v5.16b, v2.16b", + "bsl v5.16b, v4.16b, v3.16b", + "add x0, x28, x21, lsl #4", + "str q5, [x0, #768]", + "msr nzcv, x20" ] }, "fnclex": { @@ -5030,15 +5138,15 @@ ] }, "fucomi st0, st0": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 18, "Comment": [ "0xdb 11b 0xe8 /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -5046,24 +5154,25 @@ "cset w20, eq", "cset w21, lo", "cset w22, vs", - "orr w21, w21, w22", - "lsl x21, x21, #29", - "orr w20, w20, w22", - "orr w20, w21, w20, lsl #30", - "eor w26, w22, #0x1", + "orr w23, w21, w22", + "lsl x21, x23, #29", + "orr w23, w20, w22", + "orr w20, w21, w23, lsl #30", + "eor w21, w22, #0x1", + "mov x26, x21", "msr nzcv, x20" ] }, "fucomi st0, st1": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 18, "Comment": [ "0xdb 11b 0xe9 /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -5071,24 +5180,25 @@ "cset w20, eq", "cset w21, lo", "cset w22, vs", - "orr w21, w21, w22", - "lsl x21, x21, #29", - "orr w20, w20, w22", - "orr w20, w21, w20, lsl #30", - "eor w26, w22, #0x1", + "orr w23, w21, w22", + "lsl x21, x23, #29", + "orr w23, w20, w22", + "orr w20, w21, w23, lsl #30", + "eor w21, w22, #0x1", + "mov x26, x21", "msr nzcv, x20" ] }, "fucomi st0, st2": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 18, "Comment": [ "0xdb 11b 0xea /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -5096,24 +5206,25 @@ "cset w20, eq", "cset w21, lo", "cset w22, vs", - "orr w21, w21, w22", - "lsl x21, x21, #29", - "orr w20, w20, w22", - "orr w20, w21, w20, lsl #30", - "eor w26, w22, #0x1", + "orr w23, w21, w22", + "lsl x21, x23, #29", + "orr w23, w20, w22", + "orr w20, w21, w23, lsl #30", + "eor w21, w22, #0x1", + "mov x26, x21", "msr nzcv, x20" ] }, "fucomi st0, st3": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 18, "Comment": [ "0xdb 11b 0xeb /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -5121,24 +5232,25 @@ "cset w20, eq", "cset w21, lo", "cset w22, vs", - "orr w21, w21, w22", - "lsl x21, x21, #29", - "orr w20, w20, w22", - "orr w20, w21, w20, lsl #30", - "eor w26, w22, #0x1", + "orr w23, w21, w22", + "lsl x21, x23, #29", + "orr w23, w20, w22", + "orr w20, w21, w23, lsl #30", + "eor w21, w22, #0x1", + "mov x26, x21", "msr nzcv, x20" ] }, "fucomi st0, st4": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 18, "Comment": [ "0xdb 11b 0xec /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -5146,24 +5258,25 @@ "cset w20, eq", "cset w21, lo", "cset w22, vs", - "orr w21, w21, w22", - "lsl x21, x21, #29", - "orr w20, w20, w22", - "orr w20, w21, w20, lsl #30", - "eor w26, w22, #0x1", + "orr w23, w21, w22", + "lsl x21, x23, #29", + "orr w23, w20, w22", + "orr w20, w21, w23, lsl #30", + "eor w21, w22, #0x1", + "mov x26, x21", "msr nzcv, x20" ] }, "fucomi st0, st5": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 18, "Comment": [ "0xdb 11b 0xed /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -5171,24 +5284,25 @@ "cset w20, eq", "cset w21, lo", "cset w22, vs", - "orr w21, w21, w22", - "lsl x21, x21, #29", - "orr w20, w20, w22", - "orr w20, w21, w20, lsl #30", - "eor w26, w22, #0x1", + "orr w23, w21, w22", + "lsl x21, x23, #29", + "orr w23, w20, w22", + "orr w20, w21, w23, lsl #30", + "eor w21, w22, #0x1", + "mov x26, x21", "msr nzcv, x20" ] }, "fucomi st0, st6": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 18, "Comment": [ "0xdb 11b 0xee /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -5196,24 +5310,25 @@ "cset w20, eq", "cset w21, lo", "cset w22, vs", - "orr w21, w21, w22", - "lsl x21, x21, #29", - "orr w20, w20, w22", - "orr w20, w21, w20, lsl #30", - "eor w26, w22, #0x1", + "orr w23, w21, w22", + "lsl x21, x23, #29", + "orr w23, w20, w22", + "orr w20, w21, w23, lsl #30", + "eor w21, w22, #0x1", + "mov x26, x21", "msr nzcv, x20" ] }, "fucomi st0, st7": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 18, "Comment": [ "0xdb 11b 0xef /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -5221,24 +5336,25 @@ "cset w20, eq", "cset w21, lo", "cset w22, vs", - "orr w21, w21, w22", - "lsl x21, x21, #29", - "orr w20, w20, w22", - "orr w20, w21, w20, lsl #30", - "eor w26, w22, #0x1", + "orr w23, w21, w22", + "lsl x21, x23, #29", + "orr w23, w20, w22", + "orr w20, w21, w23, lsl #30", + "eor w21, w22, #0x1", + "mov x26, x21", "msr nzcv, x20" ] }, "fcomi st0, st0": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 18, "Comment": [ "0xdb 11b 0xf0 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -5246,24 +5362,25 @@ "cset w20, eq", "cset w21, lo", "cset w22, vs", - "orr w21, w21, w22", - "lsl x21, x21, #29", - "orr w20, w20, w22", - "orr w20, w21, w20, lsl #30", - "eor w26, w22, #0x1", + "orr w23, w21, w22", + "lsl x21, x23, #29", + "orr w23, w20, w22", + "orr w20, w21, w23, lsl #30", + "eor w21, w22, #0x1", + "mov x26, x21", "msr nzcv, x20" ] }, "fcomi st0, st1": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 18, "Comment": [ "0xdb 11b 0xf1 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -5271,24 +5388,25 @@ "cset w20, eq", "cset w21, lo", "cset w22, vs", - "orr w21, w21, w22", - "lsl x21, x21, #29", - "orr w20, w20, w22", - "orr w20, w21, w20, lsl #30", - "eor w26, w22, #0x1", + "orr w23, w21, w22", + "lsl x21, x23, #29", + "orr w23, w20, w22", + "orr w20, w21, w23, lsl #30", + "eor w21, w22, #0x1", + "mov x26, x21", "msr nzcv, x20" ] }, "fcomi st0, st2": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 18, "Comment": [ "0xdb 11b 0xf2 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -5296,24 +5414,25 @@ "cset w20, eq", "cset w21, lo", "cset w22, vs", - "orr w21, w21, w22", - "lsl x21, x21, #29", - "orr w20, w20, w22", - "orr w20, w21, w20, lsl #30", - "eor w26, w22, #0x1", + "orr w23, w21, w22", + "lsl x21, x23, #29", + "orr w23, w20, w22", + "orr w20, w21, w23, lsl #30", + "eor w21, w22, #0x1", + "mov x26, x21", "msr nzcv, x20" ] }, "fcomi st0, st3": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 18, "Comment": [ "0xdb 11b 0xf3 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -5321,24 +5440,25 @@ "cset w20, eq", "cset w21, lo", "cset w22, vs", - "orr w21, w21, w22", - "lsl x21, x21, #29", - "orr w20, w20, w22", - "orr w20, w21, w20, lsl #30", - "eor w26, w22, #0x1", + "orr w23, w21, w22", + "lsl x21, x23, #29", + "orr w23, w20, w22", + "orr w20, w21, w23, lsl #30", + "eor w21, w22, #0x1", + "mov x26, x21", "msr nzcv, x20" ] }, "fcomi st0, st4": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 18, "Comment": [ "0xdb 11b 0xf4 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -5346,24 +5466,25 @@ "cset w20, eq", "cset w21, lo", "cset w22, vs", - "orr w21, w21, w22", - "lsl x21, x21, #29", - "orr w20, w20, w22", - "orr w20, w21, w20, lsl #30", - "eor w26, w22, #0x1", + "orr w23, w21, w22", + "lsl x21, x23, #29", + "orr w23, w20, w22", + "orr w20, w21, w23, lsl #30", + "eor w21, w22, #0x1", + "mov x26, x21", "msr nzcv, x20" ] }, "fcomi st0, st5": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 18, "Comment": [ "0xdb 11b 0xf5 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -5371,24 +5492,25 @@ "cset w20, eq", "cset w21, lo", "cset w22, vs", - "orr w21, w21, w22", - "lsl x21, x21, #29", - "orr w20, w20, w22", - "orr w20, w21, w20, lsl #30", - "eor w26, w22, #0x1", + "orr w23, w21, w22", + "lsl x21, x23, #29", + "orr w23, w20, w22", + "orr w20, w21, w23, lsl #30", + "eor w21, w22, #0x1", + "mov x26, x21", "msr nzcv, x20" ] }, "fcomi st0, st6": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 18, "Comment": [ "0xdb 11b 0xf6 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -5396,24 +5518,25 @@ "cset w20, eq", "cset w21, lo", "cset w22, vs", - "orr w21, w21, w22", - "lsl x21, x21, #29", - "orr w20, w20, w22", - "orr w20, w21, w20, lsl #30", - "eor w26, w22, #0x1", + "orr w23, w21, w22", + "lsl x21, x23, #29", + "orr w23, w20, w22", + "orr w20, w21, w23, lsl #30", + "eor w21, w22, #0x1", + "mov x26, x21", "msr nzcv, x20" ] }, "fcomi st0, st7": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 18, "Comment": [ "0xdb 11b 0xf7 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -5421,52 +5544,56 @@ "cset w20, eq", "cset w21, lo", "cset w22, vs", - "orr w21, w21, w22", - "lsl x21, x21, #29", - "orr w20, w20, w22", - "orr w20, w21, w20, lsl #30", - "eor w26, w22, #0x1", + "orr w23, w21, w22", + "lsl x21, x23, #29", + "orr w23, w20, w22", + "orr w20, w21, w23, lsl #30", + "eor w21, w22, #0x1", + "mov x26, x21", "msr nzcv, x20" ] }, "fadd qword [rax]": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 8, "Comment": [ "0xdc !11b /0" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", + "fadd d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fmul qword [rax]": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 8, "Comment": [ "0xdc !11b /1" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", + "fmul d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fcom qword [rax]": { - "ExpectedInstructionCount": 15, + "ExpectedInstructionCount": 16, "Comment": [ "0xdc !11b /2" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", @@ -5474,22 +5601,23 @@ "cset w21, vs", "cset w22, eq", "cset w23, mi", - "orr w23, w23, w21", - "strb w23, [x28, #744]", - "orr w22, w22, w21", - "strb w22, [x28, #750]", + "orr w24, w23, w21", + "strb w24, [x28, #744]", + "orr w23, w22, w21", + "strb w23, [x28, #750]", "strb w20, [x28, #745]", "strb w21, [x28, #746]" ] }, "fcomp qword [rax]": { - "ExpectedInstructionCount": 23, + "ExpectedInstructionCount": 24, "Comment": [ "0xdc !11b /3" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", "fcmp d3, d2", @@ -5498,79 +5626,83 @@ "cset w23, vs", "cset w24, eq", "cset w25, mi", - "orr w25, w25, w23", - "strb w25, [x28, #744]", - "orr w24, w24, w23", - "strb w24, [x28, #750]", + "orr w30, w25, w23", + "strb w30, [x28, #744]", + "orr w25, w24, w23", + "strb w25, [x28, #750]", "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fsub qword [rax]": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 8, "Comment": [ "0xdc !11b /4" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", + "fsub d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fsubr qword [rax]": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 8, "Comment": [ "0xdc !11b /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", + "fsub d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdiv qword [rax]": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 8, "Comment": [ "0xdc !11b /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", + "fdiv d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fdivr qword [rax]": { - "ExpectedInstructionCount": 7, + "ExpectedInstructionCount": 8, "Comment": [ "0xdc !11b /7" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", + "fdiv d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "db 0xdc, 0xc0": { @@ -5583,14 +5715,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fadd d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fadd st1, st0": { @@ -5601,14 +5733,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fadd d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fadd st2, st0": { @@ -5619,14 +5751,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fadd d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fadd st3, st0": { @@ -5637,14 +5769,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fadd d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fadd st4, st0": { @@ -5655,14 +5787,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fadd d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fadd st5, st0": { @@ -5673,14 +5805,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fadd d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fadd st6, st0": { @@ -5691,14 +5823,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fadd d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fadd st7, st0": { @@ -5709,14 +5841,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fadd d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "db 0xdc, 0xc8": { @@ -5729,14 +5861,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fmul d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fmul st1, st0": { @@ -5747,14 +5879,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fmul d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fmul st2, st0": { @@ -5765,14 +5897,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fmul d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fmul st3, st0": { @@ -5783,14 +5915,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fmul d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fmul st4, st0": { @@ -5801,14 +5933,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fmul d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fmul st5, st0": { @@ -5819,14 +5951,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fmul d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fmul st6, st0": { @@ -5837,14 +5969,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fmul d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fmul st7, st0": { @@ -5855,14 +5987,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fmul d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "db 0xdc, 0xe0": { @@ -5875,14 +6007,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubr st1, st0": { @@ -5893,14 +6025,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubr st2, st0": { @@ -5911,14 +6043,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubr st3, st0": { @@ -5929,14 +6061,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubr st4, st0": { @@ -5947,14 +6079,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubr st5, st0": { @@ -5965,14 +6097,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubr st6, st0": { @@ -5983,14 +6115,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubr st7, st0": { @@ -6001,14 +6133,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "db 0xdc, 0xe8": { @@ -6021,14 +6153,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsub st1, st0": { @@ -6039,14 +6171,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsub st2, st0": { @@ -6057,14 +6189,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsub st3, st0": { @@ -6075,14 +6207,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsub st4, st0": { @@ -6093,14 +6225,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsub st5, st0": { @@ -6111,14 +6243,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsub st6, st0": { @@ -6129,14 +6261,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsub st7, st0": { @@ -6147,14 +6279,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fsub d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "db 0xdc, 0xf0": { @@ -6167,14 +6299,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivr st1, st0": { @@ -6185,14 +6317,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivr st2, st0": { @@ -6203,14 +6335,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivr st3, st0": { @@ -6221,14 +6353,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivr st4, st0": { @@ -6239,14 +6371,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivr st5, st0": { @@ -6257,14 +6389,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivr st6, st0": { @@ -6275,14 +6407,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivr st7, st0": { @@ -6293,14 +6425,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d3, d2", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "db 0xdc, 0xf8": { @@ -6313,14 +6445,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdiv st1, st0": { @@ -6331,14 +6463,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdiv st2, st0": { @@ -6349,14 +6481,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdiv st3, st0": { @@ -6367,14 +6499,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdiv st4, st0": { @@ -6385,14 +6517,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdiv st5, st0": { @@ -6403,14 +6535,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdiv st6, st0": { @@ -6421,14 +6553,14 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdiv st7, st0": { @@ -6439,30 +6571,31 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "fdiv d4, d2, d3", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fld qword [rax]": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdd !11b /0" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldr d2, [x4]", + "mov x21, x4", + "ldr d2, [x21]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", "add x0, x28, x20, lsl #4", @@ -6470,7 +6603,7 @@ ] }, "fisttp qword [rax]": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdd !11b /1" ], @@ -6479,19 +6612,20 @@ "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", "fcvtzs x21, d2", - "str x21, [x4]", + "mov x22, x4", + "str x21, [x22]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fst qword [rax]": { - "ExpectedInstructionCount": 4, + "ExpectedInstructionCount": 5, "Comment": [ "0xdd !11b /2" ], @@ -6499,11 +6633,12 @@ "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", - "str d2, [x4]" + "mov x20, x4", + "str d2, [x20]" ] }, "fstp qword [rax]": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 13, "Comment": [ "0xdd !11b /3" ], @@ -6511,87 +6646,90 @@ "ldrb w20, [x28, #747]", "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", - "str d2, [x4]", + "mov x21, x4", + "str d2, [x21]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "frstor [rax]": { - "ExpectedInstructionCount": 325, + "ExpectedInstructionCount": 328, "Comment": [ "0xdd !11b /4" ], "ExpectedArm64ASM": [ - "ldrh w20, [x4]", - "lsr w21, w20, #10", - "and w21, w21, #0x3", - "rbit w1, w21", + "mov x20, x4", + "ldrh w21, [x20]", + "lsr w22, w21, #10", + "and w23, w22, #0x3", + "rbit w1, w23", "lsr w1, w1, #30", "mrs x0, fpcr", "bfi x0, x1, #22, #2", - "lsr x1, x21, #2", + "lsr x1, x23, #2", "bfi x0, x1, #24, #1", "msr fpcr, x0", - "strh w20, [x28, #1024]", - "strh w20, [x28, #1024]", - "ldr w20, [x4, #4]", - "ubfx w21, w20, #11, #3", - "strb w21, [x28, #747]", - "ubfx w22, w20, #8, #1", - "ubfx w23, w20, #9, #1", - "ubfx w24, w20, #10, #1", - "ubfx w20, w20, #14, #1", - "strb w22, [x28, #744]", - "strb w23, [x28, #745]", - "strb w24, [x28, #746]", - "strb w20, [x28, #750]", - "ldr w20, [x4, #8]", - "ubfx w22, w20, #0, #2", - "mrs x23, nzcv", - "cmp x22, #0x3 (3)", - "cset x22, ne", - "ubfx w24, w20, #2, #2", - "cmp x24, #0x3 (3)", - "cset x24, ne", - "orr w22, w22, w24, lsl #1", - "ubfx w24, w20, #4, #2", - "cmp x24, #0x3 (3)", - "cset x24, ne", - "orr w22, w22, w24, lsl #2", - "ubfx w24, w20, #6, #2", - "cmp x24, #0x3 (3)", - "cset x24, ne", - "orr w22, w22, w24, lsl #3", - "ubfx w24, w20, #8, #2", - "cmp x24, #0x3 (3)", - "cset x24, ne", - "orr w22, w22, w24, lsl #4", - "ubfx w24, w20, #10, #2", - "cmp x24, #0x3 (3)", - "cset x24, ne", - "orr w22, w22, w24, lsl #5", - "ubfx w24, w20, #12, #2", - "cmp x24, #0x3 (3)", - "cset x24, ne", - "orr w22, w22, w24, lsl #6", - "ubfx w20, w20, #14, #2", - "cmp x20, #0x3 (3)", - "cset x20, ne", - "orr w20, w22, w20, lsl #7", - "strb w20, [x28, #1026]", - "add x20, x4, #0x1c (28)", - "mov x22, #0xffffffffffffffff", - "mov w24, #0xffff", - "fmov d2, x22", - "mov v2.d[1], x24", - "ldur q3, [x4, #28]", - "and v3.16b, v3.16b, v2.16b", + "strh w21, [x28, #1024]", + "strh w21, [x28, #1024]", + "ldr w21, [x20, #4]", + "ubfx w22, w21, #11, #3", + "strb w22, [x28, #747]", + "ubfx w23, w21, #8, #1", + "ubfx w24, w21, #9, #1", + "ubfx w25, w21, #10, #1", + "ubfx w30, w21, #14, #1", + "strb w23, [x28, #744]", + "strb w24, [x28, #745]", + "strb w25, [x28, #746]", + "strb w30, [x28, #750]", + "ldr w21, [x20, #8]", + "ubfx w23, w21, #0, #2", + "mrs x24, nzcv", + "cmp x23, #0x3 (3)", + "cset x25, ne", + "ubfx w23, w21, #2, #2", + "cmp x23, #0x3 (3)", + "cset x30, ne", + "orr w23, w25, w30, lsl #1", + "ubfx w25, w21, #4, #2", + "cmp x25, #0x3 (3)", + "cset x30, ne", + "orr w25, w23, w30, lsl #2", + "ubfx w23, w21, #6, #2", + "cmp x23, #0x3 (3)", + "cset x30, ne", + "orr w23, w25, w30, lsl #3", + "ubfx w25, w21, #8, #2", + "cmp x25, #0x3 (3)", + "cset x30, ne", + "orr w25, w23, w30, lsl #4", + "ubfx w23, w21, #10, #2", + "cmp x23, #0x3 (3)", + "cset x30, ne", + "orr w23, w25, w30, lsl #5", + "ubfx w25, w21, #12, #2", + "cmp x25, #0x3 (3)", + "cset x30, ne", + "orr w25, w23, w30, lsl #6", + "ubfx w23, w21, #14, #2", + "cmp x23, #0x3 (3)", + "cset x21, ne", + "orr w23, w25, w21, lsl #7", + "strb w23, [x28, #1026]", + "add x21, x20, #0x1c (28)", + "mov x23, #0xffffffffffffffff", + "mov w25, #0xffff", + "fmov d2, x23", + "mov v3.16b, v2.16b", + "mov v3.d[1], x25", + "ldur q2, [x20, #28]", + "and v4.16b, v2.16b, v3.16b", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -6604,8 +6742,8 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", + "mov x1, v4.d[0]", + "umov w2, v4.h[4]", "ldr x3, [x28, #1192]", "blr x3", "ldr w4, [x28, #728]", @@ -6617,14 +6755,14 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "mov v3.8b, v0.8b", - "add x0, x28, x21, lsl #4", - "str d3, [x0, #768]", - "add x22, x20, #0xa (10)", - "add w21, w21, #0x1 (1)", - "and w21, w21, #0x7", - "ldur q3, [x20, #10]", - "and v3.16b, v3.16b, v2.16b", + "mov v2.8b, v0.8b", + "add x0, x28, x22, lsl #4", + "str d2, [x0, #768]", + "add x20, x21, #0xa (10)", + "add w23, w22, #0x1 (1)", + "and w22, w23, #0x7", + "ldur q2, [x21, #10]", + "and v4.16b, v2.16b, v3.16b", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -6637,8 +6775,8 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", + "mov x1, v4.d[0]", + "umov w2, v4.h[4]", "ldr x3, [x28, #1192]", "blr x3", "ldr w4, [x28, #728]", @@ -6650,14 +6788,14 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "mov v3.8b, v0.8b", - "add x0, x28, x21, lsl #4", - "str d3, [x0, #768]", - "add x20, x22, #0xa (10)", - "add w21, w21, #0x1 (1)", - "and w21, w21, #0x7", - "ldur q3, [x22, #10]", - "and v3.16b, v3.16b, v2.16b", + "mov v2.8b, v0.8b", + "add x0, x28, x22, lsl #4", + "str d2, [x0, #768]", + "add x21, x20, #0xa (10)", + "add w23, w22, #0x1 (1)", + "and w22, w23, #0x7", + "ldur q2, [x20, #10]", + "and v4.16b, v2.16b, v3.16b", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -6670,8 +6808,8 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", + "mov x1, v4.d[0]", + "umov w2, v4.h[4]", "ldr x3, [x28, #1192]", "blr x3", "ldr w4, [x28, #728]", @@ -6683,14 +6821,14 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "mov v3.8b, v0.8b", - "add x0, x28, x21, lsl #4", - "str d3, [x0, #768]", - "add x22, x20, #0xa (10)", - "add w21, w21, #0x1 (1)", - "and w21, w21, #0x7", - "ldur q3, [x20, #10]", - "and v3.16b, v3.16b, v2.16b", + "mov v2.8b, v0.8b", + "add x0, x28, x22, lsl #4", + "str d2, [x0, #768]", + "add x20, x21, #0xa (10)", + "add w23, w22, #0x1 (1)", + "and w22, w23, #0x7", + "ldur q2, [x21, #10]", + "and v4.16b, v2.16b, v3.16b", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -6703,8 +6841,8 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", + "mov x1, v4.d[0]", + "umov w2, v4.h[4]", "ldr x3, [x28, #1192]", "blr x3", "ldr w4, [x28, #728]", @@ -6716,14 +6854,14 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "mov v3.8b, v0.8b", - "add x0, x28, x21, lsl #4", - "str d3, [x0, #768]", - "add x20, x22, #0xa (10)", - "add w21, w21, #0x1 (1)", - "and w21, w21, #0x7", - "ldur q3, [x22, #10]", - "and v3.16b, v3.16b, v2.16b", + "mov v2.8b, v0.8b", + "add x0, x28, x22, lsl #4", + "str d2, [x0, #768]", + "add x21, x20, #0xa (10)", + "add w23, w22, #0x1 (1)", + "and w22, w23, #0x7", + "ldur q2, [x20, #10]", + "and v4.16b, v2.16b, v3.16b", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -6736,8 +6874,8 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", + "mov x1, v4.d[0]", + "umov w2, v4.h[4]", "ldr x3, [x28, #1192]", "blr x3", "ldr w4, [x28, #728]", @@ -6749,14 +6887,14 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "mov v3.8b, v0.8b", - "add x0, x28, x21, lsl #4", - "str d3, [x0, #768]", - "add x22, x20, #0xa (10)", - "add w21, w21, #0x1 (1)", - "and w21, w21, #0x7", - "ldur q3, [x20, #10]", - "and v3.16b, v3.16b, v2.16b", + "mov v2.8b, v0.8b", + "add x0, x28, x22, lsl #4", + "str d2, [x0, #768]", + "add x20, x21, #0xa (10)", + "add w23, w22, #0x1 (1)", + "and w22, w23, #0x7", + "ldur q2, [x21, #10]", + "and v4.16b, v2.16b, v3.16b", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -6769,8 +6907,8 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v3.d[0]", - "umov w2, v3.h[4]", + "mov x1, v4.d[0]", + "umov w2, v4.h[4]", "ldr x3, [x28, #1192]", "blr x3", "ldr w4, [x28, #728]", @@ -6782,14 +6920,14 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "mov v3.8b, v0.8b", - "add x0, x28, x21, lsl #4", - "str d3, [x0, #768]", - "add x20, x22, #0xa (10)", - "add w21, w21, #0x1 (1)", - "and w21, w21, #0x7", - "ldur q3, [x22, #10]", - "and v2.16b, v3.16b, v2.16b", + "mov v2.8b, v0.8b", + "add x0, x28, x22, lsl #4", + "str d2, [x0, #768]", + "add x21, x20, #0xa (10)", + "add w23, w22, #0x1 (1)", + "and w22, w23, #0x7", + "ldur q2, [x20, #10]", + "and v4.16b, v2.16b, v3.16b", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -6802,8 +6940,8 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v2.d[0]", - "umov w2, v2.h[4]", + "mov x1, v4.d[0]", + "umov w2, v4.h[4]", "ldr x3, [x28, #1192]", "blr x3", "ldr w4, [x28, #728]", @@ -6816,14 +6954,15 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", "mov v2.8b, v0.8b", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str d2, [x0, #768]", - "add x22, x20, #0xa (10)", - "add w21, w21, #0x1 (1)", - "and w21, w21, #0x7", - "ldur d2, [x20, #10]", - "ldr h3, [x22, #8]", - "mov v2.h[4], v3.h[0]", + "add x20, x21, #0xa (10)", + "add w23, w22, #0x1 (1)", + "and w22, w23, #0x7", + "ldur d2, [x21, #10]", + "ldr h3, [x20, #8]", + "mov v4.16b, v2.16b", + "mov v4.h[4], v3.h[0]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -6836,8 +6975,8 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v2.d[0]", - "umov w2, v2.h[4]", + "mov x1, v4.d[0]", + "umov w2, v4.h[4]", "ldr x3, [x28, #1192]", "blr x3", "ldr w4, [x28, #728]", @@ -6850,81 +6989,85 @@ "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", "mov v2.8b, v0.8b", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str d2, [x0, #768]", - "msr nzcv, x23" + "msr nzcv, x24" ] }, "fnsave [rax]": { - "ExpectedInstructionCount": 335, + "ExpectedInstructionCount": 340, "Comment": [ "0xdd !11b /6" ], "ExpectedArm64ASM": [ - "ldrb w20, [x28, #747]", - "ldrh w21, [x28, #1024]", - "str w21, [x4]", - "mov w21, #0x0", - "mov x22, x21", - "bfi x22, x20, #11, #3", - "ldrb w23, [x28, #744]", - "ldrb w24, [x28, #745]", - "ldrb w25, [x28, #746]", - "ldrb w30, [x28, #750]", - "orr x22, x22, x23, lsl #8", - "orr x22, x22, x24, lsl #9", - "orr x22, x22, x25, lsl #10", - "orr x22, x22, x30, lsl #14", - "str w22, [x4, #4]", - "ldrb w22, [x28, #1026]", - "and w23, w22, #0x1", + "sub sp, sp, #0x20 (32)", + "mov x20, x4", + "ldrb w21, [x28, #747]", + "ldrh w22, [x28, #1024]", + "str w22, [x20]", + "mov w22, #0x0", + "mov x23, x22", + "bfi x23, x21, #11, #3", + "ldrb w24, [x28, #744]", + "ldrb w25, [x28, #745]", + "ldrb w30, [x28, #746]", + "ldrb w18, [x28, #750]", + "strb w21, [sp]", + "orr x21, x23, x24, lsl #8", + "orr x23, x21, x25, lsl #9", + "orr x21, x23, x30, lsl #10", + "orr x23, x21, x18, lsl #14", + "str w23, [x20, #4]", + "ldrb w21, [x28, #1026]", + "and w23, w21, #0x1", "mov w24, #0x3", "mrs x25, nzcv", "cmp x23, #0x0 (0)", - "csel x23, x24, x21, eq", - "orr w23, w21, w23", - "lsr w30, w22, #1", - "and w30, w30, #0x1", - "cmp x30, #0x0 (0)", - "csel x30, x24, x21, eq", - "orr w23, w23, w30, lsl #2", - "lsr w30, w22, #2", - "and w30, w30, #0x1", + "csel x30, x24, x22, eq", + "orr w23, w22, w30", + "lsr w30, w21, #1", + "and w18, w30, #0x1", + "cmp x18, #0x0 (0)", + "csel x30, x24, x22, eq", + "orr w18, w23, w30, lsl #2", + "lsr w23, w21, #2", + "and w30, w23, #0x1", "cmp x30, #0x0 (0)", - "csel x30, x24, x21, eq", - "orr w23, w23, w30, lsl #4", - "lsr w30, w22, #3", - "and w30, w30, #0x1", - "cmp x30, #0x0 (0)", - "csel x30, x24, x21, eq", - "orr w23, w23, w30, lsl #6", - "lsr w30, w22, #4", - "and w30, w30, #0x1", - "cmp x30, #0x0 (0)", - "csel x30, x24, x21, eq", - "orr w23, w23, w30, lsl #8", - "lsr w30, w22, #5", - "and w30, w30, #0x1", + "csel x23, x24, x22, eq", + "orr w30, w18, w23, lsl #4", + "lsr w23, w21, #3", + "and w18, w23, #0x1", + "cmp x18, #0x0 (0)", + "csel x23, x24, x22, eq", + "orr w18, w30, w23, lsl #6", + "lsr w23, w21, #4", + "and w30, w23, #0x1", "cmp x30, #0x0 (0)", - "csel x30, x24, x21, eq", - "orr w23, w23, w30, lsl #10", - "lsr w30, w22, #6", - "and w30, w30, #0x1", + "csel x23, x24, x22, eq", + "orr w30, w18, w23, lsl #8", + "lsr w23, w21, #5", + "and w18, w23, #0x1", + "cmp x18, #0x0 (0)", + "csel x23, x24, x22, eq", + "orr w18, w30, w23, lsl #10", + "lsr w23, w21, #6", + "and w30, w23, #0x1", "cmp x30, #0x0 (0)", - "csel x30, x24, x21, eq", - "orr w23, w23, w30, lsl #12", - "lsr w22, w22, #7", - "and w22, w22, #0x1", - "cmp x22, #0x0 (0)", - "csel x22, x24, x21, eq", - "orr w22, w23, w22, lsl #14", - "str w22, [x4, #8]", - "str w21, [x4, #12]", - "str w21, [x4, #16]", - "str w21, [x4, #20]", - "str w21, [x4, #24]", - "add x22, x4, #0x1c (28)", - "add x0, x28, x20, lsl #4", + "csel x23, x24, x22, eq", + "orr w30, w18, w23, lsl #12", + "lsr w23, w21, #7", + "and w21, w23, #0x1", + "cmp x21, #0x0 (0)", + "csel x23, x24, x22, eq", + "orr w21, w30, w23, lsl #14", + "str w21, [x20, #8]", + "str w22, [x20, #12]", + "str w22, [x20, #16]", + "str w22, [x20, #20]", + "str w22, [x20, #24]", + "add x21, x20, #0x1c (28)", + "ldrb w23, [sp]", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", @@ -6950,14 +7093,14 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "stur q2, [x4, #28]", - "add x23, x22, #0xa (10)", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "add x0, x28, x20, lsl #4", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "stur q3, [x20, #28]", + "add x20, x21, #0xa (10)", + "add w24, w23, #0x1 (1)", + "and w23, w24, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", @@ -6983,14 +7126,14 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "stur q2, [x22, #10]", - "add x22, x23, #0xa (10)", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "add x0, x28, x20, lsl #4", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "stur q3, [x21, #10]", + "add x21, x20, #0xa (10)", + "add w24, w23, #0x1 (1)", + "and w23, w24, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", @@ -7016,14 +7159,14 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "stur q2, [x23, #10]", - "add x23, x22, #0xa (10)", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "add x0, x28, x20, lsl #4", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "stur q3, [x20, #10]", + "add x20, x21, #0xa (10)", + "add w24, w23, #0x1 (1)", + "and w23, w24, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", @@ -7049,14 +7192,14 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "stur q2, [x22, #10]", - "add x22, x23, #0xa (10)", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "add x0, x28, x20, lsl #4", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "stur q3, [x21, #10]", + "add x21, x20, #0xa (10)", + "add w24, w23, #0x1 (1)", + "and w23, w24, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", @@ -7082,14 +7225,14 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "stur q2, [x23, #10]", - "add x23, x22, #0xa (10)", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "add x0, x28, x20, lsl #4", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "stur q3, [x20, #10]", + "add x20, x21, #0xa (10)", + "add w24, w23, #0x1 (1)", + "and w23, w24, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", @@ -7115,14 +7258,14 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "stur q2, [x22, #10]", - "add x22, x23, #0xa (10)", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "add x0, x28, x20, lsl #4", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "stur q3, [x21, #10]", + "add x21, x20, #0xa (10)", + "add w24, w23, #0x1 (1)", + "and w23, w24, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", @@ -7148,14 +7291,14 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "stur q2, [x23, #10]", - "add x23, x22, #0xa (10)", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "add x0, x28, x20, lsl #4", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "stur q3, [x20, #10]", + "add x20, x21, #0xa (10)", + "add w24, w23, #0x1 (1)", + "and w23, w24, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "mrs x0, nzcv", "str w0, [x28, #728]", @@ -7181,41 +7324,44 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", - "stur d2, [x22, #10]", - "dup v2.8h, v2.h[4]", - "str h2, [x23, #8]", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", + "stur d3, [x21, #10]", + "dup v2.8h, v3.h[4]", + "str h2, [x20, #8]", "mov w20, #0x37f", "strh w20, [x28, #1024]", - "strb w21, [x28, #747]", - "strb w21, [x28, #744]", - "strb w21, [x28, #745]", - "strb w21, [x28, #746]", - "strb w21, [x28, #750]", - "strb w21, [x28, #1026]", - "msr nzcv, x25" + "strb w22, [x28, #747]", + "strb w22, [x28, #744]", + "strb w22, [x28, #745]", + "strb w22, [x28, #746]", + "strb w22, [x28, #750]", + "strb w22, [x28, #1026]", + "msr nzcv, x25", + "add sp, sp, #0x20 (32)" ] }, "fnstsw [rax]": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 14, "Comment": [ "0xdd !11b /7" ], "ExpectedArm64ASM": [ "mov w20, #0x0", "ldrb w21, [x28, #747]", - "bfi x20, x21, #11, #3", - "ldrb w21, [x28, #744]", - "ldrb w22, [x28, #745]", + "mov x22, x20", + "bfi x22, x21, #11, #3", + "ldrb w20, [x28, #744]", + "ldrb w21, [x28, #745]", "ldrb w23, [x28, #746]", "ldrb w24, [x28, #750]", - "orr x20, x20, x21, lsl #8", - "orr x20, x20, x22, lsl #9", - "orr x20, x20, x23, lsl #10", - "orr x20, x20, x24, lsl #14", - "strh w20, [x4]" + "orr x25, x22, x20, lsl #8", + "orr x20, x25, x21, lsl #9", + "orr x21, x20, x23, lsl #10", + "orr x20, x21, x24, lsl #14", + "mov x21, x4", + "strh w20, [x21]" ] }, "ffree st0": { @@ -7225,12 +7371,12 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x0 (0)", - "and w20, w20, #0x7", + "add w21, w20, #0x0 (0)", + "and w20, w21, #0x7", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w20, w22, w20", - "bic w20, w21, w20", + "lsl w23, w22, w20", + "bic w20, w21, w23", "strb w20, [x28, #1026]" ] }, @@ -7242,11 +7388,11 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w20, w21, w20", - "bic w20, w22, w20", + "lsl w23, w21, w20", + "bic w20, w22, w23", "strb w20, [x28, #1026]" ] }, @@ -7257,12 +7403,12 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x2 (2)", - "and w20, w20, #0x7", + "add w21, w20, #0x2 (2)", + "and w20, w21, #0x7", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w20, w22, w20", - "bic w20, w21, w20", + "lsl w23, w22, w20", + "bic w20, w21, w23", "strb w20, [x28, #1026]" ] }, @@ -7273,12 +7419,12 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x3 (3)", - "and w20, w20, #0x7", + "add w21, w20, #0x3 (3)", + "and w20, w21, #0x7", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w20, w22, w20", - "bic w20, w21, w20", + "lsl w23, w22, w20", + "bic w20, w21, w23", "strb w20, [x28, #1026]" ] }, @@ -7289,12 +7435,12 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x4 (4)", - "and w20, w20, #0x7", + "add w21, w20, #0x4 (4)", + "and w20, w21, #0x7", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w20, w22, w20", - "bic w20, w21, w20", + "lsl w23, w22, w20", + "bic w20, w21, w23", "strb w20, [x28, #1026]" ] }, @@ -7305,12 +7451,12 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x5 (5)", - "and w20, w20, #0x7", + "add w21, w20, #0x5 (5)", + "and w20, w21, #0x7", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w20, w22, w20", - "bic w20, w21, w20", + "lsl w23, w22, w20", + "bic w20, w21, w23", "strb w20, [x28, #1026]" ] }, @@ -7321,12 +7467,12 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x6 (6)", - "and w20, w20, #0x7", + "add w21, w20, #0x6 (6)", + "and w20, w21, #0x7", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w20, w22, w20", - "bic w20, w21, w20", + "lsl w23, w22, w20", + "bic w20, w21, w23", "strb w20, [x28, #1026]" ] }, @@ -7337,12 +7483,12 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x7 (7)", - "and w20, w20, #0x7", + "add w21, w20, #0x7 (7)", + "and w20, w21, #0x7", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w20, w22, w20", - "bic w20, w21, w20", + "lsl w23, w22, w20", + "bic w20, w21, w23", "strb w20, [x28, #1026]" ] }, @@ -7354,10 +7500,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -7369,10 +7515,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -7384,10 +7530,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -7399,10 +7545,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -7414,10 +7560,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -7429,10 +7575,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -7444,10 +7590,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -7459,10 +7605,10 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]" ] }, @@ -7474,18 +7620,18 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -7498,17 +7644,17 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", + "and w23, w22, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x22, lsl #4", + "add x0, x28, x23, lsl #4", "str q2, [x0, #768]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -7520,18 +7666,18 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -7543,18 +7689,18 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -7566,18 +7712,18 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -7589,18 +7735,18 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -7612,18 +7758,18 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -7635,18 +7781,18 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", + "and w22, w21, #0x7", "add x0, x28, x20, lsl #4", "ldr q2, [x0, #768]", - "add x0, x28, x21, lsl #4", + "add x0, x28, x22, lsl #4", "str q2, [x0, #768]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -7659,8 +7805,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x0", "add w22, w20, #0x0 (0)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -7668,10 +7814,10 @@ "cset w20, vs", "cset w22, eq", "cset w23, mi", - "orr w23, w23, w20", - "strb w23, [x28, #744]", - "orr w22, w22, w20", - "strb w22, [x28, #750]", + "orr w24, w23, w20", + "strb w24, [x28, #744]", + "orr w23, w22, w20", + "strb w23, [x28, #750]", "strb w21, [x28, #745]", "strb w20, [x28, #746]" ] @@ -7684,8 +7830,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x1 (1)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -7694,10 +7840,10 @@ "cset w21, vs", "cset w22, eq", "cset w23, mi", - "orr w23, w23, w21", - "strb w23, [x28, #744]", - "orr w22, w22, w21", - "strb w22, [x28, #750]", + "orr w24, w23, w21", + "strb w24, [x28, #744]", + "orr w23, w22, w21", + "strb w23, [x28, #750]", "strb w20, [x28, #745]", "strb w21, [x28, #746]" ] @@ -7710,8 +7856,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -7720,10 +7866,10 @@ "cset w21, vs", "cset w22, eq", "cset w23, mi", - "orr w23, w23, w21", - "strb w23, [x28, #744]", - "orr w22, w22, w21", - "strb w22, [x28, #750]", + "orr w24, w23, w21", + "strb w24, [x28, #744]", + "orr w23, w22, w21", + "strb w23, [x28, #750]", "strb w20, [x28, #745]", "strb w21, [x28, #746]" ] @@ -7736,8 +7882,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -7746,10 +7892,10 @@ "cset w21, vs", "cset w22, eq", "cset w23, mi", - "orr w23, w23, w21", - "strb w23, [x28, #744]", - "orr w22, w22, w21", - "strb w22, [x28, #750]", + "orr w24, w23, w21", + "strb w24, [x28, #744]", + "orr w23, w22, w21", + "strb w23, [x28, #750]", "strb w20, [x28, #745]", "strb w21, [x28, #746]" ] @@ -7762,8 +7908,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -7772,10 +7918,10 @@ "cset w21, vs", "cset w22, eq", "cset w23, mi", - "orr w23, w23, w21", - "strb w23, [x28, #744]", - "orr w22, w22, w21", - "strb w22, [x28, #750]", + "orr w24, w23, w21", + "strb w24, [x28, #744]", + "orr w23, w22, w21", + "strb w23, [x28, #750]", "strb w20, [x28, #745]", "strb w21, [x28, #746]" ] @@ -7788,8 +7934,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -7798,10 +7944,10 @@ "cset w21, vs", "cset w22, eq", "cset w23, mi", - "orr w23, w23, w21", - "strb w23, [x28, #744]", - "orr w22, w22, w21", - "strb w22, [x28, #750]", + "orr w24, w23, w21", + "strb w24, [x28, #744]", + "orr w23, w22, w21", + "strb w23, [x28, #750]", "strb w20, [x28, #745]", "strb w21, [x28, #746]" ] @@ -7814,8 +7960,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -7824,10 +7970,10 @@ "cset w21, vs", "cset w22, eq", "cset w23, mi", - "orr w23, w23, w21", - "strb w23, [x28, #744]", - "orr w22, w22, w21", - "strb w22, [x28, #750]", + "orr w24, w23, w21", + "strb w24, [x28, #744]", + "orr w23, w22, w21", + "strb w23, [x28, #750]", "strb w20, [x28, #745]", "strb w21, [x28, #746]" ] @@ -7840,8 +7986,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -7850,10 +7996,10 @@ "cset w21, vs", "cset w22, eq", "cset w23, mi", - "orr w23, w23, w21", - "strb w23, [x28, #744]", - "orr w22, w22, w21", - "strb w22, [x28, #750]", + "orr w24, w23, w21", + "strb w24, [x28, #744]", + "orr w23, w22, w21", + "strb w23, [x28, #750]", "strb w20, [x28, #745]", "strb w21, [x28, #746]" ] @@ -7867,8 +8013,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x0", "add w22, w20, #0x0 (0)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -7877,18 +8023,18 @@ "cset w23, vs", "cset w24, eq", "cset w25, mi", - "orr w25, w25, w23", - "strb w25, [x28, #744]", - "orr w24, w24, w23", - "strb w24, [x28, #750]", + "orr w30, w25, w23", + "strb w30, [x28, #744]", + "orr w25, w24, w23", + "strb w25, [x28, #750]", "strb w21, [x28, #745]", "strb w23, [x28, #746]", "ldrb w21, [x28, #1026]", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -7901,8 +8047,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -7911,18 +8057,18 @@ "cset w23, vs", "cset w24, eq", "cset w25, mi", - "orr w25, w25, w23", - "strb w25, [x28, #744]", - "orr w24, w24, w23", - "strb w24, [x28, #750]", + "orr w30, w25, w23", + "strb w30, [x28, #744]", + "orr w25, w24, w23", + "strb w25, [x28, #750]", "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -7934,8 +8080,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -7945,18 +8091,18 @@ "cset w23, vs", "cset w24, eq", "cset w25, mi", - "orr w25, w25, w23", - "strb w25, [x28, #744]", - "orr w24, w24, w23", - "strb w24, [x28, #750]", + "orr w30, w25, w23", + "strb w30, [x28, #744]", + "orr w25, w24, w23", + "strb w25, [x28, #750]", "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -7968,8 +8114,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -7979,18 +8125,18 @@ "cset w23, vs", "cset w24, eq", "cset w25, mi", - "orr w25, w25, w23", - "strb w25, [x28, #744]", - "orr w24, w24, w23", - "strb w24, [x28, #750]", + "orr w30, w25, w23", + "strb w30, [x28, #744]", + "orr w25, w24, w23", + "strb w25, [x28, #750]", "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -8002,8 +8148,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -8013,18 +8159,18 @@ "cset w23, vs", "cset w24, eq", "cset w25, mi", - "orr w25, w25, w23", - "strb w25, [x28, #744]", - "orr w24, w24, w23", - "strb w24, [x28, #750]", + "orr w30, w25, w23", + "strb w30, [x28, #744]", + "orr w25, w24, w23", + "strb w25, [x28, #750]", "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -8036,8 +8182,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -8047,18 +8193,18 @@ "cset w23, vs", "cset w24, eq", "cset w25, mi", - "orr w25, w25, w23", - "strb w25, [x28, #744]", - "orr w24, w24, w23", - "strb w24, [x28, #750]", + "orr w30, w25, w23", + "strb w30, [x28, #744]", + "orr w25, w24, w23", + "strb w25, [x28, #750]", "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -8070,8 +8216,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -8081,18 +8227,18 @@ "cset w23, vs", "cset w24, eq", "cset w25, mi", - "orr w25, w25, w23", - "strb w25, [x28, #744]", - "orr w24, w24, w23", - "strb w24, [x28, #750]", + "orr w30, w25, w23", + "strb w30, [x28, #744]", + "orr w25, w24, w23", + "strb w25, [x28, #750]", "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -8104,8 +8250,8 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -8115,64 +8261,67 @@ "cset w23, vs", "cset w24, eq", "cset w25, mi", - "orr w25, w25, w23", - "strb w25, [x28, #744]", - "orr w24, w24, w23", - "strb w24, [x28, #750]", + "orr w30, w25, w23", + "strb w30, [x28, #744]", + "orr w25, w24, w23", + "strb w25, [x28, #750]", "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fiadd word [rax]": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 10, "Comment": [ "0xde !11b /0" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldrh w21, [x4]", - "sxth x21, w21", + "mov x21, x4", + "ldrh w22, [x21]", + "sxth x21, w22", "scvtf d2, w21", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", + "fadd d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fimul word [rax]": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 10, "Comment": [ "0xde !11b /1" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldrh w21, [x4]", - "sxth x21, w21", + "mov x21, x4", + "ldrh w22, [x21]", + "sxth x21, w22", "scvtf d2, w21", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", + "fmul d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "ficom word [rax]": { - "ExpectedInstructionCount": 17, + "ExpectedInstructionCount": 18, "Comment": [ "0xde !11b /2" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldrh w21, [x4]", - "sxth x21, w21", + "mov x21, x4", + "ldrh w22, [x21]", + "sxth x21, w22", "scvtf d2, w21", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -8181,23 +8330,24 @@ "cset w21, vs", "cset w22, eq", "cset w23, mi", - "orr w23, w23, w21", - "strb w23, [x28, #744]", - "orr w22, w22, w21", - "strb w22, [x28, #750]", + "orr w24, w23, w21", + "strb w24, [x28, #744]", + "orr w23, w22, w21", + "strb w23, [x28, #750]", "strb w20, [x28, #745]", "strb w21, [x28, #746]" ] }, "ficomp word [rax]": { - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 26, "Comment": [ "0xde !11b /3" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldrh w21, [x4]", - "sxth x21, w21", + "mov x21, x4", + "ldrh w22, [x21]", + "sxth x21, w22", "scvtf d2, w21", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -8207,87 +8357,91 @@ "cset w23, vs", "cset w24, eq", "cset w25, mi", - "orr w25, w25, w23", - "strb w25, [x28, #744]", - "orr w24, w24, w23", - "strb w24, [x28, #750]", + "orr w30, w25, w23", + "strb w30, [x28, #744]", + "orr w25, w24, w23", + "strb w25, [x28, #750]", "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "lsl w23, w21, w20", + "bic w21, w22, w23", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fisub word [rax]": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 10, "Comment": [ "0xde !11b /4" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldrh w21, [x4]", - "sxth x21, w21", + "mov x21, x4", + "ldrh w22, [x21]", + "sxth x21, w22", "scvtf d2, w21", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", + "fsub d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fisubr word [rax]": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 10, "Comment": [ "0xde !11b /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldrh w21, [x4]", - "sxth x21, w21", + "mov x21, x4", + "ldrh w22, [x21]", + "sxth x21, w22", "scvtf d2, w21", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", + "fsub d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fidiv word [rax]": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 10, "Comment": [ "0xde !11b /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldrh w21, [x4]", - "sxth x21, w21", + "mov x21, x4", + "ldrh w22, [x21]", + "sxth x21, w22", "scvtf d2, w21", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", + "fdiv d4, d3, d2", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "fidivr word [rax]": { - "ExpectedInstructionCount": 9, + "ExpectedInstructionCount": 10, "Comment": [ "0xde !11b /7" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "ldrh w21, [x4]", - "sxth x21, w21", + "mov x21, x4", + "ldrh w22, [x21]", + "sxth x21, w22", "scvtf d2, w21", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", + "fdiv d4, d2, d3", "add x0, x28, x20, lsl #4", - "str d2, [x0, #768]" + "str d4, [x0, #768]" ] }, "faddp st0": { @@ -8298,22 +8452,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fadd d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "faddp st1": { @@ -8325,21 +8479,21 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "fadd d4, d3, d2", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w20", + "bic w21, w22, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x22, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x23, lsl #4", + "str d4, [x0, #768]" ] }, "faddp st2": { @@ -8350,22 +8504,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fadd d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "faddp st3": { @@ -8376,22 +8530,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fadd d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "faddp st4": { @@ -8402,22 +8556,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fadd d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "faddp st5": { @@ -8428,22 +8582,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fadd d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "faddp st6": { @@ -8454,22 +8608,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fadd d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "faddp st7": { @@ -8480,22 +8634,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fadd d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fadd d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fmulp st0": { @@ -8506,22 +8660,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fmul d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fmulp st1": { @@ -8533,21 +8687,21 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "fmul d4, d3, d2", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w20", + "bic w21, w22, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x22, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x23, lsl #4", + "str d4, [x0, #768]" ] }, "fmulp st2": { @@ -8558,22 +8712,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fmul d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fmulp st3": { @@ -8584,22 +8738,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fmul d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fmulp st4": { @@ -8610,22 +8764,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fmul d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fmulp st5": { @@ -8636,22 +8790,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fmul d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fmulp st6": { @@ -8662,22 +8816,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fmul d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fmulp st7": { @@ -8688,22 +8842,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fmul d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fmul d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fcompp": { @@ -8715,8 +8869,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -8725,23 +8879,23 @@ "cset w23, vs", "cset w24, eq", "cset w25, mi", - "orr w25, w25, w23", - "strb w25, [x28, #744]", - "orr w24, w24, w23", - "strb w24, [x28, #750]", + "orr w30, w25, w23", + "strb w30, [x28, #744]", + "orr w25, w24, w23", + "strb w25, [x28, #750]", "strb w22, [x28, #745]", "strb w23, [x28, #746]", "ldrb w22, [x28, #1026]", "lsl w23, w21, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "lsl w21, w21, w20", - "bic w21, w22, w21", + "bic w24, w22, w23", + "strb w24, [x28, #1026]", + "add w22, w20, #0x1 (1)", + "and w20, w22, #0x7", + "lsl w22, w21, w20", + "bic w21, w24, w22", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -8755,22 +8909,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fsub d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubrp st1, st0": { @@ -8782,21 +8936,21 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "fsub d4, d3, d2", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w20", + "bic w21, w22, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x22, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x23, lsl #4", + "str d4, [x0, #768]" ] }, "fsubrp st2, st0": { @@ -8807,22 +8961,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fsub d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubrp st3, st0": { @@ -8833,22 +8987,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fsub d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubrp st4, st0": { @@ -8859,22 +9013,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fsub d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubrp st5, st0": { @@ -8885,22 +9039,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fsub d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubrp st6, st0": { @@ -8911,22 +9065,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fsub d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubrp st7, st0": { @@ -8937,22 +9091,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fsub d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "db 0xde, 0xe8": { @@ -8965,22 +9119,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "ldrb w22, [x28, #1026]", + "fsub d4, d2, d3", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubp st1, st0": { @@ -8992,21 +9146,21 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "fsub d4, d2, d3", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w20", + "bic w21, w22, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x22, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x23, lsl #4", + "str d4, [x0, #768]" ] }, "fsubp st2, st0": { @@ -9017,22 +9171,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "ldrb w22, [x28, #1026]", + "fsub d4, d2, d3", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubp st3, st0": { @@ -9043,22 +9197,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "ldrb w22, [x28, #1026]", + "fsub d4, d2, d3", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubp st4, st0": { @@ -9069,22 +9223,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "ldrb w22, [x28, #1026]", + "fsub d4, d2, d3", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubp st5, st0": { @@ -9095,22 +9249,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "ldrb w22, [x28, #1026]", + "fsub d4, d2, d3", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubp st6, st0": { @@ -9121,22 +9275,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "ldrb w22, [x28, #1026]", + "fsub d4, d2, d3", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fsubp st7, st0": { @@ -9147,22 +9301,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fsub d2, d2, d3", - "ldrb w22, [x28, #1026]", + "fsub d4, d2, d3", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "db 0xde, 0xf0": { @@ -9175,22 +9329,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fdiv d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", - "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", + "strb w20, [x28, #747]", + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivrp st1, st0": { @@ -9202,21 +9356,21 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "fdiv d4, d3, d2", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w20", + "bic w21, w22, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x22, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x23, lsl #4", + "str d4, [x0, #768]" ] }, "fdivrp st2, st0": { @@ -9227,22 +9381,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fdiv d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivrp st3, st0": { @@ -9253,22 +9407,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fdiv d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivrp st4, st0": { @@ -9279,22 +9433,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fdiv d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivrp st5, st0": { @@ -9305,22 +9459,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fdiv d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivrp st6, st0": { @@ -9331,22 +9485,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fdiv d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivrp st7, st0": { @@ -9357,22 +9511,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d3, d2", - "ldrb w22, [x28, #1026]", + "fdiv d4, d3, d2", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "db 0xde, 0xf8": { @@ -9385,22 +9539,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "ldrb w22, [x28, #1026]", + "fdiv d4, d2, d3", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivp st1, st0": { @@ -9412,21 +9566,21 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "fdiv d4, d2, d3", + "ldrb w22, [x28, #1026]", + "lsl w24, w21, w20", + "bic w21, w22, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x22, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x23, lsl #4", + "str d4, [x0, #768]" ] }, "fdivp st2, st0": { @@ -9437,22 +9591,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "ldrb w22, [x28, #1026]", + "fdiv d4, d2, d3", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivp st3, st0": { @@ -9463,22 +9617,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "ldrb w22, [x28, #1026]", + "fdiv d4, d2, d3", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivp st4, st0": { @@ -9489,22 +9643,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "ldrb w22, [x28, #1026]", + "fdiv d4, d2, d3", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivp st5, st0": { @@ -9515,22 +9669,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "ldrb w22, [x28, #1026]", + "fdiv d4, d2, d3", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivp st6, st0": { @@ -9541,22 +9695,22 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "ldrb w22, [x28, #1026]", + "fdiv d4, d2, d3", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fdivp st7, st0": { @@ -9567,48 +9721,49 @@ "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", - "fdiv d2, d2, d3", - "ldrb w22, [x28, #1026]", + "fdiv d4, d2, d3", + "ldrb w21, [x28, #1026]", "mov w23, #0x1", - "lsl w23, w23, w20", - "bic w22, w22, w23", - "strb w22, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w24, w23, w20", + "bic w23, w21, w24", + "strb w23, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", - "add x0, x28, x21, lsl #4", - "str d2, [x0, #768]" + "add x0, x28, x22, lsl #4", + "str d4, [x0, #768]" ] }, "fild word [rax]": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xdf !11b /0" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "ldrh w21, [x4]", - "sxth x21, w21", + "mov x21, x4", + "ldrh w22, [x21]", + "sxth x21, w22", "scvtf d2, x21", "add x0, x28, x20, lsl #4", "str d2, [x0, #768]" ] }, "fisttp word [rax]": { - "ExpectedInstructionCount": 13, + "ExpectedInstructionCount": 14, "Comment": [ "0xdf !11b /1" ], @@ -9617,19 +9772,20 @@ "add x0, x28, x20, lsl #4", "ldr d2, [x0, #768]", "fcvtzs x21, d2", - "strh w21, [x4]", + "mov x22, x4", + "strh w21, [x22]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fist word [rax]": { - "ExpectedInstructionCount": 6, + "ExpectedInstructionCount": 7, "Comment": [ "0xdf !11b /2" ], @@ -9639,11 +9795,12 @@ "ldr d2, [x0, #768]", "frinti d0, d2", "fcvtzs x20, d0", - "strh w20, [x4]" + "mov x21, x4", + "strh w20, [x21]" ] }, "fistp word [rax]": { - "ExpectedInstructionCount": 14, + "ExpectedInstructionCount": 15, "Comment": [ "0xdf !11b /3" ], @@ -9653,33 +9810,35 @@ "ldr d2, [x0, #768]", "frinti d0, d2", "fcvtzs x21, d0", - "strh w21, [x4]", + "mov x22, x4", + "strh w21, [x22]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fbld tword [rax]": { - "ExpectedInstructionCount": 66, + "ExpectedInstructionCount": 67, "Comment": [ "0xdf !11b /4" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "mov w21, #0x1", - "sub w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "sub w22, w20, #0x1 (1)", + "and w20, w22, #0x7", "ldrb w22, [x28, #1026]", - "lsl w21, w21, w20", - "orr w21, w22, w21", + "lsl w23, w21, w20", + "orr w21, w22, w23", "strb w21, [x28, #1026]", "strb w20, [x28, #747]", - "ldr q2, [x4]", + "mov x21, x4", + "ldr q2, [x21]", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -9705,9 +9864,9 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -9720,8 +9879,8 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v2.d[0]", - "umov w2, v2.h[4]", + "mov x1, v3.d[0]", + "umov w2, v3.h[4]", "ldr x3, [x28, #1192]", "blr x3", "ldr w4, [x28, #728]", @@ -9739,7 +9898,7 @@ ] }, "fbstp tword [rax]": { - "ExpectedInstructionCount": 69, + "ExpectedInstructionCount": 70, "Comment": [ "0xdf !11b /6" ], @@ -9771,9 +9930,9 @@ "ld1 {v2.2d, v3.2d}, [sp], #32", "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64", "ldr x30, [sp], #16", - "eor v2.16b, v2.16b, v2.16b", - "mov v2.d[0], x0", - "mov v2.h[4], w1", + "eor v3.16b, v3.16b, v3.16b", + "mov v3.d[0], x0", + "mov v3.h[4], w1", "mrs x0, nzcv", "str w0, [x28, #728]", "stp x4, x5, [x28, #8]", @@ -9786,8 +9945,8 @@ "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64", "str x30, [x0], #16", "ldrh w0, [x28, #1024]", - "mov x1, v2.d[0]", - "umov w2, v2.h[4]", + "mov x1, v3.d[0]", + "umov w2, v3.h[4]", "ldr x3, [x28, #1392]", "blr x3", "ldr w4, [x28, #728]", @@ -9802,16 +9961,17 @@ "eor v2.16b, v2.16b, v2.16b", "mov v2.d[0], x0", "mov v2.h[4], w1", - "str d2, [x4]", - "mov x21, v2.d[1]", - "strh w21, [x4, #8]", + "mov x21, x4", + "str d2, [x21]", + "mov x22, v2.d[1]", + "strh w22, [x21, #8]", "ldrb w21, [x28, #1026]", "mov w22, #0x1", - "lsl w22, w22, w20", - "bic w21, w21, w22", - "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "lsl w23, w22, w20", + "bic w22, w21, w23", + "strb w22, [x28, #1026]", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -9822,8 +9982,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -9834,8 +9994,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -9846,8 +10006,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -9858,8 +10018,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -9870,8 +10030,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -9882,8 +10042,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -9894,8 +10054,8 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, @@ -9906,41 +10066,45 @@ ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]" ] }, "fnstsw ax": { - "ExpectedInstructionCount": 12, + "ExpectedInstructionCount": 16, "Comment": [ "0xdf 11b 0xe0 /4" ], "ExpectedArm64ASM": [ "mov w20, #0x0", "ldrb w21, [x28, #747]", - "bfi x20, x21, #11, #3", - "ldrb w21, [x28, #744]", - "ldrb w22, [x28, #745]", + "mov x22, x20", + "bfi x22, x21, #11, #3", + "ldrb w20, [x28, #744]", + "ldrb w21, [x28, #745]", "ldrb w23, [x28, #746]", "ldrb w24, [x28, #750]", - "orr x20, x20, x21, lsl #8", - "orr x20, x20, x22, lsl #9", - "orr x20, x20, x23, lsl #10", - "orr x20, x20, x24, lsl #14", - "bfxil x4, x20, #0, #16" + "orr x25, x22, x20, lsl #8", + "orr x20, x25, x21, lsl #9", + "orr x21, x20, x23, lsl #10", + "orr x20, x21, x24, lsl #14", + "mov x21, x4", + "mov x22, x21", + "bfxil x22, x20, #0, #16", + "mov x4, x22" ] }, "fucomip st0": { - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 26, "Comment": [ "0xdf 11b 0xe8 /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -9949,23 +10113,24 @@ "cset w22, eq", "cset w23, lo", "cset w24, vs", - "orr w23, w23, w24", - "lsl x23, x23, #29", - "orr w22, w22, w24", - "orr w22, w23, w22, lsl #30", - "eor w26, w24, #0x1", + "orr w25, w23, w24", + "lsl x23, x25, #29", + "orr w25, w22, w24", + "orr w22, w23, w25, lsl #30", + "eor w23, w24, #0x1", + "mov x26, x23", "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "lsl w24, w21, w20", + "bic w21, w23, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", "msr nzcv, x22" ] }, "fucomip st1": { - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 26, "Comment": [ "0xdf 11b 0xe9 /5" ], @@ -9973,8 +10138,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -9982,31 +10147,32 @@ "cset w22, eq", "cset w23, lo", "cset w24, vs", - "orr w23, w23, w24", - "lsl x23, x23, #29", - "orr w22, w22, w24", - "orr w22, w23, w22, lsl #30", - "eor w26, w24, #0x1", + "orr w25, w23, w24", + "lsl x23, x25, #29", + "orr w25, w22, w24", + "orr w22, w23, w25, lsl #30", + "eor w23, w24, #0x1", + "mov x26, x23", "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "lsl w24, w21, w20", + "bic w21, w23, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", "msr nzcv, x22" ] }, "fucomip st2": { - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 26, "Comment": [ "0xdf 11b 0xea /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -10015,31 +10181,32 @@ "cset w22, eq", "cset w23, lo", "cset w24, vs", - "orr w23, w23, w24", - "lsl x23, x23, #29", - "orr w22, w22, w24", - "orr w22, w23, w22, lsl #30", - "eor w26, w24, #0x1", + "orr w25, w23, w24", + "lsl x23, x25, #29", + "orr w25, w22, w24", + "orr w22, w23, w25, lsl #30", + "eor w23, w24, #0x1", + "mov x26, x23", "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "lsl w24, w21, w20", + "bic w21, w23, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", "msr nzcv, x22" ] }, "fucomip st3": { - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 26, "Comment": [ "0xdf 11b 0xeb /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -10048,31 +10215,32 @@ "cset w22, eq", "cset w23, lo", "cset w24, vs", - "orr w23, w23, w24", - "lsl x23, x23, #29", - "orr w22, w22, w24", - "orr w22, w23, w22, lsl #30", - "eor w26, w24, #0x1", + "orr w25, w23, w24", + "lsl x23, x25, #29", + "orr w25, w22, w24", + "orr w22, w23, w25, lsl #30", + "eor w23, w24, #0x1", + "mov x26, x23", "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "lsl w24, w21, w20", + "bic w21, w23, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", "msr nzcv, x22" ] }, "fucomip st4": { - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 26, "Comment": [ "0xdf 11b 0xec /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -10081,31 +10249,32 @@ "cset w22, eq", "cset w23, lo", "cset w24, vs", - "orr w23, w23, w24", - "lsl x23, x23, #29", - "orr w22, w22, w24", - "orr w22, w23, w22, lsl #30", - "eor w26, w24, #0x1", + "orr w25, w23, w24", + "lsl x23, x25, #29", + "orr w25, w22, w24", + "orr w22, w23, w25, lsl #30", + "eor w23, w24, #0x1", + "mov x26, x23", "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "lsl w24, w21, w20", + "bic w21, w23, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", "msr nzcv, x22" ] }, "fucomip st5": { - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 26, "Comment": [ "0xdf 11b 0xed /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -10114,31 +10283,32 @@ "cset w22, eq", "cset w23, lo", "cset w24, vs", - "orr w23, w23, w24", - "lsl x23, x23, #29", - "orr w22, w22, w24", - "orr w22, w23, w22, lsl #30", - "eor w26, w24, #0x1", + "orr w25, w23, w24", + "lsl x23, x25, #29", + "orr w25, w22, w24", + "orr w22, w23, w25, lsl #30", + "eor w23, w24, #0x1", + "mov x26, x23", "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "lsl w24, w21, w20", + "bic w21, w23, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", "msr nzcv, x22" ] }, "fucomip st6": { - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 26, "Comment": [ "0xdf 11b 0xee /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -10147,31 +10317,32 @@ "cset w22, eq", "cset w23, lo", "cset w24, vs", - "orr w23, w23, w24", - "lsl x23, x23, #29", - "orr w22, w22, w24", - "orr w22, w23, w22, lsl #30", - "eor w26, w24, #0x1", + "orr w25, w23, w24", + "lsl x23, x25, #29", + "orr w25, w22, w24", + "orr w22, w23, w25, lsl #30", + "eor w23, w24, #0x1", + "mov x26, x23", "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "lsl w24, w21, w20", + "bic w21, w23, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", "msr nzcv, x22" ] }, "fucomip st7": { - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 26, "Comment": [ "0xdf 11b 0xef /5" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -10180,31 +10351,32 @@ "cset w22, eq", "cset w23, lo", "cset w24, vs", - "orr w23, w23, w24", - "lsl x23, x23, #29", - "orr w22, w22, w24", - "orr w22, w23, w22, lsl #30", - "eor w26, w24, #0x1", + "orr w25, w23, w24", + "lsl x23, x25, #29", + "orr w25, w22, w24", + "orr w22, w23, w25, lsl #30", + "eor w23, w24, #0x1", + "mov x26, x23", "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "lsl w24, w21, w20", + "bic w21, w23, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", "msr nzcv, x22" ] }, "fcomip st0": { - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 26, "Comment": [ "0xdf 11b 0xf0 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x0 (0)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -10213,23 +10385,24 @@ "cset w22, eq", "cset w23, lo", "cset w24, vs", - "orr w23, w23, w24", - "lsl x23, x23, #29", - "orr w22, w22, w24", - "orr w22, w23, w22, lsl #30", - "eor w26, w24, #0x1", + "orr w25, w23, w24", + "lsl x23, x25, #29", + "orr w25, w22, w24", + "orr w22, w23, w25, lsl #30", + "eor w23, w24, #0x1", + "mov x26, x23", "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "lsl w24, w21, w20", + "bic w21, w23, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", "msr nzcv, x22" ] }, "fcomip st1": { - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 26, "Comment": [ "0xdf 11b 0xf1 /6" ], @@ -10237,8 +10410,8 @@ "ldrb w20, [x28, #747]", "mov w21, #0x1", "add w22, w20, #0x1 (1)", - "and w22, w22, #0x7", - "add x0, x28, x22, lsl #4", + "and w23, w22, #0x7", + "add x0, x28, x23, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -10246,31 +10419,32 @@ "cset w22, eq", "cset w23, lo", "cset w24, vs", - "orr w23, w23, w24", - "lsl x23, x23, #29", - "orr w22, w22, w24", - "orr w22, w23, w22, lsl #30", - "eor w26, w24, #0x1", + "orr w25, w23, w24", + "lsl x23, x25, #29", + "orr w25, w22, w24", + "orr w22, w23, w25, lsl #30", + "eor w23, w24, #0x1", + "mov x26, x23", "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "lsl w24, w21, w20", + "bic w21, w23, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", "msr nzcv, x22" ] }, "fcomip st2": { - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 26, "Comment": [ "0xdf 11b 0xf2 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x2 (2)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -10279,31 +10453,32 @@ "cset w22, eq", "cset w23, lo", "cset w24, vs", - "orr w23, w23, w24", - "lsl x23, x23, #29", - "orr w22, w22, w24", - "orr w22, w23, w22, lsl #30", - "eor w26, w24, #0x1", + "orr w25, w23, w24", + "lsl x23, x25, #29", + "orr w25, w22, w24", + "orr w22, w23, w25, lsl #30", + "eor w23, w24, #0x1", + "mov x26, x23", "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "lsl w24, w21, w20", + "bic w21, w23, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", "msr nzcv, x22" ] }, "fcomip st3": { - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 26, "Comment": [ "0xdf 11b 0xf3 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x3 (3)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -10312,31 +10487,32 @@ "cset w22, eq", "cset w23, lo", "cset w24, vs", - "orr w23, w23, w24", - "lsl x23, x23, #29", - "orr w22, w22, w24", - "orr w22, w23, w22, lsl #30", - "eor w26, w24, #0x1", + "orr w25, w23, w24", + "lsl x23, x25, #29", + "orr w25, w22, w24", + "orr w22, w23, w25, lsl #30", + "eor w23, w24, #0x1", + "mov x26, x23", "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "lsl w24, w21, w20", + "bic w21, w23, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", "msr nzcv, x22" ] }, "fcomip st4": { - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 26, "Comment": [ "0xdf 11b 0xf4 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x4 (4)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -10345,31 +10521,32 @@ "cset w22, eq", "cset w23, lo", "cset w24, vs", - "orr w23, w23, w24", - "lsl x23, x23, #29", - "orr w22, w22, w24", - "orr w22, w23, w22, lsl #30", - "eor w26, w24, #0x1", + "orr w25, w23, w24", + "lsl x23, x25, #29", + "orr w25, w22, w24", + "orr w22, w23, w25, lsl #30", + "eor w23, w24, #0x1", + "mov x26, x23", "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "lsl w24, w21, w20", + "bic w21, w23, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", "msr nzcv, x22" ] }, "fcomip st5": { - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 26, "Comment": [ "0xdf 11b 0xf5 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x5 (5)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -10378,31 +10555,32 @@ "cset w22, eq", "cset w23, lo", "cset w24, vs", - "orr w23, w23, w24", - "lsl x23, x23, #29", - "orr w22, w22, w24", - "orr w22, w23, w22, lsl #30", - "eor w26, w24, #0x1", + "orr w25, w23, w24", + "lsl x23, x25, #29", + "orr w25, w22, w24", + "orr w22, w23, w25, lsl #30", + "eor w23, w24, #0x1", + "mov x26, x23", "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "lsl w24, w21, w20", + "bic w21, w23, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", "msr nzcv, x22" ] }, "fcomip st6": { - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 26, "Comment": [ "0xdf 11b 0xf6 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x6 (6)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -10411,31 +10589,32 @@ "cset w22, eq", "cset w23, lo", "cset w24, vs", - "orr w23, w23, w24", - "lsl x23, x23, #29", - "orr w22, w22, w24", - "orr w22, w23, w22, lsl #30", - "eor w26, w24, #0x1", + "orr w25, w23, w24", + "lsl x23, x25, #29", + "orr w25, w22, w24", + "orr w22, w23, w25, lsl #30", + "eor w23, w24, #0x1", + "mov x26, x23", "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "lsl w24, w21, w20", + "bic w21, w23, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", "msr nzcv, x22" ] }, "fcomip st7": { - "ExpectedInstructionCount": 25, + "ExpectedInstructionCount": 26, "Comment": [ "0xdf 11b 0xf7 /6" ], "ExpectedArm64ASM": [ "ldrb w20, [x28, #747]", "add w21, w20, #0x7 (7)", - "and w21, w21, #0x7", - "add x0, x28, x21, lsl #4", + "and w22, w21, #0x7", + "add x0, x28, x22, lsl #4", "ldr d2, [x0, #768]", "add x0, x28, x20, lsl #4", "ldr d3, [x0, #768]", @@ -10444,17 +10623,18 @@ "cset w22, eq", "cset w23, lo", "cset w24, vs", - "orr w23, w23, w24", - "lsl x23, x23, #29", - "orr w22, w22, w24", - "orr w22, w23, w22, lsl #30", - "eor w26, w24, #0x1", + "orr w25, w23, w24", + "lsl x23, x25, #29", + "orr w25, w22, w24", + "orr w22, w23, w25, lsl #30", + "eor w23, w24, #0x1", + "mov x26, x23", "ldrb w23, [x28, #1026]", - "lsl w21, w21, w20", - "bic w21, w23, w21", + "lsl w24, w21, w20", + "bic w21, w23, w24", "strb w21, [x28, #1026]", - "add w20, w20, #0x1 (1)", - "and w20, w20, #0x7", + "add w21, w20, #0x1 (1)", + "and w20, w21, #0x7", "strb w20, [x28, #747]", "msr nzcv, x22" ]