From e60416d08019d6a78fa49544eb9ac1830bdc6d4c Mon Sep 17 00:00:00 2001
From: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Date: Mon, 29 Apr 2024 21:40:04 -0400
Subject: [PATCH] InstCountCI: Update

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
---
 unittests/InstructionCountCI/AFP/H0F3A.json   |   80 +-
 .../AFP/SVE256/Secondary.json                 |   19 +-
 .../AFP/SVE256/Secondary_REP.json             |  202 +-
 .../AFP/SVE256/Secondary_REPNE.json           |  192 +-
 .../InstructionCountCI/AFP/Secondary.json     |   19 +-
 .../InstructionCountCI/AFP/Secondary_REP.json |  202 +-
 .../AFP/Secondary_REPNE.json                  |  192 +-
 .../InstructionCountCI/AFP/VEX_map1.json      |  410 +-
 .../InstructionCountCI/AFP/VEX_map3.json      |   80 +-
 unittests/InstructionCountCI/Atomics.json     | 1926 ++--
 .../InstructionCountCI/Crypto/H0F38.json      |  105 +-
 .../InstructionCountCI/Crypto/H0F3A.json      |   58 +-
 unittests/InstructionCountCI/DDD.json         |  117 +-
 .../FEXOpt/AddressingLimitations.json         | 1078 ++-
 .../FEXOpt/AddressingLimitations_32Bit.json   |  324 +-
 .../InstructionCountCI/FEXOpt/MultiInst.json  | 1033 ++-
 .../FEXOpt/MultiInst_AFP.json                 |   13 +-
 .../InstructionCountCI/FEXOpt/libnss.json     | 4900 +++++-----
 .../InstructionCountCI/FlagM/Atomics.json     | 1678 ++--
 .../InstructionCountCI/FlagM/FlagOpts.json    |  370 +-
 unittests/InstructionCountCI/FlagM/H0F38.json |  126 +-
 .../InstructionCountCI/FlagM/HotBlocks.json   |  311 +-
 .../FlagM/HotBlocks_32Bit.json                | 1588 ++--
 .../FlagM/HotBlocks_AFP.json                  |  193 +-
 .../InstructionCountCI/FlagM/Primary.json     | 3612 +++++---
 .../FlagM/PrimaryGroup.json                   | 3146 ++++---
 .../FlagM/Primary_32Bit.json                  |  569 +-
 .../InstructionCountCI/FlagM/Secondary.json   | 2162 +++--
 .../FlagM/SecondaryGroup.json                 | 1627 ++--
 .../FlagM/SecondaryModRM.json                 |   26 +-
 .../FlagM/Secondary_OpSize.json               |   60 +-
 .../FlagM/Secondary_REP.json                  |  130 +-
 .../InstructionCountCI/FlagM/VEX_map1.json    |  114 +-
 .../InstructionCountCI/FlagM/VEX_map2.json    |  524 +-
 .../FlagM/VEX_map_group.json                  |   84 +-
 unittests/InstructionCountCI/FlagM/x87.json   | 7884 ++++++++--------
 .../InstructionCountCI/FlagM/x87_f64.json     | 5880 ++++++------
 unittests/InstructionCountCI/H0F38.json       |  920 +-
 unittests/InstructionCountCI/H0F3A.json       | 1622 ++--
 .../InstructionCountCI/H0F3A_SVE128.json      |  236 +-
 unittests/InstructionCountCI/Primary.json     | 5606 +++++++-----
 .../InstructionCountCI/PrimaryGroup.json      | 4289 +++++----
 .../InstructionCountCI/Primary_32Bit.json     |  599 +-
 unittests/InstructionCountCI/RPRES/DDD.json   |   16 +-
 .../InstructionCountCI/RPRES/Secondary.json   |   12 +-
 .../RPRES/Secondary_REP_AFP.json              |   16 +-
 .../RPRES/VEX_map1_AFP.json                   |   42 +-
 unittests/InstructionCountCI/Secondary.json   | 3871 ++++----
 .../InstructionCountCI/SecondaryGroup.json    | 1889 ++--
 .../InstructionCountCI/SecondaryModRM.json    |   31 +-
 .../InstructionCountCI/Secondary_32Bit.json   |   50 +-
 .../InstructionCountCI/Secondary_OpSize.json  | 1317 ++-
 .../Secondary_OpSize_FCMA.json                |    9 +-
 .../Secondary_OpSize_SVE128.json              |   94 +-
 .../Secondary_OpSize_SVE256.json              |   16 +-
 .../InstructionCountCI/Secondary_REP.json     |  617 +-
 .../InstructionCountCI/Secondary_REPNE.json   |  415 +-
 .../Secondary_REPNE_FCMA.json                 |    9 +-
 .../InstructionCountCI/Secondary_SVE128.json  |   84 +-
 unittests/InstructionCountCI/VEX_map1.json    | 4960 ++++++----
 .../InstructionCountCI/VEX_map1_FCMA.json     |   88 +-
 unittests/InstructionCountCI/VEX_map2.json    | 2193 +++--
 unittests/InstructionCountCI/VEX_map3.json    | 4464 +++++----
 .../InstructionCountCI/VEX_map_group.json     |  517 +-
 unittests/InstructionCountCI/x87.json         | 7990 +++++++++--------
 unittests/InstructionCountCI/x87_f64.json     | 6480 ++++++-------
 66 files changed, 51525 insertions(+), 37961 deletions(-)

diff --git a/unittests/InstructionCountCI/AFP/H0F3A.json b/unittests/InstructionCountCI/AFP/H0F3A.json
index 7ee3d95588..154a982d70 100644
--- a/unittests/InstructionCountCI/AFP/H0F3A.json
+++ b/unittests/InstructionCountCI/AFP/H0F3A.json
@@ -11,103 +11,143 @@
   },
   "Instructions": {
     "roundss xmm0, xmm1, 00000000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Nearest rounding",
         "0x66 0x0f 0x3a 0x0a"
       ],
       "ExpectedArm64ASM": [
-        "frintn s16, s17"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "frintn s4, s3",
+        "mov v16.16b, v4.16b"
       ]
     },
     "roundss xmm0, xmm1, 00000001b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "-inf rounding",
         "0x66 0x0f 0x3a 0x0a"
       ],
       "ExpectedArm64ASM": [
-        "frintm s16, s17"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "frintm s4, s3",
+        "mov v16.16b, v4.16b"
       ]
     },
     "roundss xmm0, xmm1, 00000010b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "+inf rounding",
         "0x66 0x0f 0x3a 0x0a"
       ],
       "ExpectedArm64ASM": [
-        "frintp s16, s17"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "frintp s4, s3",
+        "mov v16.16b, v4.16b"
       ]
     },
     "roundss xmm0, xmm1, 00000011b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "truncate rounding",
         "0x66 0x0f 0x3a 0x0a"
       ],
       "ExpectedArm64ASM": [
-        "frintz s16, s17"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "frintz s4, s3",
+        "mov v16.16b, v4.16b"
       ]
     },
     "roundss xmm0, xmm1, 00000100b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "host rounding mode rounding",
         "0x66 0x0f 0x3a 0x0a"
       ],
       "ExpectedArm64ASM": [
-        "frinti s16, s17"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "frinti s4, s3",
+        "mov v16.16b, v4.16b"
       ]
     },
     "roundsd xmm0, xmm1, 00000000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Nearest rounding",
         "0x66 0x0f 0x3a 0x0b"
       ],
       "ExpectedArm64ASM": [
-        "frintn d16, d17"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "frintn d4, d3",
+        "mov v16.16b, v4.16b"
       ]
     },
     "roundsd xmm0, xmm1, 00000001b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "-inf rounding",
         "0x66 0x0f 0x3a 0x0b"
       ],
       "ExpectedArm64ASM": [
-        "frintm d16, d17"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "frintm d4, d3",
+        "mov v16.16b, v4.16b"
       ]
     },
     "roundsd xmm0, xmm1, 00000010b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "+inf rounding",
         "0x66 0x0f 0x3a 0x0b"
       ],
       "ExpectedArm64ASM": [
-        "frintp d16, d17"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "frintp d4, d3",
+        "mov v16.16b, v4.16b"
       ]
     },
     "roundsd xmm0, xmm1, 00000011b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "truncate rounding",
         "0x66 0x0f 0x3a 0x0b"
       ],
       "ExpectedArm64ASM": [
-        "frintz d16, d17"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "frintz d4, d3",
+        "mov v16.16b, v4.16b"
       ]
     },
     "roundsd xmm0, xmm1, 00000100b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "host rounding mode rounding",
         "0x66 0x0f 0x3a 0x0b"
       ],
       "ExpectedArm64ASM": [
-        "frinti d16, d17"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "frinti d4, d3",
+        "mov v16.16b, v4.16b"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/AFP/SVE256/Secondary.json b/unittests/InstructionCountCI/AFP/SVE256/Secondary.json
index 43e9b71437..ee7cfb66a3 100644
--- a/unittests/InstructionCountCI/AFP/SVE256/Secondary.json
+++ b/unittests/InstructionCountCI/AFP/SVE256/Secondary.json
@@ -10,23 +10,30 @@
   },
   "Instructions": {
     "cvtpi2ps xmm0, [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0x0f 0x2a"
       ],
       "ExpectedArm64ASM": [
-        "ldr d2, [x4]",
-        "scvtf v16.2s, v2.2s"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "ldr d3, [x20]",
+        "mov z4.d, z2.d",
+        "scvtf v4.2s, v3.2s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "cvtpi2ps xmm0, mm0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x0f 0x2a"
       ],
       "ExpectedArm64ASM": [
-        "ldr d2, [x28, #768]",
-        "scvtf v16.2s, v2.2s"
+        "mov z2.d, p7/m, z16.d",
+        "ldr d3, [x28, #768]",
+        "mov z4.d, z2.d",
+        "scvtf v4.2s, v3.2s",
+        "mov z16.d, p7/m, z4.d"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/AFP/SVE256/Secondary_REP.json b/unittests/InstructionCountCI/AFP/SVE256/Secondary_REP.json
index 3130ee5890..7230fb7ea4 100644
--- a/unittests/InstructionCountCI/AFP/SVE256/Secondary_REP.json
+++ b/unittests/InstructionCountCI/AFP/SVE256/Secondary_REP.json
@@ -10,219 +10,307 @@
   },
   "Instructions": {
     "cvtsi2ss xmm0, eax": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf3 0x0f 0x2a"
       ],
       "ExpectedArm64ASM": [
-        "scvtf s16, w4"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "mov z3.d, z2.d",
+        "scvtf s3, w20",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "cvtsi2ss xmm0, dword [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0xf3 0x0f 0x2a"
       ],
       "ExpectedArm64ASM": [
-        "ldr s2, [x4]",
-        "scvtf s16, s2"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "ldr s3, [x20]",
+        "mov z4.d, z2.d",
+        "scvtf s4, s3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "cvtsi2ss xmm0, qword [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0xf3 0x0f 0x2a"
       ],
       "ExpectedArm64ASM": [
-        "ldr x20, [x4]",
-        "scvtf s16, x20"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "ldr x21, [x20]",
+        "mov z3.d, z2.d",
+        "scvtf s3, x21",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "sqrtss xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0xf3 0x0f 0x51",
       "ExpectedArm64ASM": [
-        "fsqrt s16, s17"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov z4.d, z2.d",
+        "fsqrt s4, s3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "rsqrtss xmm0, xmm1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "FEAT_FPRES could make this more optimal",
         "0xf3 0x0f 0x52"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov z4.d, z2.d",
         "fmov s0, #0x70 (1.0000)",
-        "fsqrt s1, s17",
-        "fdiv s16, s0, s1"
+        "fsqrt s1, s3",
+        "fdiv s4, s0, s1",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "rcpss xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "FEAT_FPRES could make this more optimal",
         "0xf3 0x0f 0x53"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov z4.d, z2.d",
         "fmov s0, #0x70 (1.0000)",
-        "fdiv s16, s0, s17"
+        "fdiv s4, s0, s3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "addss xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf3 0x0f 0x58"
       ],
       "ExpectedArm64ASM": [
-        "fadd s16, s16, s17"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov z4.d, z2.d",
+        "fadd s4, s2, s3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "mulss xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf3 0x0f 0x59"
       ],
       "ExpectedArm64ASM": [
-        "fmul s16, s16, s17"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov z4.d, z2.d",
+        "fmul s4, s2, s3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "cvtss2sd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0xf3 0x0f 0x5a",
       "ExpectedArm64ASM": [
-        "fcvt d16, s17"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov z4.d, z2.d",
+        "fcvt d4, s3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "cvtss2sd xmm0, [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0xf3 0x0f 0x5a",
       "ExpectedArm64ASM": [
-        "ldr d2, [x4]",
-        "fcvt d16, s2"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "ldr d3, [x20]",
+        "mov z4.d, z2.d",
+        "fcvt d4, s3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "subss xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf3 0x0f 0x5c"
       ],
       "ExpectedArm64ASM": [
-        "fsub s16, s16, s17"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov z4.d, z2.d",
+        "fsub s4, s2, s3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "minss xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf3 0x0f 0x5d"
       ],
       "ExpectedArm64ASM": [
-        "fmin s16, s16, s17"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov z4.d, z2.d",
+        "fmin s4, s2, s3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "divss xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf3 0x0f 0x5e"
       ],
       "ExpectedArm64ASM": [
-        "fdiv s16, s16, s17"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov z4.d, z2.d",
+        "fdiv s4, s2, s3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "maxss xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf3 0x0f 0x5f"
       ],
       "ExpectedArm64ASM": [
-        "fmax s16, s16, s17"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov z4.d, z2.d",
+        "fmax s4, s2, s3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "cmpss xmm0, xmm1, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf3 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmeq s16, s16, s17"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov z4.d, z2.d",
+        "fcmeq s4, s2, s3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "cmpss xmm0, xmm1, 1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf3 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt s16, s17, s16"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov z4.d, z2.d",
+        "fcmgt s4, s3, s2",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "cmpss xmm0, xmm1, 2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf3 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmge s16, s17, s16"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov z4.d, z2.d",
+        "fcmge s4, s3, s2",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "cmpss xmm0, xmm1, 3": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "0xf3 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmge s0, s16, s17",
-        "fcmgt s1, s17, s16",
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov z4.d, z2.d",
+        "fcmge s0, s2, s3",
+        "fcmgt s1, s3, s2",
         "orr v0.8b, v0.8b, v1.8b",
         "mvn v0.8b, v0.8b",
         "ptrue p0.s, vl1",
-        "mov z16.s, p0/m, z0.s"
+        "mov z4.s, p0/m, z0.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "cmpss xmm0, xmm1, 4": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0xf3 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmeq s0, s16, s17",
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov z4.d, z2.d",
+        "fcmeq s0, s2, s3",
         "mvn v0.8b, v0.8b",
         "ptrue p0.s, vl1",
-        "mov z16.s, p0/m, z0.s"
+        "mov z4.s, p0/m, z0.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "cmpss xmm0, xmm1, 5": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0xf3 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt s2, s17, s16",
-        "mvn v2.16b, v2.16b",
-        "mov v16.s[0], v2.s[0]"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "fcmgt s4, s3, s2",
+        "mvn v3.16b, v4.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.s[0], v3.s[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "cmpss xmm0, xmm1, 6": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0xf3 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmge s2, s17, s16",
-        "mvn v2.16b, v2.16b",
-        "mov v16.s[0], v2.s[0]"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "fcmge s4, s3, s2",
+        "mvn v3.16b, v4.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.s[0], v3.s[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "cmpss xmm0, xmm1, 7": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xf3 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmge s0, s16, s17",
-        "fcmgt s1, s17, s16",
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov z4.d, z2.d",
+        "fcmge s0, s2, s3",
+        "fcmgt s1, s3, s2",
         "orr v0.8b, v0.8b, v1.8b",
         "ptrue p0.s, vl1",
-        "mov z16.s, p0/m, z0.s"
+        "mov z4.s, p0/m, z0.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/AFP/SVE256/Secondary_REPNE.json b/unittests/InstructionCountCI/AFP/SVE256/Secondary_REPNE.json
index b95a4d724b..4453a9005b 100644
--- a/unittests/InstructionCountCI/AFP/SVE256/Secondary_REPNE.json
+++ b/unittests/InstructionCountCI/AFP/SVE256/Secondary_REPNE.json
@@ -10,211 +10,295 @@
   },
   "Instructions": {
     "cvtsi2sd xmm0, eax": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf2 0x0f 0x2a"
       ],
       "ExpectedArm64ASM": [
-        "scvtf d16, w4"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "mov z3.d, z2.d",
+        "scvtf d3, w20",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "cvtsi2sd xmm0, dword [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0xf2 0x0f 0x2a"
       ],
       "ExpectedArm64ASM": [
-        "ldr w20, [x4]",
-        "scvtf d16, w20"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "ldr w21, [x20]",
+        "mov z3.d, z2.d",
+        "scvtf d3, w21",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "cvtsi2sd xmm0, rax": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf2 0x0f 0x2a"
       ],
       "ExpectedArm64ASM": [
-        "scvtf d16, x4"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "mov z3.d, z2.d",
+        "scvtf d3, x20",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "cvtsi2sd xmm0, qword [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0xf2 0x0f 0x2a"
       ],
       "ExpectedArm64ASM": [
-        "ldr d2, [x4]",
-        "scvtf d16, d2"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "ldr d3, [x20]",
+        "mov z4.d, z2.d",
+        "scvtf d4, d3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "sqrtsd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf2 0x0f 0x51"
       ],
       "ExpectedArm64ASM": [
-        "fsqrt d16, d17"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov z4.d, z2.d",
+        "fsqrt d4, d3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "addsd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf2 0x0f 0x58"
       ],
       "ExpectedArm64ASM": [
-        "fadd d16, d16, d17"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov z4.d, z2.d",
+        "fadd d4, d2, d3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "mulsd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf2 0x0f 0x59"
       ],
       "ExpectedArm64ASM": [
-        "fmul d16, d16, d17"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov z4.d, z2.d",
+        "fmul d4, d2, d3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "cvtsd2ss xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf2 0x0f 0x5a"
       ],
       "ExpectedArm64ASM": [
-        "fcvt s16, d17"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov z4.d, z2.d",
+        "fcvt s4, d3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "cvtsd2ss xmm0, [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0xf2 0x0f 0x5a"
       ],
       "ExpectedArm64ASM": [
-        "ldr q2, [x4]",
-        "fcvt s16, d2"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "ldr q3, [x20]",
+        "mov z4.d, z2.d",
+        "fcvt s4, d3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "subsd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf2 0x0f 0x5c"
       ],
       "ExpectedArm64ASM": [
-        "fsub d16, d16, d17"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov z4.d, z2.d",
+        "fsub d4, d2, d3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "minsd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf2 0x0f 0x5d"
       ],
       "ExpectedArm64ASM": [
-        "fmin d16, d16, d17"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov z4.d, z2.d",
+        "fmin d4, d2, d3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "divsd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf2 0x0f 0x5e"
       ],
       "ExpectedArm64ASM": [
-        "fdiv d16, d16, d17"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov z4.d, z2.d",
+        "fdiv d4, d2, d3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "maxsd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf2 0x0f 0x5f"
       ],
       "ExpectedArm64ASM": [
-        "fmax d16, d16, d17"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov z4.d, z2.d",
+        "fmax d4, d2, d3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "cmpsd xmm0, xmm1, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf2 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmeq d16, d16, d17"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov z4.d, z2.d",
+        "fcmeq d4, d2, d3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "cmpsd xmm0, xmm1, 1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf2 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt d16, d17, d16"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov z4.d, z2.d",
+        "fcmgt d4, d3, d2",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "cmpsd xmm0, xmm1, 2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf2 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmge d16, d17, d16"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov z4.d, z2.d",
+        "fcmge d4, d3, d2",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "cmpsd xmm0, xmm1, 3": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "0xf2 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmge d0, d16, d17",
-        "fcmgt d1, d17, d16",
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov z4.d, z2.d",
+        "fcmge d0, d2, d3",
+        "fcmgt d1, d3, d2",
         "orr v0.8b, v0.8b, v1.8b",
         "mvn v0.8b, v0.8b",
         "ptrue p0.d, vl1",
-        "mov z16.d, p0/m, z0.d"
+        "mov z4.d, p0/m, z0.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "cmpsd xmm0, xmm1, 4": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0xf2 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmeq d0, d16, d17",
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov z4.d, z2.d",
+        "fcmeq d0, d2, d3",
         "mvn v0.8b, v0.8b",
         "ptrue p0.d, vl1",
-        "mov z16.d, p0/m, z0.d"
+        "mov z4.d, p0/m, z0.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "cmpsd xmm0, xmm1, 5": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0xf2 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt d2, d17, d16",
-        "mvn v2.16b, v2.16b",
-        "mov v16.d[0], v2.d[0]"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "fcmgt d4, d3, d2",
+        "mvn v3.16b, v4.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.d[0], v3.d[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "cmpsd xmm0, xmm1, 6": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0xf2 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmge d2, d17, d16",
-        "mvn v2.16b, v2.16b",
-        "mov v16.d[0], v2.d[0]"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "fcmge d4, d3, d2",
+        "mvn v3.16b, v4.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.d[0], v3.d[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "cmpsd xmm0, xmm1, 7": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xf2 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmge d0, d16, d17",
-        "fcmgt d1, d17, d16",
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov z4.d, z2.d",
+        "fcmge d0, d2, d3",
+        "fcmgt d1, d3, d2",
         "orr v0.8b, v0.8b, v1.8b",
         "ptrue p0.d, vl1",
-        "mov z16.d, p0/m, z0.d"
+        "mov z4.d, p0/m, z0.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/AFP/Secondary.json b/unittests/InstructionCountCI/AFP/Secondary.json
index b0194320b4..c38c35bd77 100644
--- a/unittests/InstructionCountCI/AFP/Secondary.json
+++ b/unittests/InstructionCountCI/AFP/Secondary.json
@@ -11,23 +11,30 @@
   },
   "Instructions": {
     "cvtpi2ps xmm0, [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0x0f 0x2a"
       ],
       "ExpectedArm64ASM": [
-        "ldr d2, [x4]",
-        "scvtf v16.2s, v2.2s"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "ldr d3, [x20]",
+        "mov v4.16b, v2.16b",
+        "scvtf v4.2s, v3.2s",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cvtpi2ps xmm0, mm0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x0f 0x2a"
       ],
       "ExpectedArm64ASM": [
-        "ldr d2, [x28, #768]",
-        "scvtf v16.2s, v2.2s"
+        "mov v2.16b, v16.16b",
+        "ldr d3, [x28, #768]",
+        "mov v4.16b, v2.16b",
+        "scvtf v4.2s, v3.2s",
+        "mov v16.16b, v4.16b"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/AFP/Secondary_REP.json b/unittests/InstructionCountCI/AFP/Secondary_REP.json
index 055c99cd2b..ba735a0e96 100644
--- a/unittests/InstructionCountCI/AFP/Secondary_REP.json
+++ b/unittests/InstructionCountCI/AFP/Secondary_REP.json
@@ -11,216 +11,304 @@
   },
   "Instructions": {
     "cvtsi2ss xmm0, eax": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf3 0x0f 0x2a"
       ],
       "ExpectedArm64ASM": [
-        "scvtf s16, w4"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "scvtf s3, w20",
+        "mov v16.16b, v3.16b"
       ]
     },
     "cvtsi2ss xmm0, dword [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0xf3 0x0f 0x2a"
       ],
       "ExpectedArm64ASM": [
-        "ldr s2, [x4]",
-        "scvtf s16, s2"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "ldr s3, [x20]",
+        "mov v4.16b, v2.16b",
+        "scvtf s4, s3",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cvtsi2ss xmm0, qword [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0xf3 0x0f 0x2a"
       ],
       "ExpectedArm64ASM": [
-        "ldr x20, [x4]",
-        "scvtf s16, x20"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "ldr x21, [x20]",
+        "mov v3.16b, v2.16b",
+        "scvtf s3, x21",
+        "mov v16.16b, v3.16b"
       ]
     },
     "sqrtss xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0xf3 0x0f 0x51",
       "ExpectedArm64ASM": [
-        "fsqrt s16, s17"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fsqrt s4, s3",
+        "mov v16.16b, v4.16b"
       ]
     },
     "rsqrtss xmm0, xmm1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "FEAT_FPRES could make this more optimal",
         "0xf3 0x0f 0x52"
       ],
       "ExpectedArm64ASM": [
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
         "fmov s0, #0x70 (1.0000)",
-        "fsqrt s1, s17",
-        "fdiv s16, s0, s1"
+        "fsqrt s1, s3",
+        "fdiv s4, s0, s1",
+        "mov v16.16b, v4.16b"
       ]
     },
     "rcpss xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "FEAT_FPRES could make this more optimal",
         "0xf3 0x0f 0x53"
       ],
       "ExpectedArm64ASM": [
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
         "fmov s0, #0x70 (1.0000)",
-        "fdiv s16, s0, s17"
+        "fdiv s4, s0, s3",
+        "mov v16.16b, v4.16b"
       ]
     },
     "addss xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf3 0x0f 0x58"
       ],
       "ExpectedArm64ASM": [
-        "fadd s16, s16, s17"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fadd s4, s2, s3",
+        "mov v16.16b, v4.16b"
       ]
     },
     "mulss xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf3 0x0f 0x59"
       ],
       "ExpectedArm64ASM": [
-        "fmul s16, s16, s17"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fmul s4, s2, s3",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cvtss2sd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0xf3 0x0f 0x5a",
       "ExpectedArm64ASM": [
-        "fcvt d16, s17"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fcvt d4, s3",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cvtss2sd xmm0, [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0xf3 0x0f 0x5a",
       "ExpectedArm64ASM": [
-        "ldr d2, [x4]",
-        "fcvt d16, s2"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "ldr d3, [x20]",
+        "mov v4.16b, v2.16b",
+        "fcvt d4, s3",
+        "mov v16.16b, v4.16b"
       ]
     },
     "subss xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf3 0x0f 0x5c"
       ],
       "ExpectedArm64ASM": [
-        "fsub s16, s16, s17"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fsub s4, s2, s3",
+        "mov v16.16b, v4.16b"
       ]
     },
     "minss xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf3 0x0f 0x5d"
       ],
       "ExpectedArm64ASM": [
-        "fmin s16, s16, s17"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fmin s4, s2, s3",
+        "mov v16.16b, v4.16b"
       ]
     },
     "divss xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf3 0x0f 0x5e"
       ],
       "ExpectedArm64ASM": [
-        "fdiv s16, s16, s17"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fdiv s4, s2, s3",
+        "mov v16.16b, v4.16b"
       ]
     },
     "maxss xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf3 0x0f 0x5f"
       ],
       "ExpectedArm64ASM": [
-        "fmax s16, s16, s17"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fmax s4, s2, s3",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpss xmm0, xmm1, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf3 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmeq s16, s16, s17"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fcmeq s4, s2, s3",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpss xmm0, xmm1, 1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf3 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt s16, s17, s16"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fcmgt s4, s3, s2",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpss xmm0, xmm1, 2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf3 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmge s16, s17, s16"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fcmge s4, s3, s2",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpss xmm0, xmm1, 3": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xf3 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmge s0, s16, s17",
-        "fcmgt s1, s17, s16",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fcmge s0, s2, s3",
+        "fcmgt s1, s3, s2",
         "orr v0.8b, v0.8b, v1.8b",
         "mvn v0.8b, v0.8b",
-        "mov v16.s[0], v0.s[0]"
+        "mov v4.s[0], v0.s[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpss xmm0, xmm1, 4": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0xf3 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmeq s0, s16, s17",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fcmeq s0, s2, s3",
         "mvn v0.8b, v0.8b",
-        "mov v16.s[0], v0.s[0]"
+        "mov v4.s[0], v0.s[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpss xmm0, xmm1, 5": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0xf3 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt s2, s17, s16",
-        "mvn v2.16b, v2.16b",
-        "mov v16.s[0], v2.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "fcmgt s4, s3, s2",
+        "mvn v3.16b, v4.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.s[0], v3.s[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpss xmm0, xmm1, 6": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0xf3 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmge s2, s17, s16",
-        "mvn v2.16b, v2.16b",
-        "mov v16.s[0], v2.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "fcmge s4, s3, s2",
+        "mvn v3.16b, v4.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.s[0], v3.s[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpss xmm0, xmm1, 7": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0xf3 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmge s0, s16, s17",
-        "fcmgt s1, s17, s16",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fcmge s0, s2, s3",
+        "fcmgt s1, s3, s2",
         "orr v0.8b, v0.8b, v1.8b",
-        "mov v16.s[0], v0.s[0]"
+        "mov v4.s[0], v0.s[0]",
+        "mov v16.16b, v4.16b"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/AFP/Secondary_REPNE.json b/unittests/InstructionCountCI/AFP/Secondary_REPNE.json
index e37a74d6f6..a2407c9e0f 100644
--- a/unittests/InstructionCountCI/AFP/Secondary_REPNE.json
+++ b/unittests/InstructionCountCI/AFP/Secondary_REPNE.json
@@ -11,208 +11,292 @@
   },
   "Instructions": {
     "cvtsi2sd xmm0, eax": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf2 0x0f 0x2a"
       ],
       "ExpectedArm64ASM": [
-        "scvtf d16, w4"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "scvtf d3, w20",
+        "mov v16.16b, v3.16b"
       ]
     },
     "cvtsi2sd xmm0, dword [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0xf2 0x0f 0x2a"
       ],
       "ExpectedArm64ASM": [
-        "ldr w20, [x4]",
-        "scvtf d16, w20"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "ldr w21, [x20]",
+        "mov v3.16b, v2.16b",
+        "scvtf d3, w21",
+        "mov v16.16b, v3.16b"
       ]
     },
     "cvtsi2sd xmm0, rax": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf2 0x0f 0x2a"
       ],
       "ExpectedArm64ASM": [
-        "scvtf d16, x4"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "scvtf d3, x20",
+        "mov v16.16b, v3.16b"
       ]
     },
     "cvtsi2sd xmm0, qword [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0xf2 0x0f 0x2a"
       ],
       "ExpectedArm64ASM": [
-        "ldr d2, [x4]",
-        "scvtf d16, d2"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "ldr d3, [x20]",
+        "mov v4.16b, v2.16b",
+        "scvtf d4, d3",
+        "mov v16.16b, v4.16b"
       ]
     },
     "sqrtsd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf2 0x0f 0x51"
       ],
       "ExpectedArm64ASM": [
-        "fsqrt d16, d17"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fsqrt d4, d3",
+        "mov v16.16b, v4.16b"
       ]
     },
     "addsd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf2 0x0f 0x58"
       ],
       "ExpectedArm64ASM": [
-        "fadd d16, d16, d17"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fadd d4, d2, d3",
+        "mov v16.16b, v4.16b"
       ]
     },
     "mulsd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf2 0x0f 0x59"
       ],
       "ExpectedArm64ASM": [
-        "fmul d16, d16, d17"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fmul d4, d2, d3",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cvtsd2ss xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf2 0x0f 0x5a"
       ],
       "ExpectedArm64ASM": [
-        "fcvt s16, d17"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fcvt s4, d3",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cvtsd2ss xmm0, [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0xf2 0x0f 0x5a"
       ],
       "ExpectedArm64ASM": [
-        "ldr q2, [x4]",
-        "fcvt s16, d2"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "ldr q3, [x20]",
+        "mov v4.16b, v2.16b",
+        "fcvt s4, d3",
+        "mov v16.16b, v4.16b"
       ]
     },
     "subsd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf2 0x0f 0x5c"
       ],
       "ExpectedArm64ASM": [
-        "fsub d16, d16, d17"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fsub d4, d2, d3",
+        "mov v16.16b, v4.16b"
       ]
     },
     "minsd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf2 0x0f 0x5d"
       ],
       "ExpectedArm64ASM": [
-        "fmin d16, d16, d17"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fmin d4, d2, d3",
+        "mov v16.16b, v4.16b"
       ]
     },
     "divsd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf2 0x0f 0x5e"
       ],
       "ExpectedArm64ASM": [
-        "fdiv d16, d16, d17"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fdiv d4, d2, d3",
+        "mov v16.16b, v4.16b"
       ]
     },
     "maxsd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf2 0x0f 0x5f"
       ],
       "ExpectedArm64ASM": [
-        "fmax d16, d16, d17"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fmax d4, d2, d3",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpsd xmm0, xmm1, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf2 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmeq d16, d16, d17"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fcmeq d4, d2, d3",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpsd xmm0, xmm1, 1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf2 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt d16, d17, d16"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fcmgt d4, d3, d2",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpsd xmm0, xmm1, 2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf2 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmge d16, d17, d16"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fcmge d4, d3, d2",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpsd xmm0, xmm1, 3": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xf2 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmge d0, d16, d17",
-        "fcmgt d1, d17, d16",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fcmge d0, d2, d3",
+        "fcmgt d1, d3, d2",
         "orr v0.8b, v0.8b, v1.8b",
         "mvn v0.8b, v0.8b",
-        "mov v16.d[0], v0.d[0]"
+        "mov v4.d[0], v0.d[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpsd xmm0, xmm1, 4": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0xf2 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmeq d0, d16, d17",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fcmeq d0, d2, d3",
         "mvn v0.8b, v0.8b",
-        "mov v16.d[0], v0.d[0]"
+        "mov v4.d[0], v0.d[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpsd xmm0, xmm1, 5": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0xf2 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt d2, d17, d16",
-        "mvn v2.16b, v2.16b",
-        "mov v16.d[0], v2.d[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "fcmgt d4, d3, d2",
+        "mvn v3.16b, v4.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.d[0], v3.d[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpsd xmm0, xmm1, 6": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0xf2 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmge d2, d17, d16",
-        "mvn v2.16b, v2.16b",
-        "mov v16.d[0], v2.d[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "fcmge d4, d3, d2",
+        "mvn v3.16b, v4.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.d[0], v3.d[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpsd xmm0, xmm1, 7": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0xf2 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmge d0, d16, d17",
-        "fcmgt d1, d17, d16",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fcmge d0, d2, d3",
+        "fcmgt d1, d3, d2",
         "orr v0.8b, v0.8b, v1.8b",
-        "mov v16.d[0], v0.d[0]"
+        "mov v4.d[0], v0.d[0]",
+        "mov v16.16b, v4.16b"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/AFP/VEX_map1.json b/unittests/InstructionCountCI/AFP/VEX_map1.json
index 3ba72ceb85..ac96da3810 100644
--- a/unittests/InstructionCountCI/AFP/VEX_map1.json
+++ b/unittests/InstructionCountCI/AFP/VEX_map1.json
@@ -10,442 +10,562 @@
   },
   "Instructions": {
     "vsqrtss xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b10 0x51 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fsqrt s16, s18"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fsqrt s4, s3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vsqrtsd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b11 0x51 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fsqrt d16, d18"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fsqrt d4, d3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vrsqrtss xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "FEAT_FPRES could make this more optimal",
         "Map 1 0b10 0x52 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
         "fmov s0, #0x70 (1.0000)",
-        "fsqrt s1, s18",
-        "fdiv s16, s0, s1"
+        "fsqrt s1, s3",
+        "fdiv s4, s0, s1",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vrcpss xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "FEAT_FPRES could make this more optimal",
         "Map 1 0b10 0x53 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
         "fmov s0, #0x70 (1.0000)",
-        "fdiv s16, s0, s18"
+        "fdiv s4, s0, s3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpss xmm0, xmm1, xmm2, 0x00": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b10 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fcmeq s16, s17, s18"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fcmeq s4, s2, s3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpss xmm0, xmm1, xmm2, 0x01": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b10 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fcmgt s16, s18, s17"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fcmgt s4, s3, s2",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpss xmm0, xmm1, xmm2, 0x02": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b10 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fcmge s16, s18, s17"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fcmge s4, s3, s2",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpss xmm0, xmm1, xmm2, 0x03": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map 1 0b10 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fcmge s0, s17, s18",
-        "fcmgt s1, s18, s17",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fcmge s0, s2, s3",
+        "fcmgt s1, s3, s2",
         "orr v0.8b, v0.8b, v1.8b",
         "mvn v0.8b, v0.8b",
-        "mov v16.s[0], v0.s[0]"
+        "mov v4.s[0], v0.s[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpss xmm0, xmm1, xmm2, 0x04": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b10 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fcmeq s0, s17, s18",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fcmeq s0, s2, s3",
         "mvn v0.8b, v0.8b",
-        "mov v16.s[0], v0.s[0]"
+        "mov v4.s[0], v0.s[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpss xmm0, xmm1, xmm2, 0x05": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b10 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt s2, s18, s17",
-        "mvn v2.16b, v2.16b",
-        "mov v16.16b, v17.16b",
-        "mov v16.s[0], v2.s[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmgt s4, s3, s2",
+        "mvn v3.16b, v4.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.s[0], v3.s[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpss xmm0, xmm1, xmm2, 0x06": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b10 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmge s2, s18, s17",
-        "mvn v2.16b, v2.16b",
-        "mov v16.16b, v17.16b",
-        "mov v16.s[0], v2.s[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmge s4, s3, s2",
+        "mvn v3.16b, v4.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.s[0], v3.s[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpss xmm0, xmm1, xmm2, 0x07": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 1 0b10 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fcmge s0, s17, s18",
-        "fcmgt s1, s18, s17",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fcmge s0, s2, s3",
+        "fcmgt s1, s3, s2",
         "orr v0.8b, v0.8b, v1.8b",
-        "mov v16.s[0], v0.s[0]"
+        "mov v4.s[0], v0.s[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpsd xmm0, xmm1, xmm2, 0x00": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b11 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fcmeq d16, d17, d18"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fcmeq d4, d2, d3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpsd xmm0, xmm1, xmm2, 0x01": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b11 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fcmgt d16, d18, d17"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fcmgt d4, d3, d2",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpsd xmm0, xmm1, xmm2, 0x02": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b11 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fcmge d16, d18, d17"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fcmge d4, d3, d2",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpsd xmm0, xmm1, xmm2, 0x03": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map 1 0b11 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fcmge d0, d17, d18",
-        "fcmgt d1, d18, d17",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fcmge d0, d2, d3",
+        "fcmgt d1, d3, d2",
         "orr v0.8b, v0.8b, v1.8b",
         "mvn v0.8b, v0.8b",
-        "mov v16.d[0], v0.d[0]"
+        "mov v4.d[0], v0.d[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpsd xmm0, xmm1, xmm2, 0x04": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b11 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fcmeq d0, d17, d18",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fcmeq d0, d2, d3",
         "mvn v0.8b, v0.8b",
-        "mov v16.d[0], v0.d[0]"
+        "mov v4.d[0], v0.d[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpsd xmm0, xmm1, xmm2, 0x05": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b11 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt d2, d18, d17",
-        "mvn v2.16b, v2.16b",
-        "mov v16.16b, v17.16b",
-        "mov v16.d[0], v2.d[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmgt d4, d3, d2",
+        "mvn v3.16b, v4.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.d[0], v3.d[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpsd xmm0, xmm1, xmm2, 0x06": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b11 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmge d2, d18, d17",
-        "mvn v2.16b, v2.16b",
-        "mov v16.16b, v17.16b",
-        "mov v16.d[0], v2.d[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmge d4, d3, d2",
+        "mvn v3.16b, v4.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.d[0], v3.d[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpsd xmm0, xmm1, xmm2, 0x07": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 1 0b11 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fcmge d0, d17, d18",
-        "fcmgt d1, d18, d17",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fcmge d0, d2, d3",
+        "fcmgt d1, d3, d2",
         "orr v0.8b, v0.8b, v1.8b",
-        "mov v16.d[0], v0.d[0]"
+        "mov v4.d[0], v0.d[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcvtsi2ss xmm0, xmm1, eax": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b10 0x2A 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "scvtf s16, w4"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "scvtf s3, w20",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vcvtsi2ss xmm0, xmm1, rax": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b10 0x2A 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "scvtf s16, x4"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "scvtf s3, x20",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vcvtsi2sd xmm0, xmm1, eax": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b11 0x2A 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "scvtf d16, w4"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "scvtf d3, w20",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vcvtsi2sd xmm0, xmm1, rax": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b11 0x2A 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "scvtf d16, x4"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "scvtf d3, x20",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vmulss xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b10 0x59 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fmul s16, s17, s18"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fmul s4, s2, s3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vmulsd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b11 0x59 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fmul d16, d17, d18"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fmul d4, d2, d3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcvtss2sd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b10 0x5a 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fcvt d16, s18"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fcvt d4, s3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcvtsd2ss xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b11 0x5a 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fcvt s16, d18"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fcvt s4, d3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vsubss xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b10 0x5c 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fsub s16, s17, s18"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fsub s4, s2, s3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vsubsd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b11 0x5c 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fsub d16, d17, d18"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fsub d4, d2, d3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vminss xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b10 0x5d 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fmin s16, s17, s18"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fmin s4, s2, s3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vminsd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b11 0x5d 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fmin d16, d17, d18"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fmin d4, d2, d3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vdivss xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b10 0x5e 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fdiv s16, s17, s18"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fdiv s4, s2, s3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vdivsd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b11 0x5e 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fdiv d16, d17, d18"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fdiv d4, d2, d3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vmaxss xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b10 0x5f 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fmax s16, s17, s18"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fmax s4, s2, s3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vmaxsd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b11 0x5f 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fmax d16, d17, d18"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fmax d4, d2, d3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vminps xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b00 0x5d 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt v0.4s, v18.4s, v17.4s",
-        "mov v16.16b, v17.16b",
-        "bif v16.16b, v18.16b, v0.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmgt v0.4s, v3.4s, v2.4s",
+        "mov v4.16b, v2.16b",
+        "bif v4.16b, v3.16b, v0.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vminps ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 1 0b00 0x5d 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt p0.s, p7/z, z18.s, z17.s",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmgt p0.s, p7/z, z3.s, z2.s",
         "not p0.b, p7/z, p0.b",
-        "mov z0.d, z17.d",
-        "mov z0.s, p0/m, z18.s",
-        "mov z16.d, z0.d"
+        "mov z0.d, z2.d",
+        "mov z0.s, p0/m, z3.s",
+        "mov z4.d, z0.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vminpd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b01 0x5d 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt v0.2d, v18.2d, v17.2d",
-        "mov v16.16b, v17.16b",
-        "bif v16.16b, v18.16b, v0.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmgt v0.2d, v3.2d, v2.2d",
+        "mov v4.16b, v2.16b",
+        "bif v4.16b, v3.16b, v0.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vminpd ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 1 0b01 0x5d 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt p0.d, p7/z, z18.d, z17.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmgt p0.d, p7/z, z3.d, z2.d",
         "not p0.b, p7/z, p0.b",
-        "mov z0.d, z17.d",
-        "mov z0.d, p0/m, z18.d",
-        "mov z16.d, z0.d"
+        "mov z0.d, z2.d",
+        "mov z0.d, p0/m, z3.d",
+        "mov z4.d, z0.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/AFP/VEX_map3.json b/unittests/InstructionCountCI/AFP/VEX_map3.json
index fa4f9c943d..7c59988a64 100644
--- a/unittests/InstructionCountCI/AFP/VEX_map3.json
+++ b/unittests/InstructionCountCI/AFP/VEX_map3.json
@@ -10,113 +10,133 @@
   },
   "Instructions": {
     "vroundss xmm0, xmm1, 00000000b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "nearest rounding",
         "Map 3 0b01 0x0a 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v16.16b",
-        "frintn s16, s16"
+        "mov z2.d, p7/m, z16.d",
+        "mov v3.16b, v2.16b",
+        "frintn s3, s2",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundss xmm0, xmm1, 00000001b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "-inf rounding",
         "Map 3 0b01 0x0a 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v16.16b",
-        "frintm s16, s16"
+        "mov z2.d, p7/m, z16.d",
+        "mov v3.16b, v2.16b",
+        "frintm s3, s2",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundss xmm0, xmm1, 00000010b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "+inf rounding",
         "Map 3 0b01 0x0a 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v16.16b",
-        "frintp s16, s16"
+        "mov z2.d, p7/m, z16.d",
+        "mov v3.16b, v2.16b",
+        "frintp s3, s2",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundss xmm0, xmm1, 00000011b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "truncate rounding",
         "Map 3 0b01 0x0a 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v16.16b",
-        "frintz s16, s16"
+        "mov z2.d, p7/m, z16.d",
+        "mov v3.16b, v2.16b",
+        "frintz s3, s2",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundss xmm0, xmm1, 00000100b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "host mode rounding",
         "Map 3 0b01 0x0a 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v16.16b",
-        "frinti s16, s16"
+        "mov z2.d, p7/m, z16.d",
+        "mov v3.16b, v2.16b",
+        "frinti s3, s2",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundsd xmm0, xmm1, 00000000b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "nearest rounding",
         "Map 3 0b01 0x0b 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v16.16b",
-        "frintn d16, d16"
+        "mov z2.d, p7/m, z16.d",
+        "mov v3.16b, v2.16b",
+        "frintn d3, d2",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundsd xmm0, xmm1, 00000001b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "-inf rounding",
         "Map 3 0b01 0x0b 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v16.16b",
-        "frintm d16, d16"
+        "mov z2.d, p7/m, z16.d",
+        "mov v3.16b, v2.16b",
+        "frintm d3, d2",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundsd xmm0, xmm1, 00000010b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "+inf rounding",
         "Map 3 0b01 0x0b 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v16.16b",
-        "frintp d16, d16"
+        "mov z2.d, p7/m, z16.d",
+        "mov v3.16b, v2.16b",
+        "frintp d3, d2",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundsd xmm0, xmm1, 00000011b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "truncate rounding",
         "Map 3 0b01 0x0b 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v16.16b",
-        "frintz d16, d16"
+        "mov z2.d, p7/m, z16.d",
+        "mov v3.16b, v2.16b",
+        "frintz d3, d2",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundsd xmm0, xmm1, 00000100b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "host mode rounding",
         "Map 3 0b01 0x0b 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v16.16b",
-        "frinti d16, d16"
+        "mov z2.d, p7/m, z16.d",
+        "mov v3.16b, v2.16b",
+        "frinti d3, d2",
+        "mov z16.d, p7/m, z3.d"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/Atomics.json b/unittests/InstructionCountCI/Atomics.json
index 9936778c38..9f8e5e4763 100644
--- a/unittests/InstructionCountCI/Atomics.json
+++ b/unittests/InstructionCountCI/Atomics.json
@@ -11,1631 +11,1983 @@
   },
   "Instructions": {
     "lock add byte [rax], cl": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x00",
       "ExpectedArm64ASM": [
-        "ldaddalb w5, w20, [x4]",
-        "eor w27, w20, w5",
-        "lsl w0, w20, #24",
-        "cmn w0, w5, lsl #24",
-        "add w26, w20, w5"
+        "mov x20, x5",
+        "mov x21, x4",
+        "ldaddalb w20, w22, [x21]",
+        "eor w21, w22, w20",
+        "mov x27, x21",
+        "lsl w0, w22, #24",
+        "cmn w0, w20, lsl #24",
+        "add w21, w22, w20",
+        "mov x26, x21"
       ]
     },
     "lock add word [rax], cx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x01",
       "ExpectedArm64ASM": [
-        "ldaddalh w5, w20, [x4]",
-        "eor w27, w20, w5",
-        "lsl w0, w20, #16",
-        "cmn w0, w5, lsl #16",
-        "add w26, w20, w5"
+        "mov x20, x5",
+        "mov x21, x4",
+        "ldaddalh w20, w22, [x21]",
+        "eor w21, w22, w20",
+        "mov x27, x21",
+        "lsl w0, w22, #16",
+        "cmn w0, w20, lsl #16",
+        "add w21, w22, w20",
+        "mov x26, x21"
       ]
     },
     "lock add dword [rax], ecx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x01",
       "ExpectedArm64ASM": [
-        "ldaddal w5, w20, [x4]",
-        "eor w27, w20, w5",
-        "adds w26, w20, w5"
+        "mov x20, x5",
+        "mov x21, x4",
+        "ldaddal w20, w22, [x21]",
+        "eor w21, w22, w20",
+        "mov x27, x21",
+        "adds w21, w22, w20",
+        "mov x26, x21"
       ]
     },
     "lock or byte [rax], cl": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x08",
       "ExpectedArm64ASM": [
-        "ldsetalb w5, w20, [x4]",
-        "orr w26, w20, w5",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x5",
+        "mov x21, x4",
+        "ldsetalb w20, w22, [x21]",
+        "orr w21, w22, w20",
+        "mov x26, x21",
+        "cmn wzr, w21, lsl #24"
       ]
     },
     "lock or word [rax], cx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x09",
       "ExpectedArm64ASM": [
-        "ldsetalh w5, w20, [x4]",
-        "orr w26, w20, w5",
-        "cmn wzr, w26, lsl #16"
+        "mov x20, x5",
+        "mov x21, x4",
+        "ldsetalh w20, w22, [x21]",
+        "orr w21, w22, w20",
+        "mov x26, x21",
+        "cmn wzr, w21, lsl #16"
       ]
     },
     "lock or dword [rax], ecx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x09",
       "ExpectedArm64ASM": [
-        "ldsetal w5, w20, [x4]",
-        "orr w26, w20, w5",
-        "tst w26, w26"
+        "mov x20, x5",
+        "mov x21, x4",
+        "ldsetal w20, w22, [x21]",
+        "orr w21, w22, w20",
+        "mov x26, x21",
+        "tst w21, w21"
       ]
     },
     "lock adc byte [rax], cl": {
-      "ExpectedInstructionCount": 21,
+      "ExpectedInstructionCount": 25,
       "Comment": "0x10",
       "ExpectedArm64ASM": [
-        "adc w20, wzr, w5",
-        "ldaddalb w20, w20, [x4]",
-        "eor w27, w20, w5",
+        "mov x20, x5",
+        "adc w21, wzr, w20",
+        "mov x22, x4",
+        "ldaddalb w21, w23, [x22]",
+        "eor w21, w23, w20",
+        "mov x27, x21",
         "cset w21, hs",
-        "adc w22, w20, w5",
-        "uxtb w26, w22",
-        "cmp x26, x5",
+        "adc w22, w23, w20",
+        "uxtb w24, w22",
+        "cmp x24, x20",
         "cset x22, lo",
-        "cmp x26, x5",
-        "cset x23, ls",
+        "cmp x24, x20",
+        "cset x25, ls",
         "cmp x21, #0x1 (1)",
-        "csel x21, x23, x22, eq",
-        "cmn wzr, w26, lsl #24",
-        "mrs x22, nzcv",
-        "orr w21, w22, w21, lsl #29",
-        "eor w22, w20, w5",
-        "eor w20, w26, w20",
-        "bic w20, w20, w22",
-        "ubfx x20, x20, #7, #1",
-        "orr w20, w21, w20, lsl #28",
-        "msr nzcv, x20"
+        "csel x30, x25, x22, eq",
+        "cmn wzr, w24, lsl #24",
+        "mrs x21, nzcv",
+        "orr w22, w21, w30, lsl #29",
+        "eor w21, w23, w20",
+        "eor w20, w24, w23",
+        "bic w23, w20, w21",
+        "ubfx x20, x23, #7, #1",
+        "orr w21, w22, w20, lsl #28",
+        "mov x26, x24",
+        "msr nzcv, x21"
       ]
     },
     "lock adc word [rax], cx": {
-      "ExpectedInstructionCount": 21,
+      "ExpectedInstructionCount": 25,
       "Comment": "0x11",
       "ExpectedArm64ASM": [
-        "adc w20, wzr, w5",
-        "ldaddalh w20, w20, [x4]",
-        "eor w27, w20, w5",
+        "mov x20, x5",
+        "adc w21, wzr, w20",
+        "mov x22, x4",
+        "ldaddalh w21, w23, [x22]",
+        "eor w21, w23, w20",
+        "mov x27, x21",
         "cset w21, hs",
-        "adc w22, w20, w5",
-        "uxth w26, w22",
-        "cmp x26, x5",
+        "adc w22, w23, w20",
+        "uxth w24, w22",
+        "cmp x24, x20",
         "cset x22, lo",
-        "cmp x26, x5",
-        "cset x23, ls",
+        "cmp x24, x20",
+        "cset x25, ls",
         "cmp x21, #0x1 (1)",
-        "csel x21, x23, x22, eq",
-        "cmn wzr, w26, lsl #16",
-        "mrs x22, nzcv",
-        "orr w21, w22, w21, lsl #29",
-        "eor w22, w20, w5",
-        "eor w20, w26, w20",
-        "bic w20, w20, w22",
-        "ubfx x20, x20, #15, #1",
-        "orr w20, w21, w20, lsl #28",
-        "msr nzcv, x20"
+        "csel x30, x25, x22, eq",
+        "cmn wzr, w24, lsl #16",
+        "mrs x21, nzcv",
+        "orr w22, w21, w30, lsl #29",
+        "eor w21, w23, w20",
+        "eor w20, w24, w23",
+        "bic w23, w20, w21",
+        "ubfx x20, x23, #15, #1",
+        "orr w21, w22, w20, lsl #28",
+        "mov x26, x24",
+        "msr nzcv, x21"
       ]
     },
     "lock adc dword [rax], ecx": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x11",
       "ExpectedArm64ASM": [
-        "adc w20, wzr, w5",
-        "ldaddal w20, w20, [x4]",
-        "eor w27, w20, w5",
-        "adcs w26, w20, w5"
+        "mov x20, x5",
+        "adc w21, wzr, w20",
+        "mov x22, x4",
+        "ldaddal w21, w23, [x22]",
+        "eor w21, w23, w20",
+        "mov x27, x21",
+        "adcs w21, w23, w20",
+        "mov x26, x21"
       ]
     },
     "lock sbb byte [rax], cl": {
-      "ExpectedInstructionCount": 23,
+      "ExpectedInstructionCount": 27,
       "Comment": "0x18",
       "ExpectedArm64ASM": [
-        "adc w20, wzr, w5",
-        "neg w1, w20",
-        "ldaddalb w1, w20, [x4]",
-        "eor w27, w20, w5",
+        "mov x20, x5",
+        "mov x21, x4",
+        "adc w22, wzr, w20",
+        "neg w1, w22",
+        "ldaddalb w1, w23, [x21]",
+        "eor w21, w23, w20",
+        "mov x27, x21",
         "cset w21, hs",
-        "add w22, w5, w21",
-        "sub w22, w20, w22",
-        "uxtb w26, w22",
-        "cmp w26, w20",
-        "cset x22, hi",
-        "cmp w26, w20",
-        "cset x23, hs",
+        "add w22, w20, w21",
+        "sub w24, w23, w22",
+        "uxtb w22, w24",
+        "cmp w22, w23",
+        "cset x24, hi",
+        "cmp w22, w23",
+        "cset x25, hs",
         "cmp x21, #0x1 (1)",
-        "csel x21, x23, x22, eq",
-        "cmn wzr, w26, lsl #24",
-        "mrs x22, nzcv",
-        "orr w21, w22, w21, lsl #29",
-        "eor w22, w20, w5",
-        "eor w20, w26, w20",
-        "and w20, w20, w22",
-        "ubfx x20, x20, #7, #1",
-        "orr w20, w21, w20, lsl #28",
-        "msr nzcv, x20"
+        "csel x30, x25, x24, eq",
+        "cmn wzr, w22, lsl #24",
+        "mrs x21, nzcv",
+        "orr w24, w21, w30, lsl #29",
+        "eor w21, w23, w20",
+        "eor w20, w22, w23",
+        "and w23, w20, w21",
+        "ubfx x20, x23, #7, #1",
+        "orr w21, w24, w20, lsl #28",
+        "mov x26, x22",
+        "msr nzcv, x21"
       ]
     },
     "lock sbb word [rax], cx": {
-      "ExpectedInstructionCount": 23,
+      "ExpectedInstructionCount": 27,
       "Comment": "0x19",
       "ExpectedArm64ASM": [
-        "adc w20, wzr, w5",
-        "neg w1, w20",
-        "ldaddalh w1, w20, [x4]",
-        "eor w27, w20, w5",
+        "mov x20, x5",
+        "mov x21, x4",
+        "adc w22, wzr, w20",
+        "neg w1, w22",
+        "ldaddalh w1, w23, [x21]",
+        "eor w21, w23, w20",
+        "mov x27, x21",
         "cset w21, hs",
-        "add w22, w5, w21",
-        "sub w22, w20, w22",
-        "uxth w26, w22",
-        "cmp w26, w20",
-        "cset x22, hi",
-        "cmp w26, w20",
-        "cset x23, hs",
+        "add w22, w20, w21",
+        "sub w24, w23, w22",
+        "uxth w22, w24",
+        "cmp w22, w23",
+        "cset x24, hi",
+        "cmp w22, w23",
+        "cset x25, hs",
         "cmp x21, #0x1 (1)",
-        "csel x21, x23, x22, eq",
-        "cmn wzr, w26, lsl #16",
-        "mrs x22, nzcv",
-        "orr w21, w22, w21, lsl #29",
-        "eor w22, w20, w5",
-        "eor w20, w26, w20",
-        "and w20, w20, w22",
-        "ubfx x20, x20, #15, #1",
-        "orr w20, w21, w20, lsl #28",
-        "msr nzcv, x20"
+        "csel x30, x25, x24, eq",
+        "cmn wzr, w22, lsl #16",
+        "mrs x21, nzcv",
+        "orr w24, w21, w30, lsl #29",
+        "eor w21, w23, w20",
+        "eor w20, w22, w23",
+        "and w23, w20, w21",
+        "ubfx x20, x23, #15, #1",
+        "orr w21, w24, w20, lsl #28",
+        "mov x26, x22",
+        "msr nzcv, x21"
       ]
     },
     "lock sbb dword [rax], ecx": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 15,
       "Comment": "0x19",
       "ExpectedArm64ASM": [
-        "adc w20, wzr, w5",
-        "neg w1, w20",
-        "ldaddal w1, w20, [x4]",
-        "eor w27, w20, w5",
+        "mov x20, x5",
+        "mov x21, x4",
+        "adc w22, wzr, w20",
+        "neg w1, w22",
+        "ldaddal w1, w23, [x21]",
+        "eor w21, w23, w20",
+        "mov x27, x21",
         "mrs x21, nzcv",
-        "eor w21, w21, #0x20000000",
-        "msr nzcv, x21",
-        "sbcs w26, w20, w5",
+        "eor w22, w21, #0x20000000",
+        "msr nzcv, x22",
+        "sbcs w21, w23, w20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w22, w20, #0x20000000",
+        "mov x26, x21",
+        "msr nzcv, x22"
       ]
     },
     "lock and byte [rax], cl": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x20",
       "ExpectedArm64ASM": [
-        "mvn w1, w5",
-        "ldclralb w1, w20, [x4]",
-        "and w26, w20, w5",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x5",
+        "mov x21, x4",
+        "mvn w1, w20",
+        "ldclralb w1, w22, [x21]",
+        "and w21, w22, w20",
+        "cmn wzr, w21, lsl #24",
+        "mov x26, x21"
       ]
     },
     "lock and word [rax], cx": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x21",
       "ExpectedArm64ASM": [
-        "mvn w1, w5",
-        "ldclralh w1, w20, [x4]",
-        "and w26, w20, w5",
-        "cmn wzr, w26, lsl #16"
+        "mov x20, x5",
+        "mov x21, x4",
+        "mvn w1, w20",
+        "ldclralh w1, w22, [x21]",
+        "and w21, w22, w20",
+        "cmn wzr, w21, lsl #16",
+        "mov x26, x21"
       ]
     },
     "lock and dword [rax], ecx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x21",
       "ExpectedArm64ASM": [
-        "mvn w1, w5",
-        "ldclral w1, w20, [x4]",
-        "ands w26, w20, w5"
+        "mov x20, x5",
+        "mov x21, x4",
+        "mvn w1, w20",
+        "ldclral w1, w22, [x21]",
+        "ands w21, w22, w20",
+        "mov x26, x21"
       ]
     },
     "lock sub byte [rax], cl": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x28",
       "ExpectedArm64ASM": [
-        "neg w1, w5",
-        "ldaddalb w1, w20, [x4]",
-        "eor w27, w20, w5",
-        "lsl w0, w20, #24",
-        "cmp w0, w5, lsl #24",
-        "sub w26, w20, w5",
+        "mov x20, x5",
+        "mov x21, x4",
+        "neg w1, w20",
+        "ldaddalb w1, w22, [x21]",
+        "eor w21, w22, w20",
+        "mov x27, x21",
+        "lsl w0, w22, #24",
+        "cmp w0, w20, lsl #24",
+        "sub w21, w22, w20",
+        "mov x26, x21",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "lock sub word [rax], cx": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x28",
       "ExpectedArm64ASM": [
-        "neg w1, w5",
-        "ldaddalh w1, w20, [x4]",
-        "eor w27, w20, w5",
-        "lsl w0, w20, #16",
-        "cmp w0, w5, lsl #16",
-        "sub w26, w20, w5",
+        "mov x20, x5",
+        "mov x21, x4",
+        "neg w1, w20",
+        "ldaddalh w1, w22, [x21]",
+        "eor w21, w22, w20",
+        "mov x27, x21",
+        "lsl w0, w22, #16",
+        "cmp w0, w20, lsl #16",
+        "sub w21, w22, w20",
+        "mov x26, x21",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "lock sub dword [rax], ecx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x29",
       "ExpectedArm64ASM": [
-        "neg w1, w5",
-        "ldaddal w1, w20, [x4]",
-        "eor w27, w20, w5",
-        "subs w26, w20, w5",
+        "mov x20, x5",
+        "mov x21, x4",
+        "neg w1, w20",
+        "ldaddal w1, w22, [x21]",
+        "eor w21, w22, w20",
+        "mov x27, x21",
+        "subs w21, w22, w20",
+        "mov x26, x21",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "lock xor byte [rax], cl": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x30",
       "ExpectedArm64ASM": [
-        "ldeoralb w5, w20, [x4]",
-        "eor w26, w20, w5",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x5",
+        "mov x21, x4",
+        "ldeoralb w20, w22, [x21]",
+        "eor w21, w22, w20",
+        "mov x26, x21",
+        "cmn wzr, w21, lsl #24"
       ]
     },
     "lock xor word [rax], cx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x31",
       "ExpectedArm64ASM": [
-        "ldeoralh w5, w20, [x4]",
-        "eor w26, w20, w5",
-        "cmn wzr, w26, lsl #16"
+        "mov x20, x5",
+        "mov x21, x4",
+        "ldeoralh w20, w22, [x21]",
+        "eor w21, w22, w20",
+        "mov x26, x21",
+        "cmn wzr, w21, lsl #16"
       ]
     },
     "lock xor dword [rax], ecx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x31",
       "ExpectedArm64ASM": [
-        "ldeoral w5, w20, [x4]",
-        "eor w26, w20, w5",
-        "tst w26, w26"
+        "mov x20, x5",
+        "mov x21, x4",
+        "ldeoral w20, w22, [x21]",
+        "eor w21, w22, w20",
+        "mov x26, x21",
+        "tst w21, w21"
       ]
     },
     "lock add qword [rax], rcx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x01",
       "ExpectedArm64ASM": [
-        "ldaddal x5, x20, [x4]",
-        "eor w27, w20, w5",
-        "adds x26, x20, x5"
+        "mov x20, x5",
+        "mov x21, x4",
+        "ldaddal x20, x22, [x21]",
+        "eor w21, w22, w20",
+        "mov x27, x21",
+        "adds x21, x22, x20",
+        "mov x26, x21"
       ]
     },
     "xchg byte [rax], cl": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x86",
       "ExpectedArm64ASM": [
-        "swpalb w5, w20, [x4]",
-        "bfxil x5, x20, #0, #8"
+        "mov x20, x5",
+        "mov x21, x4",
+        "swpalb w20, w22, [x21]",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #8",
+        "mov x5, x21"
       ]
     },
     "xchg word [rax], cx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x87",
       "ExpectedArm64ASM": [
-        "swpalh w5, w20, [x4]",
-        "bfxil x5, x20, #0, #16"
+        "mov x20, x5",
+        "mov x21, x4",
+        "swpalh w20, w22, [x21]",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x5, x21"
       ]
     },
     "xchg dword [rax], ecx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x87",
       "ExpectedArm64ASM": [
-        "swpal w5, w5, [x4]"
+        "mov x20, x5",
+        "mov x21, x4",
+        "swpal w20, w22, [x21]",
+        "mov x5, x22"
       ]
     },
     "xchg qword [rax], rcx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x87",
       "ExpectedArm64ASM": [
-        "swpal x5, x5, [x4]"
+        "mov x20, x5",
+        "mov x21, x4",
+        "swpal x20, x22, [x21]",
+        "mov x5, x22"
       ]
     },
     "xadd byte [rax], bl": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x0f 0xc0",
       "ExpectedArm64ASM": [
-        "uxtb w20, w7",
-        "ldaddalb w20, w21, [x4]",
-        "bfxil x7, x21, #0, #8",
-        "eor w27, w21, w20",
-        "lsl w0, w21, #24",
-        "cmn w0, w20, lsl #24",
-        "add w26, w21, w20"
+        "mov x20, x4",
+        "mov x21, x7",
+        "uxtb w22, w21",
+        "ldaddalb w22, w23, [x20]",
+        "mov x20, x21",
+        "bfxil x20, x23, #0, #8",
+        "mov x7, x20",
+        "eor w20, w23, w22",
+        "mov x27, x20",
+        "lsl w0, w23, #24",
+        "cmn w0, w22, lsl #24",
+        "add w20, w23, w22",
+        "mov x26, x20"
       ]
     },
     "xadd word [rax], bx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x0f 0xc1",
       "ExpectedArm64ASM": [
-        "uxth w20, w7",
-        "ldaddalh w20, w21, [x4]",
-        "bfxil x7, x21, #0, #16",
-        "eor w27, w21, w20",
-        "lsl w0, w21, #16",
-        "cmn w0, w20, lsl #16",
-        "add w26, w21, w20"
+        "mov x20, x4",
+        "mov x21, x7",
+        "uxth w22, w21",
+        "ldaddalh w22, w23, [x20]",
+        "mov x20, x21",
+        "bfxil x20, x23, #0, #16",
+        "mov x7, x20",
+        "eor w20, w23, w22",
+        "mov x27, x20",
+        "lsl w0, w23, #16",
+        "cmn w0, w22, lsl #16",
+        "add w20, w23, w22",
+        "mov x26, x20"
       ]
     },
     "xadd dword [rax], ebx": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x0f 0xc1",
       "ExpectedArm64ASM": [
-        "mov w20, w7",
-        "ldaddal w20, w7, [x4]",
-        "eor w27, w7, w20",
-        "adds w26, w7, w20"
+        "mov x20, x4",
+        "mov x21, x7",
+        "mov w22, w21",
+        "ldaddal w22, w21, [x20]",
+        "mov x7, x21",
+        "eor w20, w21, w22",
+        "mov x27, x20",
+        "adds w20, w21, w22",
+        "mov x26, x20"
       ]
     },
     "xadd qword [rax], rbx": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x0f 0xc1",
       "ExpectedArm64ASM": [
-        "mov x20, x7",
-        "ldaddal x20, x7, [x4]",
-        "eor w27, w7, w20",
-        "adds x26, x7, x20"
+        "mov x20, x4",
+        "mov x21, x7",
+        "ldaddal x21, x22, [x20]",
+        "mov x7, x22",
+        "eor w20, w22, w21",
+        "mov x27, x20",
+        "adds x20, x22, x21",
+        "mov x26, x20"
       ]
     },
     "lock add byte [rax], 1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP1 0x80 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "ldaddalb w20, w27, [x4]",
-        "lsl w0, w27, #24",
+        "mov x21, x4",
+        "ldaddalb w20, w22, [x21]",
+        "mov x27, x22",
+        "lsl w0, w22, #24",
         "cmn w0, w20, lsl #24",
-        "add w26, w27, #0x1 (1)"
+        "add w20, w22, #0x1 (1)",
+        "mov x26, x20"
       ]
     },
     "lock add byte [rax], 0xFF": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP1 0x80 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
-        "ldaddalb w20, w21, [x4]",
-        "mvn w27, w21",
-        "lsl w0, w21, #24",
+        "mov x21, x4",
+        "ldaddalb w20, w22, [x21]",
+        "mvn w21, w22",
+        "mov x27, x21",
+        "lsl w0, w22, #24",
         "cmn w0, w20, lsl #24",
-        "add w26, w21, #0xff (255)"
+        "add w20, w22, #0xff (255)",
+        "mov x26, x20"
       ]
     },
     "lock add word [rax], 0x100": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP1 0x81 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "ldaddalh w20, w27, [x4]",
-        "lsl w0, w27, #16",
+        "mov x21, x4",
+        "ldaddalh w20, w22, [x21]",
+        "mov x27, x22",
+        "lsl w0, w22, #16",
         "cmn w0, w20, lsl #16",
-        "add w26, w27, #0x100 (256)"
+        "add w20, w22, #0x100 (256)",
+        "mov x26, x20"
       ]
     },
     "lock add word [rax], 0xFFFF": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP1 0x81 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0xffff",
-        "ldaddalh w20, w21, [x4]",
-        "mvn w27, w21",
-        "lsl w0, w21, #16",
+        "mov x21, x4",
+        "ldaddalh w20, w22, [x21]",
+        "mvn w21, w22",
+        "mov x27, x21",
+        "lsl w0, w22, #16",
         "cmn w0, w20, lsl #16",
-        "add w26, w21, w20"
+        "add w21, w22, w20",
+        "mov x26, x21"
       ]
     },
     "lock add dword [rax], 0x100": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "ldaddal w20, w27, [x4]",
-        "adds w26, w27, #0x100 (256)"
+        "mov x21, x4",
+        "ldaddal w20, w22, [x21]",
+        "mov x27, x22",
+        "adds w20, w22, #0x100 (256)",
+        "mov x26, x20"
       ]
     },
     "lock add dword [rax], 0xFFFFFFFF": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x81 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "ldaddal w20, w21, [x4]",
-        "mvn w27, w21",
-        "adds w26, w21, w20"
+        "mov x21, x4",
+        "ldaddal w20, w22, [x21]",
+        "mvn w21, w22",
+        "mov x27, x21",
+        "adds w21, w22, w20",
+        "mov x26, x21"
       ]
     },
     "lock add qword [rax], 0x100": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "ldaddal x20, x27, [x4]",
-        "adds x26, x27, #0x100 (256)"
+        "mov x21, x4",
+        "ldaddal x20, x22, [x21]",
+        "mov x27, x22",
+        "adds x20, x22, #0x100 (256)",
+        "mov x26, x20"
       ]
     },
     "lock add qword [rax], -2147483647": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /0",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffff80000001",
-        "ldaddal x20, x27, [x4]",
-        "adds x26, x27, x20"
+        "mov x21, x4",
+        "ldaddal x20, x22, [x21]",
+        "mov x27, x22",
+        "adds x21, x22, x20",
+        "mov x26, x21"
       ]
     },
     "lock add word [rax], 1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP1 0x83 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "ldaddalh w20, w27, [x4]",
-        "lsl w0, w27, #16",
+        "mov x21, x4",
+        "ldaddalh w20, w22, [x21]",
+        "mov x27, x22",
+        "lsl w0, w22, #16",
         "cmn w0, w20, lsl #16",
-        "add w26, w27, #0x1 (1)"
+        "add w20, w22, #0x1 (1)",
+        "mov x26, x20"
       ]
     },
     "lock add dword [rax], 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "ldaddal w20, w27, [x4]",
-        "adds w26, w27, #0x1 (1)"
+        "mov x21, x4",
+        "ldaddal w20, w22, [x21]",
+        "mov x27, x22",
+        "adds w20, w22, #0x1 (1)",
+        "mov x26, x20"
       ]
     },
     "lock add qword [rax], 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "ldaddal x20, x27, [x4]",
-        "adds x26, x27, #0x1 (1)"
+        "mov x21, x4",
+        "ldaddal x20, x22, [x21]",
+        "mov x27, x22",
+        "adds x20, x22, #0x1 (1)",
+        "mov x26, x20"
       ]
     },
     "lock or byte [rax], 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x80 /1",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "ldsetalb w20, w20, [x4]",
-        "orr w26, w20, #0x1",
-        "cmn wzr, w26, lsl #24"
+        "mov x21, x4",
+        "ldsetalb w20, w22, [x21]",
+        "orr w20, w22, #0x1",
+        "mov x26, x20",
+        "cmn wzr, w20, lsl #24"
       ]
     },
     "lock or byte [rax], 0xFF": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x80 /1",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
-        "ldsetalb w20, w20, [x4]",
-        "orr w26, w20, #0xff",
-        "cmn wzr, w26, lsl #24"
+        "mov x21, x4",
+        "ldsetalb w20, w22, [x21]",
+        "orr w20, w22, #0xff",
+        "mov x26, x20",
+        "cmn wzr, w20, lsl #24"
       ]
     },
     "lock or word [rax], 0x100": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /1",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "ldsetalh w20, w20, [x4]",
-        "orr w26, w20, #0x100",
-        "cmn wzr, w26, lsl #16"
+        "mov x21, x4",
+        "ldsetalh w20, w22, [x21]",
+        "orr w20, w22, #0x100",
+        "mov x26, x20",
+        "cmn wzr, w20, lsl #16"
       ]
     },
     "lock or word [rax], 0xFFFF": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /1",
       "ExpectedArm64ASM": [
         "mov w20, #0xffff",
-        "ldsetalh w20, w20, [x4]",
-        "orr w26, w20, #0xffff",
-        "cmn wzr, w26, lsl #16"
+        "mov x21, x4",
+        "ldsetalh w20, w22, [x21]",
+        "orr w20, w22, #0xffff",
+        "mov x26, x20",
+        "cmn wzr, w20, lsl #16"
       ]
     },
     "lock or dword [rax], 0x100": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /1",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "ldsetal w20, w20, [x4]",
-        "orr w26, w20, #0x100",
-        "tst w26, w26"
+        "mov x21, x4",
+        "ldsetal w20, w22, [x21]",
+        "orr w20, w22, #0x100",
+        "mov x26, x20",
+        "tst w20, w20"
       ]
     },
     "lock or dword [rax], 0xFFFFFFFF": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /1",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "ldsetal w20, w21, [x4]",
-        "orr w26, w21, w20",
-        "tst w26, w26"
+        "mov x21, x4",
+        "ldsetal w20, w22, [x21]",
+        "orr w21, w22, w20",
+        "mov x26, x21",
+        "tst w21, w21"
       ]
     },
     "lock or qword [rax], 0x100": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /1",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "ldsetal x20, x20, [x4]",
-        "orr x26, x20, #0x100",
-        "tst x26, x26"
+        "mov x21, x4",
+        "ldsetal x20, x22, [x21]",
+        "orr x20, x22, #0x100",
+        "mov x26, x20",
+        "tst x20, x20"
       ]
     },
     "lock or qword [rax], -2147483647": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /1",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffff80000001",
-        "ldsetal x20, x20, [x4]",
-        "orr x26, x20, #0xffffffff80000001",
-        "tst x26, x26"
+        "mov x21, x4",
+        "ldsetal x20, x22, [x21]",
+        "orr x20, x22, #0xffffffff80000001",
+        "mov x26, x20",
+        "tst x20, x20"
       ]
     },
     "lock or word [rax], 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /1",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "ldsetalh w20, w20, [x4]",
-        "orr w26, w20, #0x1",
-        "cmn wzr, w26, lsl #16"
+        "mov x21, x4",
+        "ldsetalh w20, w22, [x21]",
+        "orr w20, w22, #0x1",
+        "mov x26, x20",
+        "cmn wzr, w20, lsl #16"
       ]
     },
     "lock or dword [rax], 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /1",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "ldsetal w20, w20, [x4]",
-        "orr w26, w20, #0x1",
-        "tst w26, w26"
+        "mov x21, x4",
+        "ldsetal w20, w22, [x21]",
+        "orr w20, w22, #0x1",
+        "mov x26, x20",
+        "tst w20, w20"
       ]
     },
     "lock or qword [rax], 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /1",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "ldsetal x20, x20, [x4]",
-        "orr x26, x20, #0x1",
-        "tst x26, x26"
+        "mov x21, x4",
+        "ldsetal x20, x22, [x21]",
+        "orr x20, x22, #0x1",
+        "mov x26, x20",
+        "tst x20, x20"
       ]
     },
     "lock adc byte [rax], 1": {
-      "ExpectedInstructionCount": 19,
+      "ExpectedInstructionCount": 22,
       "Comment": "GROUP1 0x80 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
         "adc w21, wzr, w20",
-        "ldaddalb w21, w27, [x4]",
+        "mov x22, x4",
+        "ldaddalb w21, w23, [x22]",
+        "mov x27, x23",
         "cset w21, hs",
-        "adc w20, w27, w20",
-        "uxtb w26, w20",
-        "cmp w26, #0x1 (1)",
-        "cset x20, lo",
-        "cmp w26, #0x1 (1)",
-        "cset x22, ls",
+        "adc w22, w23, w20",
+        "uxtb w20, w22",
+        "cmp w20, #0x1 (1)",
+        "cset x22, lo",
+        "cmp w20, #0x1 (1)",
+        "cset x24, ls",
         "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #24",
+        "csel x25, x24, x22, eq",
+        "cmn wzr, w20, lsl #24",
         "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "bic w21, w26, w27",
-        "ubfx x21, x21, #7, #1",
-        "orr w20, w20, w21, lsl #28",
-        "msr nzcv, x20"
+        "orr w22, w21, w25, lsl #29",
+        "bic w21, w20, w23",
+        "ubfx x23, x21, #7, #1",
+        "orr w21, w22, w23, lsl #28",
+        "mov x26, x20",
+        "msr nzcv, x21"
       ]
     },
     "lock adc byte [rax], 0xFF": {
-      "ExpectedInstructionCount": 20,
+      "ExpectedInstructionCount": 23,
       "Comment": "GROUP1 0x80 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
         "adc w21, wzr, w20",
-        "ldaddalb w21, w21, [x4]",
-        "mvn w27, w21",
-        "cset w22, hs",
-        "adc w20, w21, w20",
-        "uxtb w26, w20",
-        "cmp w26, #0xff (255)",
-        "cset x20, lo",
-        "cmp w26, #0xff (255)",
-        "cset x23, ls",
-        "cmp x22, #0x1 (1)",
-        "csel x20, x23, x20, eq",
-        "cmn wzr, w26, lsl #24",
-        "mrs x22, nzcv",
-        "orr w20, w22, w20, lsl #29",
-        "bic w21, w21, w26",
-        "ubfx x21, x21, #7, #1",
-        "orr w20, w20, w21, lsl #28",
-        "msr nzcv, x20"
+        "mov x22, x4",
+        "ldaddalb w21, w23, [x22]",
+        "mvn w21, w23",
+        "mov x27, x21",
+        "cset w21, hs",
+        "adc w22, w23, w20",
+        "uxtb w20, w22",
+        "cmp w20, #0xff (255)",
+        "cset x22, lo",
+        "cmp w20, #0xff (255)",
+        "cset x24, ls",
+        "cmp x21, #0x1 (1)",
+        "csel x25, x24, x22, eq",
+        "cmn wzr, w20, lsl #24",
+        "mrs x21, nzcv",
+        "orr w22, w21, w25, lsl #29",
+        "bic w21, w23, w20",
+        "ubfx x23, x21, #7, #1",
+        "orr w21, w22, w23, lsl #28",
+        "mov x26, x20",
+        "msr nzcv, x21"
       ]
     },
     "lock adc word [rax], 0x100": {
-      "ExpectedInstructionCount": 19,
+      "ExpectedInstructionCount": 22,
       "Comment": "GROUP1 0x81 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
         "adc w21, wzr, w20",
-        "ldaddalh w21, w27, [x4]",
+        "mov x22, x4",
+        "ldaddalh w21, w23, [x22]",
+        "mov x27, x23",
         "cset w21, hs",
-        "adc w20, w27, w20",
-        "uxth w26, w20",
-        "cmp w26, #0x100 (256)",
-        "cset x20, lo",
-        "cmp w26, #0x100 (256)",
-        "cset x22, ls",
+        "adc w22, w23, w20",
+        "uxth w20, w22",
+        "cmp w20, #0x100 (256)",
+        "cset x22, lo",
+        "cmp w20, #0x100 (256)",
+        "cset x24, ls",
         "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #16",
+        "csel x25, x24, x22, eq",
+        "cmn wzr, w20, lsl #16",
         "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "bic w21, w26, w27",
-        "ubfx x21, x21, #15, #1",
-        "orr w20, w20, w21, lsl #28",
-        "msr nzcv, x20"
+        "orr w22, w21, w25, lsl #29",
+        "bic w21, w20, w23",
+        "ubfx x23, x21, #15, #1",
+        "orr w21, w22, w23, lsl #28",
+        "mov x26, x20",
+        "msr nzcv, x21"
       ]
     },
     "lock adc word [rax], 0xFFFF": {
-      "ExpectedInstructionCount": 20,
+      "ExpectedInstructionCount": 23,
       "Comment": "GROUP1 0x81 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0xffff",
         "adc w21, wzr, w20",
-        "ldaddalh w21, w21, [x4]",
-        "mvn w27, w21",
-        "cset w22, hs",
-        "adc w23, w21, w20",
-        "uxth w26, w23",
-        "cmp w26, w20",
-        "cset x23, lo",
-        "cmp w26, w20",
-        "cset x20, ls",
-        "cmp x22, #0x1 (1)",
-        "csel x20, x20, x23, eq",
-        "cmn wzr, w26, lsl #16",
-        "mrs x22, nzcv",
-        "orr w20, w22, w20, lsl #29",
-        "bic w21, w21, w26",
-        "ubfx x21, x21, #15, #1",
-        "orr w20, w20, w21, lsl #28",
+        "mov x22, x4",
+        "ldaddalh w21, w23, [x22]",
+        "mvn w21, w23",
+        "mov x27, x21",
+        "cset w21, hs",
+        "adc w22, w23, w20",
+        "uxth w24, w22",
+        "cmp w24, w20",
+        "cset x22, lo",
+        "cmp w24, w20",
+        "cset x25, ls",
+        "cmp x21, #0x1 (1)",
+        "csel x20, x25, x22, eq",
+        "cmn wzr, w24, lsl #16",
+        "mrs x21, nzcv",
+        "orr w22, w21, w20, lsl #29",
+        "bic w20, w23, w24",
+        "ubfx x21, x20, #15, #1",
+        "orr w20, w22, w21, lsl #28",
+        "mov x26, x24",
         "msr nzcv, x20"
       ]
     },
     "lock adc dword [rax], 0x100": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x81 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
         "adc w21, wzr, w20",
-        "ldaddal w21, w27, [x4]",
-        "adcs w26, w27, w20"
+        "mov x22, x4",
+        "ldaddal w21, w23, [x22]",
+        "mov x27, x23",
+        "adcs w21, w23, w20",
+        "mov x26, x21"
       ]
     },
     "lock adc dword [rax], 0xFFFFFFFF": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP1 0x81 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
         "adc w21, wzr, w20",
-        "ldaddal w21, w21, [x4]",
-        "mvn w27, w21",
-        "adcs w26, w21, w20"
+        "mov x22, x4",
+        "ldaddal w21, w23, [x22]",
+        "mvn w21, w23",
+        "mov x27, x21",
+        "adcs w21, w23, w20",
+        "mov x26, x21"
       ]
     },
     "lock adc qword [rax], 0x100": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x81 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
         "adc x21, xzr, x20",
-        "ldaddal x21, x27, [x4]",
-        "adcs x26, x27, x20"
+        "mov x22, x4",
+        "ldaddal x21, x23, [x22]",
+        "mov x27, x23",
+        "adcs x21, x23, x20",
+        "mov x26, x21"
       ]
     },
     "lock adc qword [rax], -2147483647": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x81 /2",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffff80000001",
         "adc x21, xzr, x20",
-        "ldaddal x21, x27, [x4]",
-        "adcs x26, x27, x20"
+        "mov x22, x4",
+        "ldaddal x21, x23, [x22]",
+        "mov x27, x23",
+        "adcs x21, x23, x20",
+        "mov x26, x21"
       ]
     },
     "lock adc word [rax], 1": {
-      "ExpectedInstructionCount": 19,
+      "ExpectedInstructionCount": 22,
       "Comment": "GROUP1 0x83 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
         "adc w21, wzr, w20",
-        "ldaddalh w21, w27, [x4]",
+        "mov x22, x4",
+        "ldaddalh w21, w23, [x22]",
+        "mov x27, x23",
         "cset w21, hs",
-        "adc w20, w27, w20",
-        "uxth w26, w20",
-        "cmp w26, #0x1 (1)",
-        "cset x20, lo",
-        "cmp w26, #0x1 (1)",
-        "cset x22, ls",
+        "adc w22, w23, w20",
+        "uxth w20, w22",
+        "cmp w20, #0x1 (1)",
+        "cset x22, lo",
+        "cmp w20, #0x1 (1)",
+        "cset x24, ls",
         "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #16",
+        "csel x25, x24, x22, eq",
+        "cmn wzr, w20, lsl #16",
         "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "bic w21, w26, w27",
-        "ubfx x21, x21, #15, #1",
-        "orr w20, w20, w21, lsl #28",
-        "msr nzcv, x20"
+        "orr w22, w21, w25, lsl #29",
+        "bic w21, w20, w23",
+        "ubfx x23, x21, #15, #1",
+        "orr w21, w22, w23, lsl #28",
+        "mov x26, x20",
+        "msr nzcv, x21"
       ]
     },
     "lock adc dword [rax], 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x83 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
         "adc w21, wzr, w20",
-        "ldaddal w21, w27, [x4]",
-        "adcs w26, w27, w20"
+        "mov x22, x4",
+        "ldaddal w21, w23, [x22]",
+        "mov x27, x23",
+        "adcs w21, w23, w20",
+        "mov x26, x21"
       ]
     },
     "lock adc qword [rax], 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x83 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
         "adc x21, xzr, x20",
-        "ldaddal x21, x27, [x4]",
-        "adcs x26, x27, x20"
+        "mov x22, x4",
+        "ldaddal x21, x23, [x22]",
+        "mov x27, x23",
+        "adcs x21, x23, x20",
+        "mov x26, x21"
       ]
     },
     "lock sbb byte [rax], 1": {
-      "ExpectedInstructionCount": 21,
+      "ExpectedInstructionCount": 24,
       "Comment": "GROUP1 0x80 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "adc w21, wzr, w20",
-        "neg w1, w21",
-        "ldaddalb w1, w27, [x4]",
+        "mov x21, x4",
+        "adc w22, wzr, w20",
+        "neg w1, w22",
+        "ldaddalb w1, w23, [x21]",
+        "mov x27, x23",
         "cset w21, hs",
-        "add w20, w20, w21",
-        "sub w20, w27, w20",
-        "uxtb w26, w20",
-        "cmp w26, w27",
+        "add w22, w20, w21",
+        "sub w20, w23, w22",
+        "uxtb w22, w20",
+        "cmp w22, w23",
         "cset x20, hi",
-        "cmp w26, w27",
-        "cset x22, hs",
+        "cmp w22, w23",
+        "cset x24, hs",
         "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #24",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "bic w21, w27, w26",
-        "ubfx x21, x21, #7, #1",
-        "orr w20, w20, w21, lsl #28",
+        "csel x25, x24, x20, eq",
+        "cmn wzr, w22, lsl #24",
+        "mrs x20, nzcv",
+        "orr w21, w20, w25, lsl #29",
+        "bic w20, w23, w22",
+        "ubfx x23, x20, #7, #1",
+        "orr w20, w21, w23, lsl #28",
+        "mov x26, x22",
         "msr nzcv, x20"
       ]
     },
     "lock sbb byte [rax], 0xFF": {
-      "ExpectedInstructionCount": 22,
+      "ExpectedInstructionCount": 25,
       "Comment": "GROUP1 0x80 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
-        "adc w21, wzr, w20",
-        "neg w1, w21",
-        "ldaddalb w1, w21, [x4]",
-        "mvn w27, w21",
-        "cset w22, hs",
-        "add w20, w20, w22",
-        "sub w20, w21, w20",
-        "uxtb w26, w20",
-        "cmp w26, w21",
+        "mov x21, x4",
+        "adc w22, wzr, w20",
+        "neg w1, w22",
+        "ldaddalb w1, w23, [x21]",
+        "mvn w21, w23",
+        "mov x27, x21",
+        "cset w21, hs",
+        "add w22, w20, w21",
+        "sub w20, w23, w22",
+        "uxtb w22, w20",
+        "cmp w22, w23",
         "cset x20, hi",
-        "cmp w26, w21",
-        "cset x23, hs",
-        "cmp x22, #0x1 (1)",
-        "csel x20, x23, x20, eq",
-        "cmn wzr, w26, lsl #24",
-        "mrs x22, nzcv",
-        "orr w20, w22, w20, lsl #29",
-        "bic w21, w26, w21",
-        "ubfx x21, x21, #7, #1",
-        "orr w20, w20, w21, lsl #28",
+        "cmp w22, w23",
+        "cset x24, hs",
+        "cmp x21, #0x1 (1)",
+        "csel x25, x24, x20, eq",
+        "cmn wzr, w22, lsl #24",
+        "mrs x20, nzcv",
+        "orr w21, w20, w25, lsl #29",
+        "bic w20, w22, w23",
+        "ubfx x23, x20, #7, #1",
+        "orr w20, w21, w23, lsl #28",
+        "mov x26, x22",
         "msr nzcv, x20"
       ]
     },
     "lock sbb word [rax], 0x100": {
-      "ExpectedInstructionCount": 21,
+      "ExpectedInstructionCount": 24,
       "Comment": "GROUP1 0x81 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "adc w21, wzr, w20",
-        "neg w1, w21",
-        "ldaddalh w1, w27, [x4]",
+        "mov x21, x4",
+        "adc w22, wzr, w20",
+        "neg w1, w22",
+        "ldaddalh w1, w23, [x21]",
+        "mov x27, x23",
         "cset w21, hs",
-        "add w20, w20, w21",
-        "sub w20, w27, w20",
-        "uxth w26, w20",
-        "cmp w26, w27",
+        "add w22, w20, w21",
+        "sub w20, w23, w22",
+        "uxth w22, w20",
+        "cmp w22, w23",
         "cset x20, hi",
-        "cmp w26, w27",
-        "cset x22, hs",
+        "cmp w22, w23",
+        "cset x24, hs",
         "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #16",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "bic w21, w27, w26",
-        "ubfx x21, x21, #15, #1",
-        "orr w20, w20, w21, lsl #28",
+        "csel x25, x24, x20, eq",
+        "cmn wzr, w22, lsl #16",
+        "mrs x20, nzcv",
+        "orr w21, w20, w25, lsl #29",
+        "bic w20, w23, w22",
+        "ubfx x23, x20, #15, #1",
+        "orr w20, w21, w23, lsl #28",
+        "mov x26, x22",
         "msr nzcv, x20"
       ]
     },
     "lock sbb word [rax], 0xFFFF": {
-      "ExpectedInstructionCount": 22,
+      "ExpectedInstructionCount": 25,
       "Comment": "GROUP1 0x81 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0xffff",
-        "adc w21, wzr, w20",
-        "neg w1, w21",
-        "ldaddalh w1, w21, [x4]",
-        "mvn w27, w21",
-        "cset w22, hs",
-        "add w20, w20, w22",
-        "sub w20, w21, w20",
-        "uxth w26, w20",
-        "cmp w26, w21",
+        "mov x21, x4",
+        "adc w22, wzr, w20",
+        "neg w1, w22",
+        "ldaddalh w1, w23, [x21]",
+        "mvn w21, w23",
+        "mov x27, x21",
+        "cset w21, hs",
+        "add w22, w20, w21",
+        "sub w20, w23, w22",
+        "uxth w22, w20",
+        "cmp w22, w23",
         "cset x20, hi",
-        "cmp w26, w21",
-        "cset x23, hs",
-        "cmp x22, #0x1 (1)",
-        "csel x20, x23, x20, eq",
-        "cmn wzr, w26, lsl #16",
-        "mrs x22, nzcv",
-        "orr w20, w22, w20, lsl #29",
-        "bic w21, w26, w21",
-        "ubfx x21, x21, #15, #1",
-        "orr w20, w20, w21, lsl #28",
+        "cmp w22, w23",
+        "cset x24, hs",
+        "cmp x21, #0x1 (1)",
+        "csel x25, x24, x20, eq",
+        "cmn wzr, w22, lsl #16",
+        "mrs x20, nzcv",
+        "orr w21, w20, w25, lsl #29",
+        "bic w20, w22, w23",
+        "ubfx x23, x20, #15, #1",
+        "orr w20, w21, w23, lsl #28",
+        "mov x26, x22",
         "msr nzcv, x20"
       ]
     },
     "lock sbb dword [rax], 0x100": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 14,
       "Comment": "GROUP1 0x81 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "adc w21, wzr, w20",
-        "neg w1, w21",
-        "ldaddal w1, w27, [x4]",
+        "mov x21, x4",
+        "adc w22, wzr, w20",
+        "neg w1, w22",
+        "ldaddal w1, w23, [x21]",
+        "mov x27, x23",
         "mrs x21, nzcv",
-        "eor w21, w21, #0x20000000",
-        "msr nzcv, x21",
-        "sbcs w26, w27, w20",
+        "eor w22, w21, #0x20000000",
+        "msr nzcv, x22",
+        "sbcs w21, w23, w20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w22, w20, #0x20000000",
+        "mov x26, x21",
+        "msr nzcv, x22"
       ]
     },
     "lock sbb dword [rax], 0xFFFFFFFF": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 15,
       "Comment": "GROUP1 0x81 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "adc w21, wzr, w20",
-        "neg w1, w21",
-        "ldaddal w1, w21, [x4]",
-        "mvn w27, w21",
-        "mrs x22, nzcv",
-        "eor w22, w22, #0x20000000",
+        "mov x21, x4",
+        "adc w22, wzr, w20",
+        "neg w1, w22",
+        "ldaddal w1, w23, [x21]",
+        "mvn w21, w23",
+        "mov x27, x21",
+        "mrs x21, nzcv",
+        "eor w22, w21, #0x20000000",
         "msr nzcv, x22",
-        "sbcs w26, w21, w20",
+        "sbcs w21, w23, w20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w22, w20, #0x20000000",
+        "mov x26, x21",
+        "msr nzcv, x22"
       ]
     },
     "lock sbb qword [rax], 0x100": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 14,
       "Comment": "GROUP1 0x81 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "adc x21, xzr, x20",
-        "neg x1, x21",
-        "ldaddal x1, x27, [x4]",
+        "mov x21, x4",
+        "adc x22, xzr, x20",
+        "neg x1, x22",
+        "ldaddal x1, x23, [x21]",
+        "mov x27, x23",
         "mrs x21, nzcv",
-        "eor w21, w21, #0x20000000",
-        "msr nzcv, x21",
-        "sbcs x26, x27, x20",
+        "eor w22, w21, #0x20000000",
+        "msr nzcv, x22",
+        "sbcs x21, x23, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w22, w20, #0x20000000",
+        "mov x26, x21",
+        "msr nzcv, x22"
       ]
     },
     "lock sbb qword [rax], -2147483647": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 14,
       "Comment": "GROUP1 0x81 /3",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffff80000001",
-        "adc x21, xzr, x20",
-        "neg x1, x21",
-        "ldaddal x1, x27, [x4]",
+        "mov x21, x4",
+        "adc x22, xzr, x20",
+        "neg x1, x22",
+        "ldaddal x1, x23, [x21]",
+        "mov x27, x23",
         "mrs x21, nzcv",
-        "eor w21, w21, #0x20000000",
-        "msr nzcv, x21",
-        "sbcs x26, x27, x20",
+        "eor w22, w21, #0x20000000",
+        "msr nzcv, x22",
+        "sbcs x21, x23, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w22, w20, #0x20000000",
+        "mov x26, x21",
+        "msr nzcv, x22"
       ]
     },
     "lock sbb word [rax], 1": {
-      "ExpectedInstructionCount": 21,
+      "ExpectedInstructionCount": 24,
       "Comment": "GROUP1 0x83 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "adc w21, wzr, w20",
-        "neg w1, w21",
-        "ldaddalh w1, w27, [x4]",
+        "mov x21, x4",
+        "adc w22, wzr, w20",
+        "neg w1, w22",
+        "ldaddalh w1, w23, [x21]",
+        "mov x27, x23",
         "cset w21, hs",
-        "add w20, w20, w21",
-        "sub w20, w27, w20",
-        "uxth w26, w20",
-        "cmp w26, w27",
+        "add w22, w20, w21",
+        "sub w20, w23, w22",
+        "uxth w22, w20",
+        "cmp w22, w23",
         "cset x20, hi",
-        "cmp w26, w27",
-        "cset x22, hs",
+        "cmp w22, w23",
+        "cset x24, hs",
         "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #16",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "bic w21, w27, w26",
-        "ubfx x21, x21, #15, #1",
-        "orr w20, w20, w21, lsl #28",
+        "csel x25, x24, x20, eq",
+        "cmn wzr, w22, lsl #16",
+        "mrs x20, nzcv",
+        "orr w21, w20, w25, lsl #29",
+        "bic w20, w23, w22",
+        "ubfx x23, x20, #15, #1",
+        "orr w20, w21, w23, lsl #28",
+        "mov x26, x22",
         "msr nzcv, x20"
       ]
     },
     "lock sbb dword [rax], 1": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 14,
       "Comment": "GROUP1 0x83 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "adc w21, wzr, w20",
-        "neg w1, w21",
-        "ldaddal w1, w27, [x4]",
+        "mov x21, x4",
+        "adc w22, wzr, w20",
+        "neg w1, w22",
+        "ldaddal w1, w23, [x21]",
+        "mov x27, x23",
         "mrs x21, nzcv",
-        "eor w21, w21, #0x20000000",
-        "msr nzcv, x21",
-        "sbcs w26, w27, w20",
+        "eor w22, w21, #0x20000000",
+        "msr nzcv, x22",
+        "sbcs w21, w23, w20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w22, w20, #0x20000000",
+        "mov x26, x21",
+        "msr nzcv, x22"
       ]
     },
     "lock sbb qword [rax], 1": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 14,
       "Comment": "GROUP1 0x83 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "adc x21, xzr, x20",
-        "neg x1, x21",
-        "ldaddal x1, x27, [x4]",
+        "mov x21, x4",
+        "adc x22, xzr, x20",
+        "neg x1, x22",
+        "ldaddal x1, x23, [x21]",
+        "mov x27, x23",
         "mrs x21, nzcv",
-        "eor w21, w21, #0x20000000",
-        "msr nzcv, x21",
-        "sbcs x26, x27, x20",
+        "eor w22, w21, #0x20000000",
+        "msr nzcv, x22",
+        "sbcs x21, x23, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w22, w20, #0x20000000",
+        "mov x26, x21",
+        "msr nzcv, x22"
       ]
     },
     "lock and byte [rax], 1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x80 /4",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
+        "mov x21, x4",
         "mvn w1, w20",
-        "ldclralb w1, w20, [x4]",
-        "and w26, w20, #0x1",
-        "cmn wzr, w26, lsl #24"
+        "ldclralb w1, w22, [x21]",
+        "and w20, w22, #0x1",
+        "cmn wzr, w20, lsl #24",
+        "mov x26, x20"
       ]
     },
     "lock and byte [rax], 0xFF": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x80 /4",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
+        "mov x21, x4",
         "mvn w1, w20",
-        "ldclralb w1, w20, [x4]",
-        "and w26, w20, #0xff",
-        "cmn wzr, w26, lsl #24"
+        "ldclralb w1, w22, [x21]",
+        "and w20, w22, #0xff",
+        "cmn wzr, w20, lsl #24",
+        "mov x26, x20"
       ]
     },
     "lock and word [rax], 0x100": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x81 /4",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
+        "mov x21, x4",
         "mvn w1, w20",
-        "ldclralh w1, w20, [x4]",
-        "and w26, w20, #0x100",
-        "cmn wzr, w26, lsl #16"
+        "ldclralh w1, w22, [x21]",
+        "and w20, w22, #0x100",
+        "cmn wzr, w20, lsl #16",
+        "mov x26, x20"
       ]
     },
     "lock and word [rax], 0xFFFF": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x81 /4",
       "ExpectedArm64ASM": [
         "mov w20, #0xffff",
+        "mov x21, x4",
         "mvn w1, w20",
-        "ldclralh w1, w20, [x4]",
-        "and w26, w20, #0xffff",
-        "cmn wzr, w26, lsl #16"
+        "ldclralh w1, w22, [x21]",
+        "and w20, w22, #0xffff",
+        "cmn wzr, w20, lsl #16",
+        "mov x26, x20"
       ]
     },
     "lock and dword [rax], 0x100": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /4",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
+        "mov x21, x4",
         "mvn w1, w20",
-        "ldclral w1, w20, [x4]",
-        "ands w26, w20, #0x100"
+        "ldclral w1, w22, [x21]",
+        "ands w20, w22, #0x100",
+        "mov x26, x20"
       ]
     },
     "lock and dword [rax], 0xFFFFFFFF": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /4",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
+        "mov x21, x4",
         "mvn w1, w20",
-        "ldclral w1, w21, [x4]",
-        "ands w26, w21, w20"
+        "ldclral w1, w22, [x21]",
+        "ands w21, w22, w20",
+        "mov x26, x21"
       ]
     },
     "lock and qword [rax], 0x100": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /4",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
+        "mov x21, x4",
         "mvn x1, x20",
-        "ldclral x1, x20, [x4]",
-        "ands x26, x20, #0x100"
+        "ldclral x1, x22, [x21]",
+        "ands x20, x22, #0x100",
+        "mov x26, x20"
       ]
     },
     "lock and qword [rax], -2147483647": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /4",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffff80000001",
+        "mov x21, x4",
         "mvn x1, x20",
-        "ldclral x1, x20, [x4]",
-        "ands x26, x20, #0xffffffff80000001"
+        "ldclral x1, x22, [x21]",
+        "ands x20, x22, #0xffffffff80000001",
+        "mov x26, x20"
       ]
     },
     "lock and word [rax], 1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x83 /4",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
+        "mov x21, x4",
         "mvn w1, w20",
-        "ldclralh w1, w20, [x4]",
-        "and w26, w20, #0x1",
-        "cmn wzr, w26, lsl #16"
+        "ldclralh w1, w22, [x21]",
+        "and w20, w22, #0x1",
+        "cmn wzr, w20, lsl #16",
+        "mov x26, x20"
       ]
     },
     "lock and dword [rax], 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /4",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
+        "mov x21, x4",
         "mvn w1, w20",
-        "ldclral w1, w20, [x4]",
-        "ands w26, w20, #0x1"
+        "ldclral w1, w22, [x21]",
+        "ands w20, w22, #0x1",
+        "mov x26, x20"
       ]
     },
     "lock and qword [rax], 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /4",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
+        "mov x21, x4",
         "mvn x1, x20",
-        "ldclral x1, x20, [x4]",
-        "ands x26, x20, #0x1"
+        "ldclral x1, x22, [x21]",
+        "ands x20, x22, #0x1",
+        "mov x26, x20"
       ]
     },
     "lock sub byte [rax], 1": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 12,
       "Comment": "GROUP1 0x80 /5",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
+        "mov x21, x4",
         "neg w1, w20",
-        "ldaddalb w1, w27, [x4]",
-        "lsl w0, w27, #24",
+        "ldaddalb w1, w22, [x21]",
+        "mov x27, x22",
+        "lsl w0, w22, #24",
         "cmp w0, w20, lsl #24",
-        "sub w26, w27, #0x1 (1)",
+        "sub w20, w22, #0x1 (1)",
+        "mov x26, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "lock sub byte [rax], 0xFF": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 13,
       "Comment": "GROUP1 0x80 /5",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
+        "mov x21, x4",
         "neg w1, w20",
-        "ldaddalb w1, w21, [x4]",
-        "mvn w27, w21",
-        "lsl w0, w21, #24",
+        "ldaddalb w1, w22, [x21]",
+        "mvn w21, w22",
+        "mov x27, x21",
+        "lsl w0, w22, #24",
         "cmp w0, w20, lsl #24",
-        "sub w26, w21, #0xff (255)",
+        "sub w20, w22, #0xff (255)",
+        "mov x26, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "lock sub word [rax], 0x100": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 12,
       "Comment": "GROUP1 0x81 /5",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
+        "mov x21, x4",
         "neg w1, w20",
-        "ldaddalh w1, w27, [x4]",
-        "lsl w0, w27, #16",
+        "ldaddalh w1, w22, [x21]",
+        "mov x27, x22",
+        "lsl w0, w22, #16",
         "cmp w0, w20, lsl #16",
-        "sub w26, w27, #0x100 (256)",
+        "sub w20, w22, #0x100 (256)",
+        "mov x26, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "lock sub word [rax], 0xFFFF": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 13,
       "Comment": "GROUP1 0x81 /5",
       "ExpectedArm64ASM": [
         "mov w20, #0xffff",
+        "mov x21, x4",
         "neg w1, w20",
-        "ldaddalh w1, w21, [x4]",
-        "mvn w27, w21",
-        "lsl w0, w21, #16",
+        "ldaddalh w1, w22, [x21]",
+        "mvn w21, w22",
+        "mov x27, x21",
+        "lsl w0, w22, #16",
         "cmp w0, w20, lsl #16",
-        "sub w26, w21, w20",
+        "sub w21, w22, w20",
+        "mov x26, x21",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "lock sub dword [rax], 0x100": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP1 0x81 /5",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
+        "mov x21, x4",
         "neg w1, w20",
-        "ldaddal w1, w27, [x4]",
-        "subs w26, w27, #0x100 (256)",
+        "ldaddal w1, w22, [x21]",
+        "mov x27, x22",
+        "subs w20, w22, #0x100 (256)",
+        "mov x26, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "lock sub dword [rax], 0xFFFFFFFF": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP1 0x81 /5",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
+        "mov x21, x4",
         "neg w1, w20",
-        "ldaddal w1, w21, [x4]",
-        "mvn w27, w21",
-        "subs w26, w21, w20",
+        "ldaddal w1, w22, [x21]",
+        "mvn w21, w22",
+        "mov x27, x21",
+        "subs w21, w22, w20",
+        "mov x26, x21",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "lock sub qword [rax], 0x100": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP1 0x81 /5",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
+        "mov x21, x4",
         "neg x1, x20",
-        "ldaddal x1, x27, [x4]",
-        "subs x26, x27, #0x100 (256)",
+        "ldaddal x1, x22, [x21]",
+        "mov x27, x22",
+        "subs x20, x22, #0x100 (256)",
+        "mov x26, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "lock sub qword [rax], -2147483647": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP1 0x81 /5",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffff80000001",
+        "mov x21, x4",
         "neg x1, x20",
-        "ldaddal x1, x27, [x4]",
-        "subs x26, x27, x20",
+        "ldaddal x1, x22, [x21]",
+        "mov x27, x22",
+        "subs x21, x22, x20",
+        "mov x26, x21",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "lock sub word [rax], 1": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 12,
       "Comment": "GROUP1 0x83 /5",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
+        "mov x21, x4",
         "neg w1, w20",
-        "ldaddalh w1, w27, [x4]",
-        "lsl w0, w27, #16",
+        "ldaddalh w1, w22, [x21]",
+        "mov x27, x22",
+        "lsl w0, w22, #16",
         "cmp w0, w20, lsl #16",
-        "sub w26, w27, #0x1 (1)",
+        "sub w20, w22, #0x1 (1)",
+        "mov x26, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "lock sub dword [rax], 1": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP1 0x83 /5",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
+        "mov x21, x4",
         "neg w1, w20",
-        "ldaddal w1, w27, [x4]",
-        "subs w26, w27, #0x1 (1)",
+        "ldaddal w1, w22, [x21]",
+        "mov x27, x22",
+        "subs w20, w22, #0x1 (1)",
+        "mov x26, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "lock sub qword [rax], 1": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP1 0x83 /5",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
+        "mov x21, x4",
         "neg x1, x20",
-        "ldaddal x1, x27, [x4]",
-        "subs x26, x27, #0x1 (1)",
+        "ldaddal x1, x22, [x21]",
+        "mov x27, x22",
+        "subs x20, x22, #0x1 (1)",
+        "mov x26, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "lock xor byte [rax], 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x80 /6",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "ldeoralb w20, w20, [x4]",
-        "eor w26, w20, #0x1",
-        "cmn wzr, w26, lsl #24"
+        "mov x21, x4",
+        "ldeoralb w20, w22, [x21]",
+        "eor w20, w22, #0x1",
+        "mov x26, x20",
+        "cmn wzr, w20, lsl #24"
       ]
     },
     "lock xor byte [rax], 0xFF": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x80 /6",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
-        "ldeoralb w20, w20, [x4]",
-        "eor w26, w20, #0xff",
-        "cmn wzr, w26, lsl #24"
+        "mov x21, x4",
+        "ldeoralb w20, w22, [x21]",
+        "eor w20, w22, #0xff",
+        "mov x26, x20",
+        "cmn wzr, w20, lsl #24"
       ]
     },
     "lock xor word [rax], 0x100": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /6",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "ldeoralh w20, w20, [x4]",
-        "eor w26, w20, #0x100",
-        "cmn wzr, w26, lsl #16"
+        "mov x21, x4",
+        "ldeoralh w20, w22, [x21]",
+        "eor w20, w22, #0x100",
+        "mov x26, x20",
+        "cmn wzr, w20, lsl #16"
       ]
     },
     "lock xor word [rax], 0xFFFF": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /6",
       "ExpectedArm64ASM": [
         "mov w20, #0xffff",
-        "ldeoralh w20, w20, [x4]",
-        "eor w26, w20, #0xffff",
-        "cmn wzr, w26, lsl #16"
+        "mov x21, x4",
+        "ldeoralh w20, w22, [x21]",
+        "eor w20, w22, #0xffff",
+        "mov x26, x20",
+        "cmn wzr, w20, lsl #16"
       ]
     },
     "lock xor dword [rax], 0x100": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /6",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "ldeoral w20, w20, [x4]",
-        "eor w26, w20, #0x100",
-        "tst w26, w26"
+        "mov x21, x4",
+        "ldeoral w20, w22, [x21]",
+        "eor w20, w22, #0x100",
+        "mov x26, x20",
+        "tst w20, w20"
       ]
     },
     "lock xor dword [rax], 0xFFFFFFFF": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /6",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "ldeoral w20, w21, [x4]",
-        "eor w26, w21, w20",
-        "tst w26, w26"
+        "mov x21, x4",
+        "ldeoral w20, w22, [x21]",
+        "eor w21, w22, w20",
+        "mov x26, x21",
+        "tst w21, w21"
       ]
     },
     "lock xor qword [rax], 0x100": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /6",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "ldeoral x20, x20, [x4]",
-        "eor x26, x20, #0x100",
-        "tst x26, x26"
+        "mov x21, x4",
+        "ldeoral x20, x22, [x21]",
+        "eor x20, x22, #0x100",
+        "mov x26, x20",
+        "tst x20, x20"
       ]
     },
     "lock xor qword [rax], -2147483647": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /6",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffff80000001",
-        "ldeoral x20, x20, [x4]",
-        "eor x26, x20, #0xffffffff80000001",
-        "tst x26, x26"
+        "mov x21, x4",
+        "ldeoral x20, x22, [x21]",
+        "eor x20, x22, #0xffffffff80000001",
+        "mov x26, x20",
+        "tst x20, x20"
       ]
     },
     "lock xor word [rax], 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /6",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "ldeoralh w20, w20, [x4]",
-        "eor w26, w20, #0x1",
-        "cmn wzr, w26, lsl #16"
+        "mov x21, x4",
+        "ldeoralh w20, w22, [x21]",
+        "eor w20, w22, #0x1",
+        "mov x26, x20",
+        "cmn wzr, w20, lsl #16"
       ]
     },
     "lock xor dword [rax], 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /6",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "ldeoral w20, w20, [x4]",
-        "eor w26, w20, #0x1",
-        "tst w26, w26"
+        "mov x21, x4",
+        "ldeoral w20, w22, [x21]",
+        "eor w20, w22, #0x1",
+        "mov x26, x20",
+        "tst w20, w20"
       ]
     },
     "lock xor qword [rax], 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /6",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "ldeoral x20, x20, [x4]",
-        "eor x26, x20, #0x1",
-        "tst x26, x26"
+        "mov x21, x4",
+        "ldeoral x20, x22, [x21]",
+        "eor x20, x22, #0x1",
+        "mov x26, x20",
+        "tst x20, x20"
       ]
     },
     "lock dec byte [rax]": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 14,
       "Comment": "GROUP3 0xfe /1",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov w21, #0xff",
-        "ldaddalb w21, w27, [x4]",
+        "mov x21, x4",
+        "mov w22, #0xff",
+        "ldaddalb w22, w23, [x21]",
         "cset w21, hs",
-        "lsl w0, w27, #24",
+        "mov x27, x23",
+        "lsl w0, w23, #24",
         "cmp w0, w20, lsl #24",
-        "sub w26, w27, #0x1 (1)",
+        "sub w20, w23, #0x1 (1)",
+        "mov x26, x20",
         "mrs x20, nzcv",
-        "bfi w20, w21, #29, #1",
-        "msr nzcv, x20"
+        "mov w22, w20",
+        "bfi w22, w21, #29, #1",
+        "msr nzcv, x22"
       ]
     },
     "lock not byte [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP2 0xf6 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
-        "steorlb w20, [x4]"
+        "mov x21, x4",
+        "steorlb w20, [x21]"
       ]
     },
     "lock not word [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP2 0xf7 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0xffff",
-        "steorlh w20, [x4]"
+        "mov x21, x4",
+        "steorlh w20, [x21]"
       ]
     },
     "lock not dword [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP2 0xf7 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "steorl w20, [x4]"
+        "mov x21, x4",
+        "steorl w20, [x21]"
       ]
     },
     "lock not qword [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP2 0xf7 /2",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
-        "steorl x20, [x4]"
+        "mov x21, x4",
+        "steorl x20, [x21]"
       ]
     },
     "lock neg byte [rax]": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 13,
       "Comment": "GROUP2 0xf6 /3",
       "ExpectedArm64ASM": [
-        "ldaxrb w1, [x4]",
+        "mov x20, x4",
+        "ldaxrb w1, [x20]",
         "neg w2, w1",
-        "stlxrb w3, w2, [x4]",
+        "stlxrb w3, w2, [x20]",
         "cbnz w3, #-0xc",
-        "mov w27, w1",
-        "cmp wzr, w27, lsl #24",
-        "neg w26, w27",
+        "mov w21, w1",
+        "mov x27, x21",
+        "cmp wzr, w21, lsl #24",
+        "neg w20, w21",
+        "mov x26, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "lock neg word [rax]": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 13,
       "Comment": "GROUP2 0xf7 /3",
       "ExpectedArm64ASM": [
-        "ldaxrh w1, [x4]",
+        "mov x20, x4",
+        "ldaxrh w1, [x20]",
         "neg w2, w1",
-        "stlxrh w3, w2, [x4]",
+        "stlxrh w3, w2, [x20]",
         "cbnz w3, #-0xc",
-        "mov w27, w1",
-        "cmp wzr, w27, lsl #16",
-        "neg w26, w27",
+        "mov w21, w1",
+        "mov x27, x21",
+        "cmp wzr, w21, lsl #16",
+        "neg w20, w21",
+        "mov x26, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "lock neg dword [rax]": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 12,
       "Comment": "GROUP2 0xf7 /3",
       "ExpectedArm64ASM": [
-        "ldaxr w1, [x4]",
+        "mov x20, x4",
+        "ldaxr w1, [x20]",
         "neg w2, w1",
-        "stlxr w3, w2, [x4]",
+        "stlxr w3, w2, [x20]",
         "cbnz w3, #-0xc",
-        "mov w27, w1",
-        "negs w26, w27",
+        "mov w21, w1",
+        "mov x27, x21",
+        "negs w20, w21",
+        "mov x26, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "lock neg qword [rax]": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 12,
       "Comment": "GROUP2 0xf7 /3",
       "ExpectedArm64ASM": [
-        "ldaxr x1, [x4]",
+        "mov x20, x4",
+        "ldaxr x1, [x20]",
         "neg x2, x1",
-        "stlxr w3, x2, [x4]",
+        "stlxr w3, x2, [x20]",
         "cbnz x3, #-0xc",
-        "mov x27, x1",
-        "negs x26, x27",
+        "mov x21, x1",
+        "mov x27, x21",
+        "negs x20, x21",
+        "mov x26, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "lock dec word [rax]": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 14,
       "Comment": "GROUP4 0xfe /1",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov w21, #0xffff",
-        "ldaddalh w21, w27, [x4]",
+        "mov x21, x4",
+        "mov w22, #0xffff",
+        "ldaddalh w22, w23, [x21]",
         "cset w21, hs",
-        "lsl w0, w27, #16",
+        "mov x27, x23",
+        "lsl w0, w23, #16",
         "cmp w0, w20, lsl #16",
-        "sub w26, w27, #0x1 (1)",
+        "sub w20, w23, #0x1 (1)",
+        "mov x26, x20",
         "mrs x20, nzcv",
-        "bfi w20, w21, #29, #1",
-        "msr nzcv, x20"
+        "mov w22, w20",
+        "bfi w22, w21, #29, #1",
+        "msr nzcv, x22"
       ]
     },
     "lock dec dword [rax]": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP4 0xfe /1",
       "ExpectedArm64ASM": [
-        "mov w20, #0xffffffff",
-        "ldaddal w20, w27, [x4]",
+        "mov x20, x4",
+        "mov w21, #0xffffffff",
+        "ldaddal w21, w22, [x20]",
         "cset w20, hs",
-        "subs w26, w27, #0x1 (1)",
+        "mov x27, x22",
+        "subs w21, w22, #0x1 (1)",
+        "mov x26, x21",
         "mrs x21, nzcv",
-        "mov w0, w21",
-        "bfi w0, w20, #29, #1",
-        "mov w20, w0",
-        "msr nzcv, x20"
+        "mov w22, w21",
+        "bfi w22, w20, #29, #1",
+        "msr nzcv, x22"
       ]
     },
     "lock dec qword [rax]": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP4 0xfe /1",
       "ExpectedArm64ASM": [
-        "mov x20, #0xffffffffffffffff",
-        "ldaddal x20, x27, [x4]",
+        "mov x20, x4",
+        "mov x21, #0xffffffffffffffff",
+        "ldaddal x21, x22, [x20]",
         "cset w20, hs",
-        "subs x26, x27, #0x1 (1)",
+        "mov x27, x22",
+        "subs x21, x22, #0x1 (1)",
+        "mov x26, x21",
         "mrs x21, nzcv",
-        "mov w0, w21",
-        "bfi w0, w20, #29, #1",
-        "mov w20, w0",
-        "msr nzcv, x20"
+        "mov w22, w21",
+        "bfi w22, w20, #29, #1",
+        "msr nzcv, x22"
       ]
     },
     "lock inc byte [rax]": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 13,
       "Comment": "GROUP4 0xfe /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "ldaddalb w20, w27, [x4]",
+        "mov x21, x4",
+        "ldaddalb w20, w22, [x21]",
         "cset w21, hs",
-        "lsl w0, w27, #24",
+        "mov x27, x22",
+        "lsl w0, w22, #24",
         "cmn w0, w20, lsl #24",
-        "add w26, w27, #0x1 (1)",
+        "add w20, w22, #0x1 (1)",
+        "mov x26, x20",
         "mrs x20, nzcv",
-        "bfi w20, w21, #29, #1",
-        "msr nzcv, x20"
+        "mov w22, w20",
+        "bfi w22, w21, #29, #1",
+        "msr nzcv, x22"
       ]
     },
     "lock inc word [rax]": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 13,
       "Comment": "GROUP4 0xfe /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "ldaddalh w20, w27, [x4]",
+        "mov x21, x4",
+        "ldaddalh w20, w22, [x21]",
         "cset w21, hs",
-        "lsl w0, w27, #16",
+        "mov x27, x22",
+        "lsl w0, w22, #16",
         "cmn w0, w20, lsl #16",
-        "add w26, w27, #0x1 (1)",
+        "add w20, w22, #0x1 (1)",
+        "mov x26, x20",
         "mrs x20, nzcv",
-        "bfi w20, w21, #29, #1",
-        "msr nzcv, x20"
+        "mov w22, w20",
+        "bfi w22, w21, #29, #1",
+        "msr nzcv, x22"
       ]
     },
     "lock inc dword [rax]": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP4 0xfe /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "ldaddal w20, w27, [x4]",
+        "mov x21, x4",
+        "ldaddal w20, w22, [x21]",
         "cset w20, hs",
-        "adds w26, w27, #0x1 (1)",
+        "mov x27, x22",
+        "adds w21, w22, #0x1 (1)",
+        "mov x26, x21",
         "mrs x21, nzcv",
-        "mov w0, w21",
-        "bfi w0, w20, #29, #1",
-        "mov w20, w0",
-        "msr nzcv, x20"
+        "mov w22, w21",
+        "bfi w22, w20, #29, #1",
+        "msr nzcv, x22"
       ]
     },
     "lock inc qword [rax]": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP4 0xfe /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "ldaddal x20, x27, [x4]",
+        "mov x21, x4",
+        "ldaddal x20, x22, [x21]",
         "cset w20, hs",
-        "adds x26, x27, #0x1 (1)",
+        "mov x27, x22",
+        "adds x21, x22, #0x1 (1)",
+        "mov x26, x21",
         "mrs x21, nzcv",
-        "mov w0, w21",
-        "bfi w0, w20, #29, #1",
-        "mov w20, w0",
-        "msr nzcv, x20"
+        "mov w22, w21",
+        "bfi w22, w20, #29, #1",
+        "msr nzcv, x22"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/Crypto/H0F38.json b/unittests/InstructionCountCI/Crypto/H0F38.json
index 4e573ad3e6..f3f73f03d7 100644
--- a/unittests/InstructionCountCI/Crypto/H0F38.json
+++ b/unittests/InstructionCountCI/Crypto/H0F38.json
@@ -12,126 +12,167 @@
   },
   "Instructions": {
     "sha1nexte xmm0, xmm1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0x66 0x0f 0x38 0xc8"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v16.s[3]",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v2.s[3]",
         "unimplemented (Unimplemented)",
-        "dup v2.4s, v2.s[0]",
-        "add v2.4s, v17.4s, v2.4s",
-        "mov v16.16b, v17.16b",
-        "mov v16.s[3], v2.s[3]"
+        "dup v4.4s, v2.s[0]",
+        "add v2.4s, v3.4s, v4.4s",
+        "mov v4.16b, v3.16b",
+        "mov v4.s[3], v2.s[3]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "sha256msg1 xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0x66 0x0f 0x38 0xcc"
       ],
       "ExpectedArm64ASM": [
-        "unimplemented (Unimplemented)"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v0.16b, v2.16b",
+        "unimplemented (Unimplemented)",
+        "mov v4.16b, v0.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "aesimc xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "0x66 0x0f 0x38 0xdb"
       ],
       "ExpectedArm64ASM": [
-        "unimplemented (Unimplemented)"
+        "mov v2.16b, v17.16b",
+        "unimplemented (Unimplemented)",
+        "mov v16.16b, v3.16b"
       ]
     },
     "aesenc xmm0, xmm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0x66 0x0f 0x38 0xdc"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "movi v4.2d, #0x0",
+        "mov v0.16b, v2.16b",
         "unimplemented (Unimplemented)",
         "unimplemented (Unimplemented)",
-        "eor v16.16b, v16.16b, v17.16b"
+        "eor v5.16b, v0.16b, v3.16b",
+        "mov v16.16b, v5.16b"
       ]
     },
     "aesenclast xmm0, xmm1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0x66 0x0f 0x38 0xdd"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "movi v4.2d, #0x0",
+        "mov v0.16b, v2.16b",
         "unimplemented (Unimplemented)",
-        "eor v16.16b, v16.16b, v17.16b"
+        "eor v5.16b, v0.16b, v3.16b",
+        "mov v16.16b, v5.16b"
       ]
     },
     "aesdec xmm0, xmm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0x66 0x0f 0x38 0xde"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "movi v4.2d, #0x0",
+        "mov v0.16b, v2.16b",
         "unimplemented (Unimplemented)",
         "unimplemented (Unimplemented)",
-        "eor v16.16b, v16.16b, v17.16b"
+        "eor v5.16b, v0.16b, v3.16b",
+        "mov v16.16b, v5.16b"
       ]
     },
     "aesdeclast xmm0, xmm1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0x66 0x0f 0x38 0xdf"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "movi v4.2d, #0x0",
+        "mov v0.16b, v2.16b",
         "unimplemented (Unimplemented)",
-        "eor v16.16b, v16.16b, v17.16b"
+        "eor v5.16b, v0.16b, v3.16b",
+        "mov v16.16b, v5.16b"
       ]
     },
     "crc32 eax, bl": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "0xf2 0x0f 0x38 0xf0"
       ],
       "ExpectedArm64ASM": [
-        "crc32cb w4, w4, w7"
+        "mov x20, x4",
+        "mov x21, x7",
+        "crc32cb w22, w20, w21",
+        "mov x4, x22"
       ]
     },
     "crc32 eax, bx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "0xf2 0x0f 0x38 0xf1"
       ],
       "ExpectedArm64ASM": [
-        "crc32ch w4, w4, w7"
+        "mov x20, x4",
+        "mov x21, x7",
+        "crc32ch w22, w20, w21",
+        "mov x4, x22"
       ]
     },
     "crc32 eax, ebx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "0xf2 0x0f 0x38 0xf1"
       ],
       "ExpectedArm64ASM": [
-        "crc32cw w4, w4, w7"
+        "mov x20, x4",
+        "mov x21, x7",
+        "crc32cw w22, w20, w21",
+        "mov x4, x22"
       ]
     },
     "crc32 rax, bl": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "0xf2 0x0f 0x38 0xf0"
       ],
       "ExpectedArm64ASM": [
-        "crc32cb w4, w4, w7"
+        "mov x20, x4",
+        "mov x21, x7",
+        "crc32cb w22, w20, w21",
+        "mov x4, x22"
       ]
     },
     "crc32 rax, rbx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "0xf2 0x0f 0x38 0xf1"
       ],
       "ExpectedArm64ASM": [
-        "crc32cx w4, w4, x7"
+        "mov x20, x4",
+        "mov x21, x7",
+        "crc32cx w22, w20, x21",
+        "mov x4, x22"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/Crypto/H0F3A.json b/unittests/InstructionCountCI/Crypto/H0F3A.json
index f427a56faa..91948491b8 100644
--- a/unittests/InstructionCountCI/Crypto/H0F3A.json
+++ b/unittests/InstructionCountCI/Crypto/H0F3A.json
@@ -12,70 +12,86 @@
   },
   "Instructions": {
     "pclmulqdq xmm0, xmm1, 00000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "0x66 0x0f 0x3a 0x44"
       ],
       "ExpectedArm64ASM": [
-        "pmull v16.1q, v16.1d, v17.1d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "pmull v4.1q, v2.1d, v3.1d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pclmulqdq xmm0, xmm1, 00001b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x3a 0x44"
       ],
       "ExpectedArm64ASM": [
-        "dup v0.2d, v16.d[1]",
-        "pmull v16.1q, v0.1d, v17.1d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v0.2d, v2.d[1]",
+        "pmull v4.1q, v0.1d, v3.1d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pclmulqdq xmm0, xmm1, 10000b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x3a 0x44"
       ],
       "ExpectedArm64ASM": [
-        "dup v0.2d, v17.d[1]",
-        "pmull v16.1q, v0.1d, v16.1d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v0.2d, v3.d[1]",
+        "pmull v4.1q, v0.1d, v2.1d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pclmulqdq xmm0, xmm1, 10001b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "0x66 0x0f 0x3a 0x44"
       ],
       "ExpectedArm64ASM": [
-        "pmull2 v16.1q, v16.2d, v17.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "pmull2 v4.1q, v2.2d, v3.2d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "aeskeygenassist xmm0, xmm1, 0": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0x66 0x0f 0x3a 0xdf"
       ],
       "ExpectedArm64ASM": [
-        "ldr q2, [x28, #2160]",
-        "movi v3.2d, #0x0",
-        "mov v16.16b, v17.16b",
+        "mov v2.16b, v17.16b",
+        "ldr q3, [x28, #2160]",
+        "movi v4.2d, #0x0",
+        "mov v5.16b, v2.16b",
         "unimplemented (Unimplemented)",
-        "tbl v16.16b, {v16.16b}, v2.16b"
+        "tbl v5.16b, {v5.16b}, v3.16b",
+        "mov v16.16b, v5.16b"
       ]
     },
     "aeskeygenassist xmm0, xmm1, 0xFF": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "0x66 0x0f 0x3a 0xdf"
       ],
       "ExpectedArm64ASM": [
-        "ldr q2, [x28, #2160]",
-        "movi v3.2d, #0x0",
-        "mov v16.16b, v17.16b",
+        "mov v2.16b, v17.16b",
+        "ldr q3, [x28, #2160]",
+        "movi v4.2d, #0x0",
+        "mov v5.16b, v2.16b",
         "unimplemented (Unimplemented)",
-        "tbl v16.16b, {v16.16b}, v2.16b",
+        "tbl v5.16b, {v5.16b}, v3.16b",
         "mov x0, #0xff00000000",
         "dup v1.2d, x0",
-        "eor v16.16b, v16.16b, v1.16b"
+        "eor v5.16b, v5.16b, v1.16b",
+        "mov v16.16b, v5.16b"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/DDD.json b/unittests/InstructionCountCI/DDD.json
index d73d5630a6..4176940902 100644
--- a/unittests/InstructionCountCI/DDD.json
+++ b/unittests/InstructionCountCI/DDD.json
@@ -21,10 +21,10 @@
       ],
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
-        "uzp1 v2.4h, v2.4h, v2.4h",
-        "sxtl v2.4s, v2.4h",
-        "scvtf v2.2s, v2.2s",
-        "str d2, [x28, #768]"
+        "uzp1 v3.4h, v2.4h, v2.4h",
+        "sxtl v2.4s, v3.4h",
+        "scvtf v3.2s, v2.2s",
+        "str d3, [x28, #768]"
       ]
     },
     "pi2fd mm0, mm1": {
@@ -34,8 +34,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
-        "scvtf v2.2s, v2.2s",
-        "str d2, [x28, #768]"
+        "scvtf v3.2s, v2.2s",
+        "str d3, [x28, #768]"
       ]
     },
     "pf2iw mm0, mm1": {
@@ -45,10 +45,10 @@
       ],
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
-        "fcvtzs v2.2s, v2.2s",
-        "uzp1 v2.4h, v2.4h, v2.4h",
-        "sxtl v2.4s, v2.4h",
-        "str d2, [x28, #768]"
+        "fcvtzs v3.2s, v2.2s",
+        "uzp1 v2.4h, v3.4h, v3.4h",
+        "sxtl v3.4s, v2.4h",
+        "str d3, [x28, #768]"
       ]
     },
     "pf2id mm0, mm1": {
@@ -58,8 +58,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
-        "fcvtzs v2.2s, v2.2s",
-        "str d2, [x28, #768]"
+        "fcvtzs v3.2s, v2.2s",
+        "str d3, [x28, #768]"
       ]
     },
     "pfrcpv mm0, mm1": {
@@ -70,8 +70,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "fmov v0.4s, #0x70 (1.0000)",
-        "fdiv v2.4s, v0.4s, v2.4s",
-        "str d2, [x28, #768]"
+        "fdiv v3.4s, v0.4s, v2.4s",
+        "str d3, [x28, #768]"
       ]
     },
     "pfrsqrtv mm0, mm1": {
@@ -83,8 +83,8 @@
         "ldr d2, [x28, #784]",
         "fmov v0.4s, #0x70 (1.0000)",
         "fsqrt v1.4s, v2.4s",
-        "fdiv v2.4s, v0.4s, v1.4s",
-        "str d2, [x28, #768]"
+        "fdiv v3.4s, v0.4s, v1.4s",
+        "str d3, [x28, #768]"
       ]
     },
     "pfnacc mm0, mm1": {
@@ -94,22 +94,23 @@
         "ldr d2, [x28, #768]",
         "ldr d3, [x28, #784]",
         "uzp1 v4.2s, v2.2s, v3.2s",
-        "uzp2 v2.2s, v2.2s, v3.2s",
-        "fsub v2.4s, v4.4s, v2.4s",
+        "uzp2 v5.2s, v2.2s, v3.2s",
+        "fsub v2.4s, v4.4s, v5.4s",
         "str d2, [x28, #768]"
       ]
     },
     "pfpnacc mm0, mm1": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x0f 0x0f 0x8e",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
         "ldr d3, [x28, #784]",
         "dup v4.2s, v2.s[1]",
-        "fsub s2, s2, s4",
-        "faddp v3.4s, v3.4s, v3.4s",
-        "mov v2.s[1], v3.s[0]",
-        "str d2, [x28, #768]"
+        "fsub s5, s2, s4",
+        "faddp v2.4s, v3.4s, v3.4s",
+        "mov v3.16b, v5.16b",
+        "mov v3.s[1], v2.s[0]",
+        "str d3, [x28, #768]"
       ]
     },
     "pfcmpge mm0, mm1": {
@@ -118,19 +119,20 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "fcmge v2.4s, v3.4s, v2.4s",
-        "str d2, [x28, #768]"
+        "fcmge v4.4s, v3.4s, v2.4s",
+        "str d4, [x28, #768]"
       ]
     },
     "pfmin mm0, mm1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0x0f 0x94",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "fcmgt v0.4s, v3.4s, v2.4s",
-        "bif v2.16b, v3.16b, v0.16b",
-        "str d2, [x28, #768]"
+        "fcmgt v0.4s, v2.4s, v3.4s",
+        "mov v4.16b, v3.16b",
+        "bif v4.16b, v2.16b, v0.16b",
+        "str d4, [x28, #768]"
       ]
     },
     "pfrcp mm0, mm1": {
@@ -141,8 +143,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "fmov s0, #0x70 (1.0000)",
-        "fdiv s2, s0, s2",
-        "dup v2.2s, v2.s[0]",
+        "fdiv s3, s0, s2",
+        "dup v2.2s, v3.s[0]",
         "str d2, [x28, #768]"
       ]
     },
@@ -155,8 +157,8 @@
         "ldr d2, [x28, #784]",
         "fmov s0, #0x70 (1.0000)",
         "fsqrt s1, s2",
-        "fdiv s2, s0, s1",
-        "dup v2.2s, v2.s[0]",
+        "fdiv s3, s0, s1",
+        "dup v2.2s, v3.s[0]",
         "str d2, [x28, #768]"
       ]
     },
@@ -166,8 +168,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "fsub v2.4s, v3.4s, v2.4s",
-        "str d2, [x28, #768]"
+        "fsub v4.4s, v3.4s, v2.4s",
+        "str d4, [x28, #768]"
       ]
     },
     "pfadd mm0, mm1": {
@@ -176,8 +178,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "fadd v2.4s, v3.4s, v2.4s",
-        "str d2, [x28, #768]"
+        "fadd v4.4s, v3.4s, v2.4s",
+        "str d4, [x28, #768]"
       ]
     },
     "pfcmpgt mm0, mm1": {
@@ -186,19 +188,20 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "fcmgt v2.4s, v3.4s, v2.4s",
-        "str d2, [x28, #768]"
+        "fcmgt v4.4s, v3.4s, v2.4s",
+        "str d4, [x28, #768]"
       ]
     },
     "pfmax mm0, mm1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0x0f 0xa4",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "fcmgt v0.4s, v3.4s, v2.4s",
-        "bit v2.16b, v3.16b, v0.16b",
-        "str d2, [x28, #768]"
+        "fcmgt v0.4s, v2.4s, v3.4s",
+        "mov v4.16b, v3.16b",
+        "bit v4.16b, v2.16b, v0.16b",
+        "str d4, [x28, #768]"
       ]
     },
     "pfrcpit1 mm0, mm1": {
@@ -233,8 +236,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "fsub v2.4s, v2.4s, v3.4s",
-        "str d2, [x28, #768]"
+        "fsub v4.4s, v2.4s, v3.4s",
+        "str d4, [x28, #768]"
       ]
     },
     "pfcmpeq mm0, mm1": {
@@ -243,8 +246,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "fcmeq v2.4s, v3.4s, v2.4s",
-        "str d2, [x28, #768]"
+        "fcmeq v4.4s, v3.4s, v2.4s",
+        "str d4, [x28, #768]"
       ]
     },
     "pfmul mm0, mm1": {
@@ -253,8 +256,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "fmul v2.4s, v3.4s, v2.4s",
-        "str d2, [x28, #768]"
+        "fmul v4.4s, v3.4s, v2.4s",
+        "str d4, [x28, #768]"
       ]
     },
     "pfrcpit2 mm0, mm1": {
@@ -280,10 +283,10 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
         "ldr d3, [x28, #784]",
-        "smull v2.4s, v2.4h, v3.4h",
-        "movi v3.4s, #0x80, lsl #8",
-        "add v2.4s, v2.4s, v3.4s",
-        "shrn v2.4h, v2.4s, #16",
+        "smull v4.4s, v2.4h, v3.4h",
+        "movi v2.4s, #0x80, lsl #8",
+        "add v3.4s, v4.4s, v2.4s",
+        "shrn v2.4h, v3.4s, #16",
         "str d2, [x28, #768]"
       ]
     },
@@ -292,8 +295,8 @@
       "Comment": "0x0f 0x0f 0xbb",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
-        "rev64 v2.2s, v2.2s",
-        "str d2, [x28, #768]"
+        "rev64 v3.2s, v2.2s",
+        "str d3, [x28, #768]"
       ]
     },
     "pavgusb mm0, mm1": {
@@ -302,8 +305,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "urhadd v2.16b, v3.16b, v2.16b",
-        "str d2, [x28, #768]"
+        "urhadd v4.16b, v3.16b, v2.16b",
+        "str d4, [x28, #768]"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/FEXOpt/AddressingLimitations.json b/unittests/InstructionCountCI/FEXOpt/AddressingLimitations.json
index c23b03d5c1..25f73eabc3 100644
--- a/unittests/InstructionCountCI/FEXOpt/AddressingLimitations.json
+++ b/unittests/InstructionCountCI/FEXOpt/AddressingLimitations.json
@@ -14,911 +14,1149 @@
   ],
   "Instructions": {
     "movzx rax, byte [ecx - 257]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "sub x20, x5, #0x101 (257)",
-        "mov w20, w20",
-        "ldrb w4, [x20]"
+        "mov x20, x5",
+        "sub x21, x20, #0x101 (257)",
+        "mov w20, w21",
+        "ldrb w21, [x20]",
+        "mov x4, x21"
       ]
     },
     "movzx rax, byte [ecx - 256]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "sub x20, x5, #0x100 (256)",
-        "mov w20, w20",
-        "ldrb w4, [x20]"
+        "mov x20, x5",
+        "sub x21, x20, #0x100 (256)",
+        "mov w20, w21",
+        "ldrb w21, [x20]",
+        "mov x4, x21"
       ]
     },
     "movzx rax, byte [ecx + 255]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "add x20, x5, #0xff (255)",
-        "mov w20, w20",
-        "ldrb w4, [x20]"
+        "mov x20, x5",
+        "add x21, x20, #0xff (255)",
+        "mov w20, w21",
+        "ldrb w21, [x20]",
+        "mov x4, x21"
       ]
     },
     "movzx rax, byte [ecx + 256]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "add x20, x5, #0x100 (256)",
-        "mov w20, w20",
-        "ldrb w4, [x20]"
+        "mov x20, x5",
+        "add x21, x20, #0x100 (256)",
+        "mov w20, w21",
+        "ldrb w21, [x20]",
+        "mov x4, x21"
       ]
     },
     "movzx rax, byte [ecx + 4095]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "add x20, x5, #0xfff (4095)",
-        "mov w20, w20",
-        "ldrb w4, [x20]"
+        "mov x20, x5",
+        "add x21, x20, #0xfff (4095)",
+        "mov w20, w21",
+        "ldrb w21, [x20]",
+        "mov x4, x21"
       ]
     },
     "movzx rax, byte [ecx + 4096]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "add x20, x5, #0x1000 (4096)",
-        "mov w20, w20",
-        "ldrb w4, [x20]"
+        "mov x20, x5",
+        "add x21, x20, #0x1000 (4096)",
+        "mov w20, w21",
+        "ldrb w21, [x20]",
+        "mov x4, x21"
       ]
     },
     "movzx rax, word [ecx - 257]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "sub x20, x5, #0x101 (257)",
-        "mov w20, w20",
-        "ldrh w4, [x20]"
+        "mov x20, x5",
+        "sub x21, x20, #0x101 (257)",
+        "mov w20, w21",
+        "ldrh w21, [x20]",
+        "mov x4, x21"
       ]
     },
     "movzx rax, word [ecx - 256]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "sub x20, x5, #0x100 (256)",
-        "mov w20, w20",
-        "ldrh w4, [x20]"
+        "mov x20, x5",
+        "sub x21, x20, #0x100 (256)",
+        "mov w20, w21",
+        "ldrh w21, [x20]",
+        "mov x4, x21"
       ]
     },
     "movzx rax, word [ecx + 255]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "add x20, x5, #0xff (255)",
-        "mov w20, w20",
-        "ldrh w4, [x20]"
+        "mov x20, x5",
+        "add x21, x20, #0xff (255)",
+        "mov w20, w21",
+        "ldrh w21, [x20]",
+        "mov x4, x21"
       ]
     },
     "movzx rax, word [ecx + 256]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "add x20, x5, #0x100 (256)",
-        "mov w20, w20",
-        "ldrh w4, [x20]"
+        "mov x20, x5",
+        "add x21, x20, #0x100 (256)",
+        "mov w20, w21",
+        "ldrh w21, [x20]",
+        "mov x4, x21"
       ]
     },
     "movzx rax, word [ecx + 8190]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "ExpectedArm64ASM": [
-        "mov w20, #0x1ffe",
-        "add x20, x5, x20",
-        "mov w20, w20",
-        "ldrh w4, [x20]"
+        "mov x20, x5",
+        "mov w21, #0x1ffe",
+        "add x22, x20, x21",
+        "mov w20, w22",
+        "ldrh w21, [x20]",
+        "mov x4, x21"
       ]
     },
     "movzx rax, word [ecx + 8191]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "ExpectedArm64ASM": [
-        "mov w20, #0x1fff",
-        "add x20, x5, x20",
-        "mov w20, w20",
-        "ldrh w4, [x20]"
+        "mov x20, x5",
+        "mov w21, #0x1fff",
+        "add x22, x20, x21",
+        "mov w20, w22",
+        "ldrh w21, [x20]",
+        "mov x4, x21"
       ]
     },
     "movzx rax, word [ecx + 8192]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "add x20, x5, #0x2000 (8192)",
-        "mov w20, w20",
-        "ldrh w4, [x20]"
+        "mov x20, x5",
+        "add x21, x20, #0x2000 (8192)",
+        "mov w20, w21",
+        "ldrh w21, [x20]",
+        "mov x4, x21"
       ]
     },
     "mov eax, dword [ecx - 257]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "sub x20, x5, #0x101 (257)",
-        "mov w20, w20",
-        "ldr w4, [x20]"
+        "mov x20, x5",
+        "sub x21, x20, #0x101 (257)",
+        "mov w20, w21",
+        "ldr w21, [x20]",
+        "mov x4, x21"
       ]
     },
     "mov eax, dword [ecx - 256]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "sub x20, x5, #0x100 (256)",
-        "mov w20, w20",
-        "ldr w4, [x20]"
+        "mov x20, x5",
+        "sub x21, x20, #0x100 (256)",
+        "mov w20, w21",
+        "ldr w21, [x20]",
+        "mov x4, x21"
       ]
     },
     "mov eax, dword [ecx + 255]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "add x20, x5, #0xff (255)",
-        "mov w20, w20",
-        "ldr w4, [x20]"
+        "mov x20, x5",
+        "add x21, x20, #0xff (255)",
+        "mov w20, w21",
+        "ldr w21, [x20]",
+        "mov x4, x21"
       ]
     },
     "mov eax, dword [ecx + 256]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "add x20, x5, #0x100 (256)",
-        "mov w20, w20",
-        "ldr w4, [x20]"
+        "mov x20, x5",
+        "add x21, x20, #0x100 (256)",
+        "mov w20, w21",
+        "ldr w21, [x20]",
+        "mov x4, x21"
       ]
     },
     "mov eax, dword [ecx + 16380]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "ExpectedArm64ASM": [
-        "mov w20, #0x3ffc",
-        "add x20, x5, x20",
-        "mov w20, w20",
-        "ldr w4, [x20]"
+        "mov x20, x5",
+        "mov w21, #0x3ffc",
+        "add x22, x20, x21",
+        "mov w20, w22",
+        "ldr w21, [x20]",
+        "mov x4, x21"
       ]
     },
     "mov eax, dword [ecx + 16381]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "ExpectedArm64ASM": [
-        "mov w20, #0x3ffd",
-        "add x20, x5, x20",
-        "mov w20, w20",
-        "ldr w4, [x20]"
+        "mov x20, x5",
+        "mov w21, #0x3ffd",
+        "add x22, x20, x21",
+        "mov w20, w22",
+        "ldr w21, [x20]",
+        "mov x4, x21"
       ]
     },
     "mov eax, dword [ecx + 16382]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "ExpectedArm64ASM": [
-        "mov w20, #0x3ffe",
-        "add x20, x5, x20",
-        "mov w20, w20",
-        "ldr w4, [x20]"
+        "mov x20, x5",
+        "mov w21, #0x3ffe",
+        "add x22, x20, x21",
+        "mov w20, w22",
+        "ldr w21, [x20]",
+        "mov x4, x21"
       ]
     },
     "mov eax, dword [ecx + 16383]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "ExpectedArm64ASM": [
-        "mov w20, #0x3fff",
-        "add x20, x5, x20",
-        "mov w20, w20",
-        "ldr w4, [x20]"
+        "mov x20, x5",
+        "mov w21, #0x3fff",
+        "add x22, x20, x21",
+        "mov w20, w22",
+        "ldr w21, [x20]",
+        "mov x4, x21"
       ]
     },
     "mov eax, dword [ecx + 16384]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "add x20, x5, #0x4000 (16384)",
-        "mov w20, w20",
-        "ldr w4, [x20]"
+        "mov x20, x5",
+        "add x21, x20, #0x4000 (16384)",
+        "mov w20, w21",
+        "ldr w21, [x20]",
+        "mov x4, x21"
       ]
     },
     "mov rax, qword [ecx - 257]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "sub x20, x5, #0x101 (257)",
-        "mov w20, w20",
-        "ldr x4, [x20]"
+        "mov x20, x5",
+        "sub x21, x20, #0x101 (257)",
+        "mov w20, w21",
+        "ldr x21, [x20]",
+        "mov x4, x21"
       ]
     },
     "mov rax, qword [ecx - 256]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "sub x20, x5, #0x100 (256)",
-        "mov w20, w20",
-        "ldr x4, [x20]"
+        "mov x20, x5",
+        "sub x21, x20, #0x100 (256)",
+        "mov w20, w21",
+        "ldr x21, [x20]",
+        "mov x4, x21"
       ]
     },
     "mov rax, qword [ecx + 255]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "add x20, x5, #0xff (255)",
-        "mov w20, w20",
-        "ldr x4, [x20]"
+        "mov x20, x5",
+        "add x21, x20, #0xff (255)",
+        "mov w20, w21",
+        "ldr x21, [x20]",
+        "mov x4, x21"
       ]
     },
     "mov rax, qword [ecx + 256]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "add x20, x5, #0x100 (256)",
-        "mov w20, w20",
-        "ldr x4, [x20]"
+        "mov x20, x5",
+        "add x21, x20, #0x100 (256)",
+        "mov w20, w21",
+        "ldr x21, [x20]",
+        "mov x4, x21"
       ]
     },
     "mov rax, qword [ecx + 32760]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "ExpectedArm64ASM": [
-        "mov w20, #0x7ff8",
-        "add x20, x5, x20",
-        "mov w20, w20",
-        "ldr x4, [x20]"
+        "mov x20, x5",
+        "mov w21, #0x7ff8",
+        "add x22, x20, x21",
+        "mov w20, w22",
+        "ldr x21, [x20]",
+        "mov x4, x21"
       ]
     },
     "mov rax, qword [ecx + 32761]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "ExpectedArm64ASM": [
-        "mov w20, #0x7ff9",
-        "add x20, x5, x20",
-        "mov w20, w20",
-        "ldr x4, [x20]"
+        "mov x20, x5",
+        "mov w21, #0x7ff9",
+        "add x22, x20, x21",
+        "mov w20, w22",
+        "ldr x21, [x20]",
+        "mov x4, x21"
       ]
     },
     "mov rax, qword [ecx + 32762]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "ExpectedArm64ASM": [
-        "mov w20, #0x7ffa",
-        "add x20, x5, x20",
-        "mov w20, w20",
-        "ldr x4, [x20]"
+        "mov x20, x5",
+        "mov w21, #0x7ffa",
+        "add x22, x20, x21",
+        "mov w20, w22",
+        "ldr x21, [x20]",
+        "mov x4, x21"
       ]
     },
     "mov rax, qword [ecx + 32763]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "ExpectedArm64ASM": [
-        "mov w20, #0x7ffb",
-        "add x20, x5, x20",
-        "mov w20, w20",
-        "ldr x4, [x20]"
+        "mov x20, x5",
+        "mov w21, #0x7ffb",
+        "add x22, x20, x21",
+        "mov w20, w22",
+        "ldr x21, [x20]",
+        "mov x4, x21"
       ]
     },
     "mov rax, qword [ecx + 32764]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "ExpectedArm64ASM": [
-        "mov w20, #0x7ffc",
-        "add x20, x5, x20",
-        "mov w20, w20",
-        "ldr x4, [x20]"
+        "mov x20, x5",
+        "mov w21, #0x7ffc",
+        "add x22, x20, x21",
+        "mov w20, w22",
+        "ldr x21, [x20]",
+        "mov x4, x21"
       ]
     },
     "mov rax, qword [ecx + 32765]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "ExpectedArm64ASM": [
-        "mov w20, #0x7ffd",
-        "add x20, x5, x20",
-        "mov w20, w20",
-        "ldr x4, [x20]"
+        "mov x20, x5",
+        "mov w21, #0x7ffd",
+        "add x22, x20, x21",
+        "mov w20, w22",
+        "ldr x21, [x20]",
+        "mov x4, x21"
       ]
     },
     "mov rax, qword [ecx + 32766]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "ExpectedArm64ASM": [
-        "mov w20, #0x7ffe",
-        "add x20, x5, x20",
-        "mov w20, w20",
-        "ldr x4, [x20]"
+        "mov x20, x5",
+        "mov w21, #0x7ffe",
+        "add x22, x20, x21",
+        "mov w20, w22",
+        "ldr x21, [x20]",
+        "mov x4, x21"
       ]
     },
     "mov rax, qword [ecx + 32767]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "ExpectedArm64ASM": [
-        "mov w20, #0x7fff",
-        "add x20, x5, x20",
-        "mov w20, w20",
-        "ldr x4, [x20]"
+        "mov x20, x5",
+        "mov w21, #0x7fff",
+        "add x22, x20, x21",
+        "mov w20, w22",
+        "ldr x21, [x20]",
+        "mov x4, x21"
       ]
     },
     "mov rax, qword [ecx + 32768]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "add x20, x5, #0x8000 (32768)",
-        "mov w20, w20",
-        "ldr x4, [x20]"
+        "mov x20, x5",
+        "add x21, x20, #0x8000 (32768)",
+        "mov w20, w21",
+        "ldr x21, [x20]",
+        "mov x4, x21"
       ]
     },
     "movzx rax, byte [rcx - 257]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "sub x20, x5, #0x101 (257)",
-        "ldrb w4, [x20]"
+        "mov x20, x5",
+        "sub x21, x20, #0x101 (257)",
+        "ldrb w20, [x21]",
+        "mov x4, x20"
       ]
     },
     "movzx rax, byte [rcx - 256]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "sub x20, x5, #0x100 (256)",
-        "ldrb w4, [x20]"
+        "mov x20, x5",
+        "sub x21, x20, #0x100 (256)",
+        "ldrb w20, [x21]",
+        "mov x4, x20"
       ]
     },
     "movzx rax, byte [rcx + 255]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldrb w4, [x5, #255]"
+        "mov x20, x5",
+        "ldrb w21, [x20, #255]",
+        "mov x4, x21"
       ]
     },
     "movzx rax, byte [rcx + 256]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldrb w4, [x5, #256]"
+        "mov x20, x5",
+        "ldrb w21, [x20, #256]",
+        "mov x4, x21"
       ]
     },
     "movzx rax, byte [rcx + 4095]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldrb w4, [x5, #4095]"
+        "mov x20, x5",
+        "ldrb w21, [x20, #4095]",
+        "mov x4, x21"
       ]
     },
     "movzx rax, byte [rcx + 4096]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0x1000",
-        "ldrb w4, [x5, x20, sxtx]"
+        "mov x20, x5",
+        "mov w21, #0x1000",
+        "ldrb w22, [x20, x21, sxtx]",
+        "mov x4, x22"
       ]
     },
     "movzx rax, word [rcx - 257]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "sub x20, x5, #0x101 (257)",
-        "ldrh w4, [x20]"
+        "mov x20, x5",
+        "sub x21, x20, #0x101 (257)",
+        "ldrh w20, [x21]",
+        "mov x4, x20"
       ]
     },
     "movzx rax, word [rcx - 256]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "sub x20, x5, #0x100 (256)",
-        "ldrh w4, [x20]"
+        "mov x20, x5",
+        "sub x21, x20, #0x100 (256)",
+        "ldrh w20, [x21]",
+        "mov x4, x20"
       ]
     },
     "movzx rax, word [rcx + 255]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldurh w4, [x5, #255]"
+        "mov x20, x5",
+        "ldurh w21, [x20, #255]",
+        "mov x4, x21"
       ]
     },
     "movzx rax, word [rcx + 256]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldrh w4, [x5, #256]"
+        "mov x20, x5",
+        "ldrh w21, [x20, #256]",
+        "mov x4, x21"
       ]
     },
     "movzx rax, word [rcx + 8190]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldrh w4, [x5, #8190]"
+        "mov x20, x5",
+        "ldrh w21, [x20, #8190]",
+        "mov x4, x21"
       ]
     },
     "movzx rax, word [rcx + 8191]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0x1fff",
-        "ldrh w4, [x5, x20, sxtx]"
+        "mov x20, x5",
+        "mov w21, #0x1fff",
+        "ldrh w22, [x20, x21, sxtx]",
+        "mov x4, x22"
       ]
     },
     "movzx rax, word [rcx + 8192]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0x2000",
-        "ldrh w4, [x5, x20, sxtx]"
+        "mov x20, x5",
+        "mov w21, #0x2000",
+        "ldrh w22, [x20, x21, sxtx]",
+        "mov x4, x22"
       ]
     },
     "mov eax, dword [rcx - 257]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "sub x20, x5, #0x101 (257)",
-        "ldr w4, [x20]"
+        "mov x20, x5",
+        "sub x21, x20, #0x101 (257)",
+        "ldr w20, [x21]",
+        "mov x4, x20"
       ]
     },
     "mov eax, dword [rcx - 256]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "sub x20, x5, #0x100 (256)",
-        "ldr w4, [x20]"
+        "mov x20, x5",
+        "sub x21, x20, #0x100 (256)",
+        "ldr w20, [x21]",
+        "mov x4, x20"
       ]
     },
     "mov eax, dword [rcx + 255]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldur w4, [x5, #255]"
+        "mov x20, x5",
+        "ldur w21, [x20, #255]",
+        "mov x4, x21"
       ]
     },
     "mov eax, dword [rcx + 256]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldr w4, [x5, #256]"
+        "mov x20, x5",
+        "ldr w21, [x20, #256]",
+        "mov x4, x21"
       ]
     },
     "mov eax, dword [rcx + 16380]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldr w4, [x5, #16380]"
+        "mov x20, x5",
+        "ldr w21, [x20, #16380]",
+        "mov x4, x21"
       ]
     },
     "mov eax, dword [rcx + 16381]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0x3ffd",
-        "ldr w4, [x5, x20, sxtx]"
+        "mov x20, x5",
+        "mov w21, #0x3ffd",
+        "ldr w22, [x20, x21, sxtx]",
+        "mov x4, x22"
       ]
     },
     "mov eax, dword [rcx + 16382]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0x3ffe",
-        "ldr w4, [x5, x20, sxtx]"
+        "mov x20, x5",
+        "mov w21, #0x3ffe",
+        "ldr w22, [x20, x21, sxtx]",
+        "mov x4, x22"
       ]
     },
     "mov eax, dword [rcx + 16383]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0x3fff",
-        "ldr w4, [x5, x20, sxtx]"
+        "mov x20, x5",
+        "mov w21, #0x3fff",
+        "ldr w22, [x20, x21, sxtx]",
+        "mov x4, x22"
       ]
     },
     "mov eax, dword [rcx + 16384]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0x4000",
-        "ldr w4, [x5, x20, sxtx]"
+        "mov x20, x5",
+        "mov w21, #0x4000",
+        "ldr w22, [x20, x21, sxtx]",
+        "mov x4, x22"
       ]
     },
     "mov rax, qword [rcx - 257]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "sub x20, x5, #0x101 (257)",
-        "ldr x4, [x20]"
+        "mov x20, x5",
+        "sub x21, x20, #0x101 (257)",
+        "ldr x20, [x21]",
+        "mov x4, x20"
       ]
     },
     "mov rax, qword [rcx - 256]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "sub x20, x5, #0x100 (256)",
-        "ldr x4, [x20]"
+        "mov x20, x5",
+        "sub x21, x20, #0x100 (256)",
+        "ldr x20, [x21]",
+        "mov x4, x20"
       ]
     },
     "mov rax, qword [rcx + 255]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldur x4, [x5, #255]"
+        "mov x20, x5",
+        "ldur x21, [x20, #255]",
+        "mov x4, x21"
       ]
     },
     "mov rax, qword [rcx + 256]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldr x4, [x5, #256]"
+        "mov x20, x5",
+        "ldr x21, [x20, #256]",
+        "mov x4, x21"
       ]
     },
     "mov rax, qword [rcx + 32760]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldr x4, [x5, #32760]"
+        "mov x20, x5",
+        "ldr x21, [x20, #32760]",
+        "mov x4, x21"
       ]
     },
     "mov rax, qword [rcx + 32761]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0x7ff9",
-        "ldr x4, [x5, x20, sxtx]"
+        "mov x20, x5",
+        "mov w21, #0x7ff9",
+        "ldr x22, [x20, x21, sxtx]",
+        "mov x4, x22"
       ]
     },
     "mov rax, qword [rcx + 32762]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0x7ffa",
-        "ldr x4, [x5, x20, sxtx]"
+        "mov x20, x5",
+        "mov w21, #0x7ffa",
+        "ldr x22, [x20, x21, sxtx]",
+        "mov x4, x22"
       ]
     },
     "mov rax, qword [rcx + 32763]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0x7ffb",
-        "ldr x4, [x5, x20, sxtx]"
+        "mov x20, x5",
+        "mov w21, #0x7ffb",
+        "ldr x22, [x20, x21, sxtx]",
+        "mov x4, x22"
       ]
     },
     "mov rax, qword [rcx + 32764]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0x7ffc",
-        "ldr x4, [x5, x20, sxtx]"
+        "mov x20, x5",
+        "mov w21, #0x7ffc",
+        "ldr x22, [x20, x21, sxtx]",
+        "mov x4, x22"
       ]
     },
     "mov rax, qword [rcx + 32765]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0x7ffd",
-        "ldr x4, [x5, x20, sxtx]"
+        "mov x20, x5",
+        "mov w21, #0x7ffd",
+        "ldr x22, [x20, x21, sxtx]",
+        "mov x4, x22"
       ]
     },
     "mov rax, qword [rcx + 32766]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0x7ffe",
-        "ldr x4, [x5, x20, sxtx]"
+        "mov x20, x5",
+        "mov w21, #0x7ffe",
+        "ldr x22, [x20, x21, sxtx]",
+        "mov x4, x22"
       ]
     },
     "mov rax, qword [rcx + 32767]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0x7fff",
-        "ldr x4, [x5, x20, sxtx]"
+        "mov x20, x5",
+        "mov w21, #0x7fff",
+        "ldr x22, [x20, x21, sxtx]",
+        "mov x4, x22"
       ]
     },
     "mov rax, qword [rcx + 32768]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0x8000",
-        "ldr x4, [x5, x20, sxtx]"
+        "mov x20, x5",
+        "mov w21, #0x8000",
+        "ldr x22, [x20, x21, sxtx]",
+        "mov x4, x22"
       ]
     },
     "movss xmm0, [rcx + 16379]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0x3ffb",
-        "ldr s16, [x5, x20, sxtx]"
+        "mov x20, x5",
+        "mov w21, #0x3ffb",
+        "ldr s2, [x20, x21, sxtx]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movss xmm0, [rcx + 16380]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldr s16, [x5, #16380]"
+        "mov x20, x5",
+        "ldr s2, [x20, #16380]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movss xmm0, [rcx + 16381]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0x3ffd",
-        "ldr s16, [x5, x20, sxtx]"
+        "mov x20, x5",
+        "mov w21, #0x3ffd",
+        "ldr s2, [x20, x21, sxtx]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movss xmm0, [rcx - 257]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "sub x20, x5, #0x101 (257)",
-        "ldr s16, [x20]"
+        "mov x20, x5",
+        "sub x21, x20, #0x101 (257)",
+        "ldr s2, [x21]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movss xmm0, [rcx - 256]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "sub x20, x5, #0x100 (256)",
-        "ldr s16, [x20]"
+        "mov x20, x5",
+        "sub x21, x20, #0x100 (256)",
+        "ldr s2, [x21]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movss xmm0, [rcx + 255]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldur s16, [x5, #255]"
+        "mov x20, x5",
+        "ldur s2, [x20, #255]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movss xmm0, [rcx + 256]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldr s16, [x5, #256]"
+        "mov x20, x5",
+        "ldr s2, [x20, #256]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movsd xmm0, [rcx + 32759]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0x7ff7",
-        "ldr d16, [x5, x20, sxtx]"
+        "mov x20, x5",
+        "mov w21, #0x7ff7",
+        "ldr d2, [x20, x21, sxtx]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movsd xmm0, [rcx + 32760]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldr d16, [x5, #32760]"
+        "mov x20, x5",
+        "ldr d2, [x20, #32760]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movsd xmm0, [rcx + 32761]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0x7ff9",
-        "ldr d16, [x5, x20, sxtx]"
+        "mov x20, x5",
+        "mov w21, #0x7ff9",
+        "ldr d2, [x20, x21, sxtx]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movsd xmm0, [rcx - 257]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "sub x20, x5, #0x101 (257)",
-        "ldr d16, [x20]"
+        "mov x20, x5",
+        "sub x21, x20, #0x101 (257)",
+        "ldr d2, [x21]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movsd xmm0, [rcx - 256]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "sub x20, x5, #0x100 (256)",
-        "ldr d16, [x20]"
+        "mov x20, x5",
+        "sub x21, x20, #0x100 (256)",
+        "ldr d2, [x21]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movsd xmm0, [rcx + 255]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldur d16, [x5, #255]"
+        "mov x20, x5",
+        "ldur d2, [x20, #255]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movsd xmm0, [rcx + 256]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldr d16, [x5, #256]"
+        "mov x20, x5",
+        "ldr d2, [x20, #256]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movq xmm0, [rcx + 65519]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0xffef",
-        "ldr d16, [x5, x20, sxtx]"
+        "mov x20, x5",
+        "mov w21, #0xffef",
+        "ldr d2, [x20, x21, sxtx]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movq xmm0, [rcx + 65520]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0xfff0",
-        "ldr d16, [x5, x20, sxtx]"
+        "mov x20, x5",
+        "mov w21, #0xfff0",
+        "ldr d2, [x20, x21, sxtx]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movq xmm0, [rcx + 65521]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0xfff1",
-        "ldr d16, [x5, x20, sxtx]"
+        "mov x20, x5",
+        "mov w21, #0xfff1",
+        "ldr d2, [x20, x21, sxtx]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movq xmm0, [rcx - 257]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "sub x20, x5, #0x101 (257)",
-        "ldr d16, [x20]"
+        "mov x20, x5",
+        "sub x21, x20, #0x101 (257)",
+        "ldr d2, [x21]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movq xmm0, [rcx - 256]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "sub x20, x5, #0x100 (256)",
-        "ldr d16, [x20]"
+        "mov x20, x5",
+        "sub x21, x20, #0x100 (256)",
+        "ldr d2, [x21]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movq xmm0, [rcx + 255]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldur d16, [x5, #255]"
+        "mov x20, x5",
+        "ldur d2, [x20, #255]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movq xmm0, [rcx + 256]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldr d16, [x5, #256]"
+        "mov x20, x5",
+        "ldr d2, [x20, #256]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movss xmm0, [ecx + 16379]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "ExpectedArm64ASM": [
-        "mov w20, #0x3ffb",
-        "add x20, x5, x20",
-        "mov w20, w20",
-        "ldr s16, [x20]"
+        "mov x20, x5",
+        "mov w21, #0x3ffb",
+        "add x22, x20, x21",
+        "mov w20, w22",
+        "ldr s2, [x20]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movss xmm0, [ecx + 16380]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "ExpectedArm64ASM": [
-        "mov w20, #0x3ffc",
-        "add x20, x5, x20",
-        "mov w20, w20",
-        "ldr s16, [x20]"
+        "mov x20, x5",
+        "mov w21, #0x3ffc",
+        "add x22, x20, x21",
+        "mov w20, w22",
+        "ldr s2, [x20]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movss xmm0, [ecx + 16381]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "ExpectedArm64ASM": [
-        "mov w20, #0x3ffd",
-        "add x20, x5, x20",
-        "mov w20, w20",
-        "ldr s16, [x20]"
+        "mov x20, x5",
+        "mov w21, #0x3ffd",
+        "add x22, x20, x21",
+        "mov w20, w22",
+        "ldr s2, [x20]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movss xmm0, [ecx - 257]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "sub x20, x5, #0x101 (257)",
-        "mov w20, w20",
-        "ldr s16, [x20]"
+        "mov x20, x5",
+        "sub x21, x20, #0x101 (257)",
+        "mov w20, w21",
+        "ldr s2, [x20]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movss xmm0, [ecx - 256]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "sub x20, x5, #0x100 (256)",
-        "mov w20, w20",
-        "ldr s16, [x20]"
+        "mov x20, x5",
+        "sub x21, x20, #0x100 (256)",
+        "mov w20, w21",
+        "ldr s2, [x20]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movss xmm0, [ecx + 255]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "add x20, x5, #0xff (255)",
-        "mov w20, w20",
-        "ldr s16, [x20]"
+        "mov x20, x5",
+        "add x21, x20, #0xff (255)",
+        "mov w20, w21",
+        "ldr s2, [x20]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movss xmm0, [ecx + 256]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "add x20, x5, #0x100 (256)",
-        "mov w20, w20",
-        "ldr s16, [x20]"
+        "mov x20, x5",
+        "add x21, x20, #0x100 (256)",
+        "mov w20, w21",
+        "ldr s2, [x20]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movsd xmm0, [ecx + 32759]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "ExpectedArm64ASM": [
-        "mov w20, #0x7ff7",
-        "add x20, x5, x20",
-        "mov w20, w20",
-        "ldr d16, [x20]"
+        "mov x20, x5",
+        "mov w21, #0x7ff7",
+        "add x22, x20, x21",
+        "mov w20, w22",
+        "ldr d2, [x20]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movsd xmm0, [ecx + 32760]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "ExpectedArm64ASM": [
-        "mov w20, #0x7ff8",
-        "add x20, x5, x20",
-        "mov w20, w20",
-        "ldr d16, [x20]"
+        "mov x20, x5",
+        "mov w21, #0x7ff8",
+        "add x22, x20, x21",
+        "mov w20, w22",
+        "ldr d2, [x20]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movsd xmm0, [ecx + 32761]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "ExpectedArm64ASM": [
-        "mov w20, #0x7ff9",
-        "add x20, x5, x20",
-        "mov w20, w20",
-        "ldr d16, [x20]"
+        "mov x20, x5",
+        "mov w21, #0x7ff9",
+        "add x22, x20, x21",
+        "mov w20, w22",
+        "ldr d2, [x20]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movsd xmm0, [ecx - 257]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "sub x20, x5, #0x101 (257)",
-        "mov w20, w20",
-        "ldr d16, [x20]"
+        "mov x20, x5",
+        "sub x21, x20, #0x101 (257)",
+        "mov w20, w21",
+        "ldr d2, [x20]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movsd xmm0, [ecx - 256]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "sub x20, x5, #0x100 (256)",
-        "mov w20, w20",
-        "ldr d16, [x20]"
+        "mov x20, x5",
+        "sub x21, x20, #0x100 (256)",
+        "mov w20, w21",
+        "ldr d2, [x20]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movsd xmm0, [ecx + 255]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "add x20, x5, #0xff (255)",
-        "mov w20, w20",
-        "ldr d16, [x20]"
+        "mov x20, x5",
+        "add x21, x20, #0xff (255)",
+        "mov w20, w21",
+        "ldr d2, [x20]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movsd xmm0, [ecx + 256]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "add x20, x5, #0x100 (256)",
-        "mov w20, w20",
-        "ldr d16, [x20]"
+        "mov x20, x5",
+        "add x21, x20, #0x100 (256)",
+        "mov w20, w21",
+        "ldr d2, [x20]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movq xmm0, [ecx + 65519]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "ExpectedArm64ASM": [
-        "mov w20, #0xffef",
-        "add x20, x5, x20",
-        "mov w20, w20",
-        "ldr d16, [x20]"
+        "mov x20, x5",
+        "mov w21, #0xffef",
+        "add x22, x20, x21",
+        "mov w20, w22",
+        "ldr d2, [x20]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movq xmm0, [ecx + 65520]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "ExpectedArm64ASM": [
-        "mov w20, #0xfff0",
-        "add x20, x5, x20",
-        "mov w20, w20",
-        "ldr d16, [x20]"
+        "mov x20, x5",
+        "mov w21, #0xfff0",
+        "add x22, x20, x21",
+        "mov w20, w22",
+        "ldr d2, [x20]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movq xmm0, [ecx + 65521]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "ExpectedArm64ASM": [
-        "mov w20, #0xfff1",
-        "add x20, x5, x20",
-        "mov w20, w20",
-        "ldr d16, [x20]"
+        "mov x20, x5",
+        "mov w21, #0xfff1",
+        "add x22, x20, x21",
+        "mov w20, w22",
+        "ldr d2, [x20]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movq xmm0, [ecx - 257]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "sub x20, x5, #0x101 (257)",
-        "mov w20, w20",
-        "ldr d16, [x20]"
+        "mov x20, x5",
+        "sub x21, x20, #0x101 (257)",
+        "mov w20, w21",
+        "ldr d2, [x20]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movq xmm0, [ecx - 256]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "sub x20, x5, #0x100 (256)",
-        "mov w20, w20",
-        "ldr d16, [x20]"
+        "mov x20, x5",
+        "sub x21, x20, #0x100 (256)",
+        "mov w20, w21",
+        "ldr d2, [x20]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movq xmm0, [ecx + 255]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "add x20, x5, #0xff (255)",
-        "mov w20, w20",
-        "ldr d16, [x20]"
+        "mov x20, x5",
+        "add x21, x20, #0xff (255)",
+        "mov w20, w21",
+        "ldr d2, [x20]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movq xmm0, [ecx + 256]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "add x20, x5, #0x100 (256)",
-        "mov w20, w20",
-        "ldr d16, [x20]"
+        "mov x20, x5",
+        "add x21, x20, #0x100 (256)",
+        "mov w20, w21",
+        "ldr d2, [x20]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "prefetch [rcx - 257]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "sub x20, x5, #0x101 (257)",
-        "prfm pldl1keep, [x20]"
+        "mov x20, x5",
+        "sub x21, x20, #0x101 (257)",
+        "prfm pldl1keep, [x21]"
       ]
     },
     "prefetch [rcx - 256]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "sub x20, x5, #0x100 (256)",
-        "prfm pldl1keep, [x20]"
+        "mov x20, x5",
+        "sub x21, x20, #0x100 (256)",
+        "prfm pldl1keep, [x21]"
       ]
     },
     "prefetch [rcx + 255]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "ExpectedArm64ASM": [
-        "prfum pldl1keep, [x5, #255]"
+        "mov x20, x5",
+        "prfum pldl1keep, [x20, #255]"
       ]
     },
     "prefetch [rcx + 256]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "ExpectedArm64ASM": [
-        "prfm pldl1keep, [x5, #256]"
+        "mov x20, x5",
+        "prfm pldl1keep, [x20, #256]"
       ]
     },
     "prefetch [rcx + 32760]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "ExpectedArm64ASM": [
-        "prfm pldl1keep, [x5, #32760]"
+        "mov x20, x5",
+        "prfm pldl1keep, [x20, #32760]"
       ]
     },
     "prefetch [rcx + 32761]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "mov w20, #0x7ff9",
-        "prfm pldl1keep, [x5, x20, sxtx]"
+        "mov x20, x5",
+        "mov w21, #0x7ff9",
+        "prfm pldl1keep, [x20, x21, sxtx]"
       ]
     },
     "prefetch [rax + rcx*1]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "prfm pldl1keep, [x4, x5, sxtx]"
+        "mov x20, x4",
+        "mov x21, x5",
+        "prfm pldl1keep, [x20, x21, sxtx]"
       ]
     },
     "prefetch [rax + rcx*2]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "add x20, x4, x5, lsl #1",
-        "prfm pldl1keep, [x20]"
+        "mov x20, x4",
+        "mov x21, x5",
+        "add x22, x20, x21, lsl #1",
+        "prfm pldl1keep, [x22]"
       ]
     },
     "prefetch [rax + rcx*4]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "add x20, x4, x5, lsl #2",
-        "prfm pldl1keep, [x20]"
+        "mov x20, x4",
+        "mov x21, x5",
+        "add x22, x20, x21, lsl #2",
+        "prfm pldl1keep, [x22]"
       ]
     },
     "prefetch [rax + rcx*8]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "prfm pldl1keep, [x5, x4, sxtx #3]"
+        "mov x20, x4",
+        "mov x21, x5",
+        "prfm pldl1keep, [x21, x20, sxtx #3]"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/FEXOpt/AddressingLimitations_32Bit.json b/unittests/InstructionCountCI/FEXOpt/AddressingLimitations_32Bit.json
index c1f9b09346..43b73e192b 100644
--- a/unittests/InstructionCountCI/FEXOpt/AddressingLimitations_32Bit.json
+++ b/unittests/InstructionCountCI/FEXOpt/AddressingLimitations_32Bit.json
@@ -14,294 +14,380 @@
   ],
   "Instructions": {
     "movzx eax, byte [ecx - 257]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0xfffffeff",
-        "ldrb w4, [x5, w20, sxtw]"
+        "mov w20, w5",
+        "mov w21, #0xfffffeff",
+        "ldrb w22, [x20, w21, sxtw]",
+        "mov w4, w22"
       ]
     },
     "movzx eax, byte [ecx - 256]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0xffffff00",
-        "ldrb w4, [x5, w20, sxtw]"
+        "mov w20, w5",
+        "mov w21, #0xffffff00",
+        "ldrb w22, [x20, w21, sxtw]",
+        "mov w4, w22"
       ]
     },
     "movzx eax, byte [ecx + 255]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldrb w4, [x5, #255]"
+        "mov w20, w5",
+        "ldrb w21, [x20, #255]",
+        "mov w4, w21"
       ]
     },
     "movzx eax, byte [ecx + 256]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldrb w4, [x5, #256]"
+        "mov w20, w5",
+        "ldrb w21, [x20, #256]",
+        "mov w4, w21"
       ]
     },
     "movzx eax, byte [ecx + 4095]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldrb w4, [x5, #4095]"
+        "mov w20, w5",
+        "ldrb w21, [x20, #4095]",
+        "mov w4, w21"
       ]
     },
     "movzx eax, byte [ecx + 4096]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0x1000",
-        "ldrb w4, [x5, x20, sxtx]"
+        "mov w20, w5",
+        "mov w21, #0x1000",
+        "ldrb w22, [x20, x21, sxtx]",
+        "mov w4, w22"
       ]
     },
     "movzx eax, word [ecx - 257]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0xfffffeff",
-        "ldrh w4, [x5, w20, sxtw]"
+        "mov w20, w5",
+        "mov w21, #0xfffffeff",
+        "ldrh w22, [x20, w21, sxtw]",
+        "mov w4, w22"
       ]
     },
     "movzx eax, word [ecx - 256]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0xffffff00",
-        "ldrh w4, [x5, w20, sxtw]"
+        "mov w20, w5",
+        "mov w21, #0xffffff00",
+        "ldrh w22, [x20, w21, sxtw]",
+        "mov w4, w22"
       ]
     },
     "movzx eax, word [ecx + 255]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldurh w4, [x5, #255]"
+        "mov w20, w5",
+        "ldurh w21, [x20, #255]",
+        "mov w4, w21"
       ]
     },
     "movzx eax, word [ecx + 256]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldrh w4, [x5, #256]"
+        "mov w20, w5",
+        "ldrh w21, [x20, #256]",
+        "mov w4, w21"
       ]
     },
     "movzx eax, word [ecx + 8190]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldrh w4, [x5, #8190]"
+        "mov w20, w5",
+        "ldrh w21, [x20, #8190]",
+        "mov w4, w21"
       ]
     },
     "movzx eax, word [ecx + 8191]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0x1fff",
-        "ldrh w4, [x5, x20, sxtx]"
+        "mov w20, w5",
+        "mov w21, #0x1fff",
+        "ldrh w22, [x20, x21, sxtx]",
+        "mov w4, w22"
       ]
     },
     "movzx eax, word [ecx + 8192]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0x2000",
-        "ldrh w4, [x5, x20, sxtx]"
+        "mov w20, w5",
+        "mov w21, #0x2000",
+        "ldrh w22, [x20, x21, sxtx]",
+        "mov w4, w22"
       ]
     },
     "mov eax, dword [ecx - 257]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0xfffffeff",
-        "ldr w4, [x5, w20, sxtw]"
+        "mov w20, w5",
+        "mov w21, #0xfffffeff",
+        "ldr w22, [x20, w21, sxtw]",
+        "mov w4, w22"
       ]
     },
     "mov eax, dword [ecx - 256]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0xffffff00",
-        "ldr w4, [x5, w20, sxtw]"
+        "mov w20, w5",
+        "mov w21, #0xffffff00",
+        "ldr w22, [x20, w21, sxtw]",
+        "mov w4, w22"
       ]
     },
     "mov eax, dword [ecx + 255]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldur w4, [x5, #255]"
+        "mov w20, w5",
+        "ldur w21, [x20, #255]",
+        "mov w4, w21"
       ]
     },
     "mov eax, dword [ecx + 256]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldr w4, [x5, #256]"
+        "mov w20, w5",
+        "ldr w21, [x20, #256]",
+        "mov w4, w21"
       ]
     },
     "mov eax, dword [ecx + 16380]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldr w4, [x5, #16380]"
+        "mov w20, w5",
+        "ldr w21, [x20, #16380]",
+        "mov w4, w21"
       ]
     },
     "mov eax, dword [ecx + 16381]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0x3ffd",
-        "ldr w4, [x5, x20, sxtx]"
+        "mov w20, w5",
+        "mov w21, #0x3ffd",
+        "ldr w22, [x20, x21, sxtx]",
+        "mov w4, w22"
       ]
     },
     "mov eax, dword [ecx + 16382]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0x3ffe",
-        "ldr w4, [x5, x20, sxtx]"
+        "mov w20, w5",
+        "mov w21, #0x3ffe",
+        "ldr w22, [x20, x21, sxtx]",
+        "mov w4, w22"
       ]
     },
     "mov eax, dword [ecx + 16383]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0x3fff",
-        "ldr w4, [x5, x20, sxtx]"
+        "mov w20, w5",
+        "mov w21, #0x3fff",
+        "ldr w22, [x20, x21, sxtx]",
+        "mov w4, w22"
       ]
     },
     "mov eax, dword [ecx + 16384]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "add w20, w5, #0x4000 (16384)",
-        "ldr w4, [x20]"
+        "mov w20, w5",
+        "add w21, w20, #0x4000 (16384)",
+        "ldr w20, [x21]",
+        "mov w4, w20"
       ]
     },
     "movss xmm0, [ecx + 16379]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0x3ffb",
-        "ldr s16, [x5, x20, sxtx]"
+        "mov w20, w5",
+        "mov w21, #0x3ffb",
+        "ldr s2, [x20, x21, sxtx]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movss xmm0, [ecx + 16380]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldr s16, [x5, #16380]"
+        "mov w20, w5",
+        "ldr s2, [x20, #16380]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movss xmm0, [ecx + 16381]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0x3ffd",
-        "ldr s16, [x5, x20, sxtx]"
+        "mov w20, w5",
+        "mov w21, #0x3ffd",
+        "ldr s2, [x20, x21, sxtx]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movss xmm0, [ecx - 257]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0xfffffeff",
-        "ldr s16, [x5, w20, sxtw]"
+        "mov w20, w5",
+        "mov w21, #0xfffffeff",
+        "ldr s2, [x20, w21, sxtw]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movss xmm0, [ecx - 256]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0xffffff00",
-        "ldr s16, [x5, w20, sxtw]"
+        "mov w20, w5",
+        "mov w21, #0xffffff00",
+        "ldr s2, [x20, w21, sxtw]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movss xmm0, [ecx + 255]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldur s16, [x5, #255]"
+        "mov w20, w5",
+        "ldur s2, [x20, #255]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movss xmm0, [ecx + 256]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldr s16, [x5, #256]"
+        "mov w20, w5",
+        "ldr s2, [x20, #256]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movsd xmm0, [ecx + 32759]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "mov w20, #0x7ff7",
-        "add w20, w5, w20",
-        "ldr d16, [x20]"
+        "mov w20, w5",
+        "mov w21, #0x7ff7",
+        "add w22, w20, w21",
+        "ldr d2, [x22]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movsd xmm0, [ecx + 32760]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "mov w20, #0x7ff8",
-        "add w20, w5, w20",
-        "ldr d16, [x20]"
+        "mov w20, w5",
+        "mov w21, #0x7ff8",
+        "add w22, w20, w21",
+        "ldr d2, [x22]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movsd xmm0, [ecx + 32761]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "mov w20, #0x7ff9",
-        "add w20, w5, w20",
-        "ldr d16, [x20]"
+        "mov w20, w5",
+        "mov w21, #0x7ff9",
+        "add w22, w20, w21",
+        "ldr d2, [x22]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movsd xmm0, [ecx - 257]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0xfffffeff",
-        "ldr d16, [x5, w20, sxtw]"
+        "mov w20, w5",
+        "mov w21, #0xfffffeff",
+        "ldr d2, [x20, w21, sxtw]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movsd xmm0, [ecx - 256]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0xffffff00",
-        "ldr d16, [x5, w20, sxtw]"
+        "mov w20, w5",
+        "mov w21, #0xffffff00",
+        "ldr d2, [x20, w21, sxtw]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movsd xmm0, [ecx + 255]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldur d16, [x5, #255]"
+        "mov w20, w5",
+        "ldur d2, [x20, #255]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movsd xmm0, [ecx + 256]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldr d16, [x5, #256]"
+        "mov w20, w5",
+        "ldr d2, [x20, #256]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movq xmm0, [ecx + 65519]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "mov w20, #0xffef",
-        "add w20, w5, w20",
-        "ldr d16, [x20]"
+        "mov w20, w5",
+        "mov w21, #0xffef",
+        "add w22, w20, w21",
+        "ldr d2, [x22]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movq xmm0, [ecx + 65520]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "mov w20, #0xfff0",
-        "add w20, w5, w20",
-        "ldr d16, [x20]"
+        "mov w20, w5",
+        "mov w21, #0xfff0",
+        "add w22, w20, w21",
+        "ldr d2, [x22]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movq xmm0, [ecx + 65521]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "ExpectedArm64ASM": [
-        "mov w20, #0xfff1",
-        "add w20, w5, w20",
-        "ldr d16, [x20]"
+        "mov w20, w5",
+        "mov w21, #0xfff1",
+        "add w22, w20, w21",
+        "ldr d2, [x22]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movq xmm0, [ecx - 257]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0xfffffeff",
-        "ldr d16, [x5, w20, sxtw]"
+        "mov w20, w5",
+        "mov w21, #0xfffffeff",
+        "ldr d2, [x20, w21, sxtw]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movq xmm0, [ecx - 256]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "ExpectedArm64ASM": [
-        "mov w20, #0xffffff00",
-        "ldr d16, [x5, w20, sxtw]"
+        "mov w20, w5",
+        "mov w21, #0xffffff00",
+        "ldr d2, [x20, w21, sxtw]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movq xmm0, [ecx + 255]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldur d16, [x5, #255]"
+        "mov w20, w5",
+        "ldur d2, [x20, #255]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movq xmm0, [ecx + 256]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ldr d16, [x5, #256]"
+        "mov w20, w5",
+        "ldr d2, [x20, #256]",
+        "mov v16.16b, v2.16b"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/FEXOpt/MultiInst.json b/unittests/InstructionCountCI/FEXOpt/MultiInst.json
index 53237c8ea2..a90b9e7cdf 100644
--- a/unittests/InstructionCountCI/FEXOpt/MultiInst.json
+++ b/unittests/InstructionCountCI/FEXOpt/MultiInst.json
@@ -14,7 +14,7 @@
   ],
   "Instructions": {
     "push ax, bx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Mergable 16-bit pushes. May or may not be an optimization."
       ],
@@ -23,12 +23,19 @@
         "push bx"
       ],
       "ExpectedArm64ASM": [
-        "strh w4, [x8, #-2]!",
-        "strh w7, [x8, #-2]!"
+        "mov x20, x4",
+        "mov x21, x8",
+        "mov x22, x21",
+        "strh w20, [x22, #-2]!",
+        "mov x8, x22",
+        "mov x20, x7",
+        "mov x21, x22",
+        "strh w20, [x21, #-2]!",
+        "mov x8, x21"
       ]
     },
     "push rax, rbx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Mergable 64-bit pushes"
       ],
@@ -37,12 +44,19 @@
         "push rbx"
       ],
       "ExpectedArm64ASM": [
-        "str x4, [x8, #-8]!",
-        "str x7, [x8, #-8]!"
+        "mov x20, x4",
+        "mov x21, x8",
+        "mov x22, x21",
+        "str x20, [x22, #-8]!",
+        "mov x8, x22",
+        "mov x20, x7",
+        "mov x21, x22",
+        "str x20, [x21, #-8]!",
+        "mov x8, x21"
       ]
     },
     "adds xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "Redundant scalar adds that can get eliminated without AFP."
       ],
@@ -51,14 +65,21 @@
         "addss xmm0, xmm2"
       ],
       "ExpectedArm64ASM": [
-        "fadd s0, s16, s17",
-        "mov v16.s[0], v0.s[0]",
-        "fadd s0, s16, s18",
-        "mov v16.s[0], v0.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fadd s0, s2, s3",
+        "mov v4.s[0], v0.s[0]",
+        "mov v16.16b, v4.16b",
+        "mov v2.16b, v18.16b",
+        "mov v3.16b, v4.16b",
+        "fadd s0, s4, s2",
+        "mov v3.s[0], v0.s[0]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "positive movsb": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "When direction flag is a compile time constant we can optimize",
         "loads and stores can turn in to post-increment when known"
@@ -70,14 +91,18 @@
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
         "strb w20, [x28, #714]",
-        "ldrb w20, [x10]",
-        "strb w20, [x11]",
-        "add x10, x10, #0x1 (1)",
-        "add x11, x11, #0x1 (1)"
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldrb w22, [x20]",
+        "strb w22, [x21]",
+        "add x22, x20, #0x1 (1)",
+        "add x20, x21, #0x1 (1)",
+        "mov x10, x22",
+        "mov x11, x20"
       ]
     },
     "positive movsw": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "When direction flag is a compile time constant we can optimize",
         "loads and stores can turn in to post-increment when known"
@@ -89,14 +114,18 @@
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
         "strb w20, [x28, #714]",
-        "ldrh w20, [x10]",
-        "strh w20, [x11]",
-        "add x10, x10, #0x2 (2)",
-        "add x11, x11, #0x2 (2)"
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldrh w22, [x20]",
+        "strh w22, [x21]",
+        "add x22, x20, #0x2 (2)",
+        "add x20, x21, #0x2 (2)",
+        "mov x10, x22",
+        "mov x11, x20"
       ]
     },
     "positive movsd": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "When direction flag is a compile time constant we can optimize",
         "loads and stores can turn in to post-increment when known"
@@ -108,14 +137,18 @@
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
         "strb w20, [x28, #714]",
-        "ldr w20, [x10]",
-        "str w20, [x11]",
-        "add x10, x10, #0x4 (4)",
-        "add x11, x11, #0x4 (4)"
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldr w22, [x20]",
+        "str w22, [x21]",
+        "add x22, x20, #0x4 (4)",
+        "add x20, x21, #0x4 (4)",
+        "mov x10, x22",
+        "mov x11, x20"
       ]
     },
     "positive movsq": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "When direction flag is a compile time constant we can optimize",
         "loads and stores can turn in to post-increment when known"
@@ -127,14 +160,18 @@
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
         "strb w20, [x28, #714]",
-        "ldr x20, [x10]",
-        "str x20, [x11]",
-        "add x10, x10, #0x8 (8)",
-        "add x11, x11, #0x8 (8)"
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldr x22, [x20]",
+        "str x22, [x21]",
+        "add x22, x20, #0x8 (8)",
+        "add x20, x21, #0x8 (8)",
+        "mov x10, x22",
+        "mov x11, x20"
       ]
     },
     "negative movsb": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "When direction flag is a compile time constant we can optimize",
         "loads and stores can turn in to post-increment when known"
@@ -146,14 +183,18 @@
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "strb w20, [x28, #714]",
-        "ldrb w20, [x10]",
-        "strb w20, [x11]",
-        "sub x10, x10, #0x1 (1)",
-        "sub x11, x11, #0x1 (1)"
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldrb w22, [x20]",
+        "strb w22, [x21]",
+        "sub x22, x20, #0x1 (1)",
+        "sub x20, x21, #0x1 (1)",
+        "mov x10, x22",
+        "mov x11, x20"
       ]
     },
     "negative movsw": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "When direction flag is a compile time constant we can optimize",
         "loads and stores can turn in to post-increment when known"
@@ -165,14 +206,18 @@
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "strb w20, [x28, #714]",
-        "ldrh w20, [x10]",
-        "strh w20, [x11]",
-        "sub x10, x10, #0x2 (2)",
-        "sub x11, x11, #0x2 (2)"
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldrh w22, [x20]",
+        "strh w22, [x21]",
+        "sub x22, x20, #0x2 (2)",
+        "sub x20, x21, #0x2 (2)",
+        "mov x10, x22",
+        "mov x11, x20"
       ]
     },
     "negative movsd": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "When direction flag is a compile time constant we can optimize",
         "loads and stores can turn in to post-increment when known"
@@ -184,14 +229,18 @@
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "strb w20, [x28, #714]",
-        "ldr w20, [x10]",
-        "str w20, [x11]",
-        "sub x10, x10, #0x4 (4)",
-        "sub x11, x11, #0x4 (4)"
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldr w22, [x20]",
+        "str w22, [x21]",
+        "sub x22, x20, #0x4 (4)",
+        "sub x20, x21, #0x4 (4)",
+        "mov x10, x22",
+        "mov x11, x20"
       ]
     },
     "negative movsq": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "When direction flag is a compile time constant we can optimize",
         "loads and stores can turn in to post-increment when known"
@@ -203,14 +252,18 @@
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "strb w20, [x28, #714]",
-        "ldr x20, [x10]",
-        "str x20, [x11]",
-        "sub x10, x10, #0x8 (8)",
-        "sub x11, x11, #0x8 (8)"
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldr x22, [x20]",
+        "str x22, [x21]",
+        "sub x22, x20, #0x8 (8)",
+        "sub x20, x21, #0x8 (8)",
+        "mov x10, x22",
+        "mov x11, x20"
       ]
     },
     "positive rep movsb": {
-      "ExpectedInstructionCount": 44,
+      "ExpectedInstructionCount": 49,
       "Comment": [
         "When direction flag is a compile time constant we can optimize",
         "loads and stores can turn in to post-increment when known"
@@ -223,9 +276,12 @@
         "mov w20, #0x0",
         "mov w21, #0x1",
         "strb w21, [x28, #714]",
-        "mov x0, x5",
-        "mov x1, x11",
-        "mov x2, x10",
+        "mov x21, x10",
+        "mov x22, x11",
+        "mov x23, x5",
+        "mov x0, x23",
+        "mov x1, x22",
+        "mov x2, x21",
         "cbz x0, #+0x78",
         "sub x3, x1, x2",
         "tbz x3, #63, #+0x8",
@@ -256,18 +312,20 @@
         "strb w3, [x1], #1",
         "sub x0, x0, #0x1 (1)",
         "cbnz x0, #-0xc",
-        "mov x0, x11",
-        "mov x1, x10",
-        "mov x2, x5",
-        "add x22, x0, x2",
-        "add x23, x1, x2",
-        "mov x11, x22",
-        "mov x10, x23",
-        "mov x5, x20"
+        "mov x0, x22",
+        "mov x1, x21",
+        "mov x2, x23",
+        "add x24, x0, x2",
+        "add x25, x1, x2",
+        "mov x21, x24",
+        "mov x22, x25",
+        "mov x5, x20",
+        "mov x11, x21",
+        "mov x10, x22"
       ]
     },
     "positive rep movsw": {
-      "ExpectedInstructionCount": 44,
+      "ExpectedInstructionCount": 49,
       "Comment": [
         "When direction flag is a compile time constant we can optimize",
         "loads and stores can turn in to post-increment when known"
@@ -280,9 +338,12 @@
         "mov w20, #0x0",
         "mov w21, #0x1",
         "strb w21, [x28, #714]",
-        "mov x0, x5",
-        "mov x1, x11",
-        "mov x2, x10",
+        "mov x21, x10",
+        "mov x22, x11",
+        "mov x23, x5",
+        "mov x0, x23",
+        "mov x1, x22",
+        "mov x2, x21",
         "cbz x0, #+0x78",
         "sub x3, x1, x2",
         "tbz x3, #63, #+0x8",
@@ -313,18 +374,20 @@
         "strh w3, [x1], #2",
         "sub x0, x0, #0x1 (1)",
         "cbnz x0, #-0xc",
-        "mov x0, x11",
-        "mov x1, x10",
-        "mov x2, x5",
-        "add x22, x0, x2, lsl #1",
-        "add x23, x1, x2, lsl #1",
-        "mov x11, x22",
-        "mov x10, x23",
-        "mov x5, x20"
+        "mov x0, x22",
+        "mov x1, x21",
+        "mov x2, x23",
+        "add x24, x0, x2, lsl #1",
+        "add x25, x1, x2, lsl #1",
+        "mov x21, x24",
+        "mov x22, x25",
+        "mov x5, x20",
+        "mov x11, x21",
+        "mov x10, x22"
       ]
     },
     "positive rep movsd": {
-      "ExpectedInstructionCount": 44,
+      "ExpectedInstructionCount": 49,
       "Comment": [
         "When direction flag is a compile time constant we can optimize",
         "loads and stores can turn in to post-increment when known"
@@ -337,9 +400,12 @@
         "mov w20, #0x0",
         "mov w21, #0x1",
         "strb w21, [x28, #714]",
-        "mov x0, x5",
-        "mov x1, x11",
-        "mov x2, x10",
+        "mov x21, x10",
+        "mov x22, x11",
+        "mov x23, x5",
+        "mov x0, x23",
+        "mov x1, x22",
+        "mov x2, x21",
         "cbz x0, #+0x78",
         "sub x3, x1, x2",
         "tbz x3, #63, #+0x8",
@@ -370,18 +436,20 @@
         "str w3, [x1], #4",
         "sub x0, x0, #0x1 (1)",
         "cbnz x0, #-0xc",
-        "mov x0, x11",
-        "mov x1, x10",
-        "mov x2, x5",
-        "add x22, x0, x2, lsl #2",
-        "add x23, x1, x2, lsl #2",
-        "mov x11, x22",
-        "mov x10, x23",
-        "mov x5, x20"
+        "mov x0, x22",
+        "mov x1, x21",
+        "mov x2, x23",
+        "add x24, x0, x2, lsl #2",
+        "add x25, x1, x2, lsl #2",
+        "mov x21, x24",
+        "mov x22, x25",
+        "mov x5, x20",
+        "mov x11, x21",
+        "mov x10, x22"
       ]
     },
     "positive rep movsq": {
-      "ExpectedInstructionCount": 44,
+      "ExpectedInstructionCount": 49,
       "Comment": [
         "When direction flag is a compile time constant we can optimize",
         "loads and stores can turn in to post-increment when known"
@@ -394,9 +462,12 @@
         "mov w20, #0x0",
         "mov w21, #0x1",
         "strb w21, [x28, #714]",
-        "mov x0, x5",
-        "mov x1, x11",
-        "mov x2, x10",
+        "mov x21, x10",
+        "mov x22, x11",
+        "mov x23, x5",
+        "mov x0, x23",
+        "mov x1, x22",
+        "mov x2, x21",
         "cbz x0, #+0x78",
         "sub x3, x1, x2",
         "tbz x3, #63, #+0x8",
@@ -427,18 +498,20 @@
         "str x3, [x1], #8",
         "sub x0, x0, #0x1 (1)",
         "cbnz x0, #-0xc",
-        "mov x0, x11",
-        "mov x1, x10",
-        "mov x2, x5",
-        "add x22, x0, x2, lsl #3",
-        "add x23, x1, x2, lsl #3",
-        "mov x11, x22",
-        "mov x10, x23",
-        "mov x5, x20"
+        "mov x0, x22",
+        "mov x1, x21",
+        "mov x2, x23",
+        "add x24, x0, x2, lsl #3",
+        "add x25, x1, x2, lsl #3",
+        "mov x21, x24",
+        "mov x22, x25",
+        "mov x5, x20",
+        "mov x11, x21",
+        "mov x10, x22"
       ]
     },
     "negative rep movsb": {
-      "ExpectedInstructionCount": 47,
+      "ExpectedInstructionCount": 53,
       "Comment": [
         "When direction flag is a compile time constant we can optimize",
         "loads and stores can turn in to post-increment when known"
@@ -450,9 +523,12 @@
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "strb w20, [x28, #714]",
-        "mov x0, x5",
-        "mov x1, x11",
-        "mov x2, x10",
+        "mov x20, x10",
+        "mov x21, x11",
+        "mov x22, x5",
+        "mov x0, x22",
+        "mov x1, x21",
+        "mov x2, x20",
         "cbz x0, #+0x88",
         "sub x3, x1, x2",
         "tbz x3, #63, #+0x8",
@@ -487,18 +563,21 @@
         "strb w3, [x1], #-1",
         "sub x0, x0, #0x1 (1)",
         "cbnz x0, #-0xc",
-        "mov x0, x11",
-        "mov x1, x10",
-        "mov x2, x5",
-        "sub x20, x0, x2",
-        "sub x21, x1, x2",
+        "mov x0, x21",
+        "mov x1, x20",
+        "mov x2, x22",
+        "sub x24, x0, x2",
+        "sub x25, x1, x2",
+        "mov x20, x24",
+        "mov x21, x25",
+        "mov w22, #0x0",
+        "mov x5, x22",
         "mov x11, x20",
-        "mov x10, x21",
-        "mov w5, #0x0"
+        "mov x10, x21"
       ]
     },
     "negative rep movsw": {
-      "ExpectedInstructionCount": 47,
+      "ExpectedInstructionCount": 53,
       "Comment": [
         "When direction flag is a compile time constant we can optimize",
         "loads and stores can turn in to post-increment when known"
@@ -510,9 +589,12 @@
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "strb w20, [x28, #714]",
-        "mov x0, x5",
-        "mov x1, x11",
-        "mov x2, x10",
+        "mov x20, x10",
+        "mov x21, x11",
+        "mov x22, x5",
+        "mov x0, x22",
+        "mov x1, x21",
+        "mov x2, x20",
         "cbz x0, #+0x88",
         "sub x3, x1, x2",
         "tbz x3, #63, #+0x8",
@@ -547,18 +629,21 @@
         "strh w3, [x1], #-2",
         "sub x0, x0, #0x1 (1)",
         "cbnz x0, #-0xc",
-        "mov x0, x11",
-        "mov x1, x10",
-        "mov x2, x5",
-        "sub x20, x0, x2, lsl #1",
-        "sub x21, x1, x2, lsl #1",
+        "mov x0, x21",
+        "mov x1, x20",
+        "mov x2, x22",
+        "sub x24, x0, x2, lsl #1",
+        "sub x25, x1, x2, lsl #1",
+        "mov x20, x24",
+        "mov x21, x25",
+        "mov w22, #0x0",
+        "mov x5, x22",
         "mov x11, x20",
-        "mov x10, x21",
-        "mov w5, #0x0"
+        "mov x10, x21"
       ]
     },
     "negative rep movsd": {
-      "ExpectedInstructionCount": 47,
+      "ExpectedInstructionCount": 53,
       "Comment": [
         "When direction flag is a compile time constant we can optimize",
         "loads and stores can turn in to post-increment when known"
@@ -570,9 +655,12 @@
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "strb w20, [x28, #714]",
-        "mov x0, x5",
-        "mov x1, x11",
-        "mov x2, x10",
+        "mov x20, x10",
+        "mov x21, x11",
+        "mov x22, x5",
+        "mov x0, x22",
+        "mov x1, x21",
+        "mov x2, x20",
         "cbz x0, #+0x88",
         "sub x3, x1, x2",
         "tbz x3, #63, #+0x8",
@@ -607,18 +695,21 @@
         "str w3, [x1], #-4",
         "sub x0, x0, #0x1 (1)",
         "cbnz x0, #-0xc",
-        "mov x0, x11",
-        "mov x1, x10",
-        "mov x2, x5",
-        "sub x20, x0, x2, lsl #2",
-        "sub x21, x1, x2, lsl #2",
+        "mov x0, x21",
+        "mov x1, x20",
+        "mov x2, x22",
+        "sub x24, x0, x2, lsl #2",
+        "sub x25, x1, x2, lsl #2",
+        "mov x20, x24",
+        "mov x21, x25",
+        "mov w22, #0x0",
+        "mov x5, x22",
         "mov x11, x20",
-        "mov x10, x21",
-        "mov w5, #0x0"
+        "mov x10, x21"
       ]
     },
     "negative rep movsq": {
-      "ExpectedInstructionCount": 47,
+      "ExpectedInstructionCount": 53,
       "Comment": [
         "When direction flag is a compile time constant we can optimize",
         "loads and stores can turn in to post-increment when known"
@@ -630,9 +721,12 @@
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "strb w20, [x28, #714]",
-        "mov x0, x5",
-        "mov x1, x11",
-        "mov x2, x10",
+        "mov x20, x10",
+        "mov x21, x11",
+        "mov x22, x5",
+        "mov x0, x22",
+        "mov x1, x21",
+        "mov x2, x20",
         "cbz x0, #+0x88",
         "sub x3, x1, x2",
         "tbz x3, #63, #+0x8",
@@ -667,18 +761,21 @@
         "str x3, [x1], #-8",
         "sub x0, x0, #0x1 (1)",
         "cbnz x0, #-0xc",
-        "mov x0, x11",
-        "mov x1, x10",
-        "mov x2, x5",
-        "sub x20, x0, x2, lsl #3",
-        "sub x21, x1, x2, lsl #3",
+        "mov x0, x21",
+        "mov x1, x20",
+        "mov x2, x22",
+        "sub x24, x0, x2, lsl #3",
+        "sub x25, x1, x2, lsl #3",
+        "mov x20, x24",
+        "mov x21, x25",
+        "mov w22, #0x0",
+        "mov x5, x22",
         "mov x11, x20",
-        "mov x10, x21",
-        "mov w5, #0x0"
+        "mov x10, x21"
       ]
     },
     "positive rep stosb": {
-      "ExpectedInstructionCount": 30,
+      "ExpectedInstructionCount": 34,
       "Comment": [
         "When direction flag is a compile time constant we can optimize",
         "loads and stores can turn in to post-increment when known"
@@ -691,13 +788,16 @@
         "mov w20, #0x0",
         "mov w21, #0x1",
         "strb w21, [x28, #714]",
-        "uxtb w21, w4",
-        "mov x0, x5",
-        "mov x1, x11",
+        "mov x21, x4",
+        "uxtb w22, w21",
+        "mov x21, x11",
+        "mov x23, x5",
+        "mov x0, x23",
+        "mov x1, x21",
         "cbz x0, #+0x58",
         "sub x0, x0, #0x20 (32)",
         "tbnz x0, #63, #+0x3c",
-        "dup v1.16b, w21",
+        "dup v1.16b, w22",
         "sub x0, x0, #0x20 (32)",
         "tbnz x0, #63, #+0x14",
         "stp q1, q1, [x1], #32",
@@ -713,15 +813,16 @@
         "tbz x0, #63, #-0x8",
         "add x0, x0, #0x20 (32)",
         "cbz x0, #+0x10",
-        "strb w21, [x1], #1",
+        "strb w22, [x1], #1",
         "sub x0, x0, #0x1 (1)",
         "cbnz x0, #-0x8",
-        "add x11, x11, x5",
-        "mov x5, x20"
+        "add x24, x21, x23",
+        "mov x5, x20",
+        "mov x11, x24"
       ]
     },
     "positive rep stosw": {
-      "ExpectedInstructionCount": 30,
+      "ExpectedInstructionCount": 34,
       "Comment": [
         "When direction flag is a compile time constant we can optimize",
         "loads and stores can turn in to post-increment when known"
@@ -734,13 +835,16 @@
         "mov w20, #0x0",
         "mov w21, #0x1",
         "strb w21, [x28, #714]",
-        "uxth w21, w4",
-        "mov x0, x5",
-        "mov x1, x11",
+        "mov x21, x4",
+        "uxth w22, w21",
+        "mov x21, x11",
+        "mov x23, x5",
+        "mov x0, x23",
+        "mov x1, x21",
         "cbz x0, #+0x58",
         "sub x0, x0, #0x10 (16)",
         "tbnz x0, #63, #+0x3c",
-        "dup v1.8h, w21",
+        "dup v1.8h, w22",
         "sub x0, x0, #0x10 (16)",
         "tbnz x0, #63, #+0x14",
         "stp q1, q1, [x1], #32",
@@ -756,15 +860,16 @@
         "tbz x0, #63, #-0x8",
         "add x0, x0, #0x10 (16)",
         "cbz x0, #+0x10",
-        "strh w21, [x1], #2",
+        "strh w22, [x1], #2",
         "sub x0, x0, #0x1 (1)",
         "cbnz x0, #-0x8",
-        "add x11, x11, x5, lsl #1",
-        "mov x5, x20"
+        "add x24, x21, x23, lsl #1",
+        "mov x5, x20",
+        "mov x11, x24"
       ]
     },
     "positive rep stosd": {
-      "ExpectedInstructionCount": 30,
+      "ExpectedInstructionCount": 34,
       "Comment": [
         "When direction flag is a compile time constant we can optimize",
         "loads and stores can turn in to post-increment when known"
@@ -777,13 +882,16 @@
         "mov w20, #0x0",
         "mov w21, #0x1",
         "strb w21, [x28, #714]",
-        "mov w21, w4",
-        "mov x0, x5",
-        "mov x1, x11",
+        "mov x21, x4",
+        "mov w22, w21",
+        "mov x21, x11",
+        "mov x23, x5",
+        "mov x0, x23",
+        "mov x1, x21",
         "cbz x0, #+0x58",
         "sub x0, x0, #0x8 (8)",
         "tbnz x0, #63, #+0x3c",
-        "dup v1.4s, w21",
+        "dup v1.4s, w22",
         "sub x0, x0, #0x8 (8)",
         "tbnz x0, #63, #+0x14",
         "stp q1, q1, [x1], #32",
@@ -799,15 +907,16 @@
         "tbz x0, #63, #-0x8",
         "add x0, x0, #0x8 (8)",
         "cbz x0, #+0x10",
-        "str w21, [x1], #4",
+        "str w22, [x1], #4",
         "sub x0, x0, #0x1 (1)",
         "cbnz x0, #-0x8",
-        "add x11, x11, x5, lsl #2",
-        "mov x5, x20"
+        "add x24, x21, x23, lsl #2",
+        "mov x5, x20",
+        "mov x11, x24"
       ]
     },
     "positive rep stosq": {
-      "ExpectedInstructionCount": 29,
+      "ExpectedInstructionCount": 33,
       "Comment": [
         "When direction flag is a compile time constant we can optimize",
         "loads and stores can turn in to post-increment when known"
@@ -820,12 +929,15 @@
         "mov w20, #0x0",
         "mov w21, #0x1",
         "strb w21, [x28, #714]",
-        "mov x0, x5",
-        "mov x1, x11",
+        "mov x21, x4",
+        "mov x22, x11",
+        "mov x23, x5",
+        "mov x0, x23",
+        "mov x1, x22",
         "cbz x0, #+0x58",
         "sub x0, x0, #0x4 (4)",
         "tbnz x0, #63, #+0x3c",
-        "dup v1.2d, x4",
+        "dup v1.2d, x21",
         "sub x0, x0, #0x4 (4)",
         "tbnz x0, #63, #+0x14",
         "stp q1, q1, [x1], #32",
@@ -841,15 +953,16 @@
         "tbz x0, #63, #-0x8",
         "add x0, x0, #0x4 (4)",
         "cbz x0, #+0x10",
-        "str x4, [x1], #8",
+        "str x21, [x1], #8",
         "sub x0, x0, #0x1 (1)",
         "cbnz x0, #-0x8",
-        "add x11, x11, x5, lsl #3",
-        "mov x5, x20"
+        "add x24, x22, x23, lsl #3",
+        "mov x5, x20",
+        "mov x11, x24"
       ]
     },
     "negative rep stosb": {
-      "ExpectedInstructionCount": 31,
+      "ExpectedInstructionCount": 36,
       "Comment": [
         "When direction flag is a compile time constant we can optimize",
         "loads and stores can turn in to post-increment when known"
@@ -861,14 +974,17 @@
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "strb w20, [x28, #714]",
-        "uxtb w20, w4",
-        "mov x0, x5",
-        "mov x1, x11",
+        "mov x20, x4",
+        "uxtb w21, w20",
+        "mov x20, x11",
+        "mov x22, x5",
+        "mov x0, x22",
+        "mov x1, x20",
         "cbz x0, #+0x60",
         "sub x1, x1, #0x1f (31)",
         "sub x0, x0, #0x20 (32)",
         "tbnz x0, #63, #+0x3c",
-        "dup v1.16b, w20",
+        "dup v1.16b, w21",
         "sub x0, x0, #0x20 (32)",
         "tbnz x0, #63, #+0x14",
         "stp q1, q1, [x1], #-32",
@@ -885,15 +1001,17 @@
         "add x0, x0, #0x20 (32)",
         "cbz x0, #+0x14",
         "add x1, x1, #0x1f (31)",
-        "strb w20, [x1], #-1",
+        "strb w21, [x1], #-1",
         "sub x0, x0, #0x1 (1)",
         "cbnz x0, #-0x8",
-        "sub x11, x11, x5",
-        "mov w5, #0x0"
+        "sub x23, x20, x22",
+        "mov w20, #0x0",
+        "mov x5, x20",
+        "mov x11, x23"
       ]
     },
     "negative rep stosw": {
-      "ExpectedInstructionCount": 31,
+      "ExpectedInstructionCount": 36,
       "Comment": [
         "When direction flag is a compile time constant we can optimize",
         "loads and stores can turn in to post-increment when known"
@@ -905,14 +1023,17 @@
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "strb w20, [x28, #714]",
-        "uxth w20, w4",
-        "mov x0, x5",
-        "mov x1, x11",
+        "mov x20, x4",
+        "uxth w21, w20",
+        "mov x20, x11",
+        "mov x22, x5",
+        "mov x0, x22",
+        "mov x1, x20",
         "cbz x0, #+0x60",
         "sub x1, x1, #0x1e (30)",
         "sub x0, x0, #0x10 (16)",
         "tbnz x0, #63, #+0x3c",
-        "dup v1.8h, w20",
+        "dup v1.8h, w21",
         "sub x0, x0, #0x10 (16)",
         "tbnz x0, #63, #+0x14",
         "stp q1, q1, [x1], #-32",
@@ -929,15 +1050,17 @@
         "add x0, x0, #0x10 (16)",
         "cbz x0, #+0x14",
         "add x1, x1, #0x1e (30)",
-        "strh w20, [x1], #-2",
+        "strh w21, [x1], #-2",
         "sub x0, x0, #0x1 (1)",
         "cbnz x0, #-0x8",
-        "sub x11, x11, x5, lsl #1",
-        "mov w5, #0x0"
+        "sub x23, x20, x22, lsl #1",
+        "mov w20, #0x0",
+        "mov x5, x20",
+        "mov x11, x23"
       ]
     },
     "negative rep stosd": {
-      "ExpectedInstructionCount": 31,
+      "ExpectedInstructionCount": 36,
       "Comment": [
         "When direction flag is a compile time constant we can optimize",
         "loads and stores can turn in to post-increment when known"
@@ -949,14 +1072,17 @@
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "strb w20, [x28, #714]",
-        "mov w20, w4",
-        "mov x0, x5",
-        "mov x1, x11",
+        "mov x20, x4",
+        "mov w21, w20",
+        "mov x20, x11",
+        "mov x22, x5",
+        "mov x0, x22",
+        "mov x1, x20",
         "cbz x0, #+0x60",
         "sub x1, x1, #0x1c (28)",
         "sub x0, x0, #0x8 (8)",
         "tbnz x0, #63, #+0x3c",
-        "dup v1.4s, w20",
+        "dup v1.4s, w21",
         "sub x0, x0, #0x8 (8)",
         "tbnz x0, #63, #+0x14",
         "stp q1, q1, [x1], #-32",
@@ -973,15 +1099,17 @@
         "add x0, x0, #0x8 (8)",
         "cbz x0, #+0x14",
         "add x1, x1, #0x1c (28)",
-        "str w20, [x1], #-4",
+        "str w21, [x1], #-4",
         "sub x0, x0, #0x1 (1)",
         "cbnz x0, #-0x8",
-        "sub x11, x11, x5, lsl #2",
-        "mov w5, #0x0"
+        "sub x23, x20, x22, lsl #2",
+        "mov w20, #0x0",
+        "mov x5, x20",
+        "mov x11, x23"
       ]
     },
     "negative rep stosq": {
-      "ExpectedInstructionCount": 30,
+      "ExpectedInstructionCount": 35,
       "Comment": [
         "When direction flag is a compile time constant we can optimize",
         "loads and stores can turn in to post-increment when known"
@@ -993,13 +1121,16 @@
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "strb w20, [x28, #714]",
-        "mov x0, x5",
-        "mov x1, x11",
+        "mov x20, x4",
+        "mov x21, x11",
+        "mov x22, x5",
+        "mov x0, x22",
+        "mov x1, x21",
         "cbz x0, #+0x60",
         "sub x1, x1, #0x18 (24)",
         "sub x0, x0, #0x4 (4)",
         "tbnz x0, #63, #+0x3c",
-        "dup v1.2d, x4",
+        "dup v1.2d, x20",
         "sub x0, x0, #0x4 (4)",
         "tbnz x0, #63, #+0x14",
         "stp q1, q1, [x1], #-32",
@@ -1016,15 +1147,17 @@
         "add x0, x0, #0x4 (4)",
         "cbz x0, #+0x14",
         "add x1, x1, #0x18 (24)",
-        "str x4, [x1], #-8",
+        "str x20, [x1], #-8",
         "sub x0, x0, #0x1 (1)",
         "cbnz x0, #-0x8",
-        "sub x11, x11, x5, lsl #3",
-        "mov w5, #0x0"
+        "sub x23, x21, x22, lsl #3",
+        "mov w20, #0x0",
+        "mov x5, x20",
+        "mov x11, x23"
       ]
     },
     "Sekiro spill block": {
-      "ExpectedInstructionCount": 176,
+      "ExpectedInstructionCount": 322,
       "Comment": [
         "This block of code came from the settings screen when it loaded",
         "It was originally at RIP: 0x14232cca0 and has been deobfuscated"
@@ -1151,182 +1284,328 @@
         "pop    rbx"
       ],
       "ExpectedArm64ASM": [
-        "str x5, [x8, #8]",
-        "str x7, [x8, #-8]!",
-        "str x9, [x8, #-8]!",
-        "str x10, [x8, #-8]!",
-        "str x11, [x8, #-8]!",
-        "str x16, [x8, #-8]!",
-        "str x17, [x8, #-8]!",
-        "str x19, [x8, #-8]!",
-        "str x29, [x8, #-8]!",
-        "sub x8, x8, #0x18 (24)",
-        "ldr w5, [x6, #36]",
-        "ldr w10, [x6]",
-        "ldr w9, [x6, #4]",
-        "ldr w19, [x6, #8]",
-        "ldr w29, [x6, #12]",
-        "ldr w16, [x6, #16]",
-        "ldr w17, [x6, #20]",
-        "ldr w15, [x6, #24]",
-        "ldr w7, [x6, #28]",
-        "ldr w11, [x6, #32]",
-        "mov w20, #0x13",
-        "mul w4, w5, w20",
-        "str w5, [x8, #104]",
-        "mov w21, #0x1000000",
-        "add w4, w4, w21",
-        "mov w21, w4",
-        "lsr w4, w21, #25",
-        "add w4, w4, w10",
-        "mov w21, w4",
-        "asr w4, w21, #26",
-        "add w4, w4, w9",
-        "mov w21, w4",
-        "asr w4, w21, #25",
-        "add w4, w4, w19",
-        "mov w21, w4",
-        "asr w4, w21, #26",
-        "add w4, w4, w29",
-        "mov w21, w4",
-        "asr w4, w21, #25",
-        "add w4, w4, w16",
-        "mov w21, w4",
-        "asr w4, w21, #26",
-        "add w4, w4, w17",
-        "mov w21, w4",
-        "asr w4, w21, #25",
-        "add w4, w4, w15",
-        "mov w21, w4",
-        "asr w4, w21, #26",
-        "add w4, w4, w7",
-        "mov w21, w4",
-        "asr w4, w21, #25",
-        "add w4, w4, w11",
-        "mov w21, w4",
-        "asr w4, w21, #26",
-        "add w4, w4, w5",
-        "mov w21, w4",
-        "asr w4, w21, #25",
-        "mul w4, w4, w20",
-        "add w10, w10, w4",
-        "mov w4, w10",
-        "asr w4, w4, #26",
-        "add w9, w9, w4",
-        "mov w20, w4",
-        "lsl w4, w20, #26",
-        "sub w10, w10, w4",
-        "mov w5, w9",
-        "ldr x20, [x8, #96]",
+        "sub sp, sp, #0x100 (256)",
+        "mov x20, x5",
+        "mov x21, x8",
+        "str x20, [x21, #8]",
+        "mov x20, x7",
+        "mov x22, x21",
+        "str x20, [x22, #-8]!",
+        "mov x8, x22",
+        "mov x20, x9",
+        "mov x21, x22",
+        "str x20, [x21, #-8]!",
+        "mov x8, x21",
+        "mov x20, x10",
+        "mov x22, x21",
+        "str x20, [x22, #-8]!",
+        "mov x8, x22",
+        "mov x20, x11",
+        "mov x21, x22",
+        "str x20, [x21, #-8]!",
+        "mov x8, x21",
+        "mov x20, x16",
+        "mov x22, x21",
+        "str x20, [x22, #-8]!",
+        "mov x8, x22",
+        "mov x20, x17",
+        "mov x21, x22",
+        "str x20, [x21, #-8]!",
+        "mov x8, x21",
+        "mov x20, x19",
+        "mov x22, x21",
+        "str x20, [x22, #-8]!",
+        "mov x8, x22",
+        "mov x20, x29",
+        "mov x21, x22",
+        "str x20, [x21, #-8]!",
+        "mov x8, x21",
+        "sub x20, x21, #0x18 (24)",
+        "mov x8, x20",
+        "mov x21, x6",
+        "ldr w22, [x21, #36]",
+        "mov x5, x22",
+        "ldr w23, [x21]",
+        "mov x10, x23",
+        "ldr w24, [x21, #4]",
+        "mov x9, x24",
+        "ldr w25, [x21, #8]",
+        "mov x19, x25",
+        "ldr w30, [x21, #12]",
+        "mov x29, x30",
+        "ldr w18, [x21, #16]",
+        "mov x16, x18",
+        "str w18, [sp]",
+        "ldr w18, [x21, #20]",
+        "mov x17, x18",
+        "str w18, [sp, #32]",
+        "ldr w18, [x21, #24]",
+        "mov x15, x18",
+        "str w18, [sp, #64]",
+        "ldr w18, [x21, #28]",
+        "mov x7, x18",
+        "str w18, [sp, #96]",
+        "ldr w18, [x21, #32]",
+        "mov x11, x18",
+        "mov w21, #0x13",
+        "str w18, [sp, #128]",
+        "mul w18, w22, w21",
+        "mov x4, x18",
+        "str w22, [x20, #104]",
+        "str x20, [sp, #160]",
+        "mov w20, #0x1000000",
+        "str w21, [sp, #192]",
+        "add w21, w18, w20",
+        "mov x4, x21",
+        "mov w20, w21",
+        "lsr w21, w20, #25",
+        "mov x4, x21",
+        "add w20, w21, w23",
+        "mov x4, x20",
+        "mov w21, w20",
+        "asr w20, w21, #26",
+        "mov x4, x20",
+        "add w21, w20, w24",
+        "mov x4, x21",
+        "mov w20, w21",
+        "asr w21, w20, #25",
+        "mov x4, x21",
+        "add w20, w21, w25",
+        "mov x4, x20",
+        "mov w21, w20",
+        "asr w20, w21, #26",
+        "mov x4, x20",
+        "add w21, w20, w30",
+        "mov x4, x21",
+        "mov w20, w21",
+        "asr w21, w20, #25",
+        "mov x4, x21",
+        "ldr w20, [sp]",
+        "add w18, w21, w20",
+        "mov x4, x18",
+        "mov w21, w18",
+        "asr w18, w21, #26",
+        "mov x4, x18",
+        "ldr w21, [sp, #32]",
+        "add w20, w18, w21",
+        "mov x4, x20",
+        "mov w18, w20",
+        "asr w20, w18, #25",
+        "mov x4, x20",
+        "ldr w18, [sp, #64]",
+        "add w21, w20, w18",
+        "mov x4, x21",
+        "mov w20, w21",
+        "asr w21, w20, #26",
+        "mov x4, x21",
+        "ldr w20, [sp, #96]",
+        "add w18, w21, w20",
+        "mov x4, x18",
+        "mov w21, w18",
+        "asr w18, w21, #25",
+        "mov x4, x18",
+        "ldr w21, [sp, #128]",
+        "add w20, w18, w21",
         "mov x4, x20",
-        "asr w5, w5, #25",
-        "add w19, w19, w5",
-        "mov w21, w5",
-        "lsl w5, w21, #25",
-        "mov w6, w19",
-        "sub w9, w9, w5",
-        "asr w6, w6, #26",
-        "add w29, w29, w6",
-        "mov w21, w10",
-        "str w21, [x20]",
-        "mov w12, w29",
-        "mov w21, w6",
-        "lsl w6, w21, #26",
-        "asr w12, w12, #25",
-        "sub w19, w19, w6",
-        "add w16, w16, w12",
-        "mov w21, w9",
-        "str w21, [x20, #4]",
-        "mov w13, w16",
-        "mov w21, w12",
-        "lsl w12, w21, #25",
-        "asr w13, w13, #26",
-        "sub w29, w29, w12",
-        "add w17, w17, w13",
-        "mov w21, w19",
-        "str w21, [x20, #8]",
-        "mov w21, w13",
-        "lsl w13, w21, #26",
-        "mov w14, w17",
-        "asr w14, w14, #25",
-        "sub w16, w16, w13",
-        "add w15, w15, w14",
-        "mov w21, w29",
-        "str w21, [x20, #12]",
-        "mov w21, w15",
-        "str w21, [x8, #112]",
+        "mov w18, w20",
+        "asr w20, w18, #26",
+        "mov x4, x20",
+        "add w18, w20, w22",
+        "mov x4, x18",
+        "mov w20, w18",
+        "asr w22, w20, #25",
+        "mov x4, x22",
+        "ldr w20, [sp, #192]",
+        "mul w18, w22, w20",
+        "mov x4, x18",
+        "add w20, w23, w18",
         "mov x10, x20",
-        "mov w21, w15",
-        "asr w15, w21, #26",
-        "add w7, w7, w15",
-        "mov w21, w16",
-        "str w21, [x20, #16]",
-        "mov w21, w7",
-        "str w21, [x8, #120]",
-        "mov w21, w7",
-        "asr w7, w21, #25",
-        "add w11, w11, w7",
-        "mov w21, w11",
-        "str w21, [x8]",
-        "mov w21, w11",
-        "asr w11, w21, #26",
-        "ldr w21, [x8, #104]",
-        "add w21, w21, w11",
-        "str w21, [x8, #104]",
-        "mov w21, w14",
-        "lsl w14, w21, #25",
-        "ldr w5, [x8, #104]",
-        "sub w17, w17, w14",
-        "mov w21, w17",
-        "str w21, [x20, #20]",
-        "ldr w4, [x8, #112]",
-        "mov w21, w15",
-        "lsl w15, w21, #26",
-        "sub w4, w4, w15",
-        "mov w21, w7",
-        "lsl w7, w21, #25",
-        "mov w21, w4",
-        "str w21, [x20, #24]",
-        "ldr w4, [x8, #120]",
-        "sub w4, w4, w7",
-        "mov w21, w11",
-        "lsl w11, w21, #26",
-        "mov w21, w4",
-        "str w21, [x20, #28]",
-        "ldr w4, [x8]",
-        "sub w4, w4, w11",
-        "mov w21, w4",
-        "str w21, [x20, #32]",
-        "mov x4, x5",
-        "and w4, w5, #0xfe000000",
-        "sub w5, w5, w4",
-        "mov w21, w5",
-        "str w21, [x20, #36]",
-        "mvn w27, w8",
-        "adds x26, x8, #0x18 (24)",
-        "mov x8, x26",
-        "ldr x29, [x26]",
-        "add x20, x26, #0x8 (8)",
+        "mov w22, w20",
+        "mov x4, x22",
+        "asr w23, w22, #26",
+        "mov x4, x23",
+        "add w22, w24, w23",
+        "mov x9, x22",
+        "mov w24, w23",
+        "lsl w23, w24, #26",
+        "mov x4, x23",
+        "sub w24, w20, w23",
+        "mov x10, x24",
+        "mov w20, w22",
+        "mov x5, x20",
+        "ldr x23, [sp, #160]",
+        "ldr x18, [x23, #96]",
+        "mov x4, x18",
+        "asr w21, w20, #25",
+        "mov x5, x21",
+        "add w20, w25, w21",
+        "mov x19, x20",
+        "mov w25, w21",
+        "lsl w21, w25, #25",
+        "mov x5, x21",
+        "mov w25, w20",
+        "mov x6, x25",
+        "sub w23, w22, w21",
+        "mov x9, x23",
+        "asr w21, w25, #26",
+        "mov x6, x21",
+        "add w22, w30, w21",
+        "mov x29, x22",
+        "mov w25, w24",
+        "str w25, [x18]",
+        "mov w24, w22",
+        "mov x12, x24",
+        "mov w25, w21",
+        "lsl w21, w25, #26",
+        "mov x6, x21",
+        "asr w25, w24, #25",
+        "mov x12, x25",
+        "sub w24, w20, w21",
+        "mov x19, x24",
+        "ldr w20, [sp]",
+        "add w21, w20, w25",
+        "mov x16, x21",
+        "mov w20, w23",
+        "str w20, [x18, #4]",
+        "mov w20, w21",
+        "mov x13, x20",
+        "mov w23, w25",
+        "lsl w25, w23, #25",
+        "mov x12, x25",
+        "asr w23, w20, #26",
+        "mov x13, x23",
+        "sub w20, w22, w25",
+        "mov x29, x20",
+        "ldr w22, [sp, #32]",
+        "add w25, w22, w23",
+        "mov x17, x25",
+        "mov w22, w24",
+        "str w22, [x18, #8]",
+        "mov w22, w23",
+        "lsl w23, w22, #26",
+        "mov x13, x23",
+        "mov w22, w25",
+        "mov x14, x22",
+        "asr w24, w22, #25",
+        "mov x14, x24",
+        "sub w22, w21, w23",
+        "mov x16, x22",
+        "ldr w21, [sp, #64]",
+        "add w23, w21, w24",
+        "mov x15, x23",
+        "mov w21, w20",
+        "str w21, [x18, #12]",
+        "mov w20, w23",
+        "ldr x21, [sp, #160]",
+        "str w20, [x21, #112]",
+        "mov x10, x18",
+        "mov w20, w23",
+        "asr w23, w20, #26",
+        "mov x15, x23",
+        "ldr w20, [sp, #96]",
+        "add w30, w20, w23",
+        "mov x7, x30",
+        "mov w20, w22",
+        "str w20, [x18, #16]",
+        "mov w20, w30",
+        "str w20, [x21, #120]",
+        "mov w20, w30",
+        "asr w22, w20, #25",
+        "mov x7, x22",
+        "ldr w20, [sp, #128]",
+        "add w30, w20, w22",
+        "mov x11, x30",
+        "mov w20, w30",
+        "str w20, [x21]",
+        "mov w20, w30",
+        "asr w30, w20, #26",
+        "mov x11, x30",
+        "ldr w20, [x21, #104]",
+        "str w22, [sp, #224]",
+        "add w22, w20, w30",
+        "str w22, [x21, #104]",
+        "mov w20, w24",
+        "lsl w22, w20, #25",
+        "mov x14, x22",
+        "ldr w20, [x21, #104]",
+        "mov x5, x20",
+        "sub w24, w25, w22",
+        "mov x17, x24",
+        "mov w22, w24",
+        "str w22, [x18, #20]",
+        "ldr w22, [x21, #112]",
+        "mov x4, x22",
+        "mov w24, w23",
+        "lsl w23, w24, #26",
+        "mov x15, x23",
+        "sub w24, w22, w23",
+        "mov x4, x24",
+        "ldr w22, [sp, #224]",
+        "mov w23, w22",
+        "lsl w22, w23, #25",
+        "mov x7, x22",
+        "mov w23, w24",
+        "str w23, [x18, #24]",
+        "ldr w23, [x21, #120]",
+        "mov x4, x23",
+        "sub w24, w23, w22",
+        "mov x4, x24",
+        "mov w22, w30",
+        "lsl w23, w22, #26",
+        "mov x11, x23",
+        "mov w22, w24",
+        "str w22, [x18, #28]",
+        "ldr w22, [x21]",
+        "mov x4, x22",
+        "sub w24, w22, w23",
+        "mov x4, x24",
+        "mov w22, w24",
+        "str w22, [x18, #32]",
+        "mov x4, x20",
+        "and w22, w20, #0xfe000000",
+        "mov x4, x22",
+        "sub w23, w20, w22",
+        "mov x5, x23",
+        "mov w20, w23",
+        "str w20, [x18, #36]",
+        "mvn w20, w21",
+        "mov x27, x20",
+        "adds x20, x21, #0x18 (24)",
+        "mov x26, x20",
         "mov x8, x20",
-        "ldr x19, [x26, #8]",
-        "add x21, x20, #0x8 (8)",
-        "mov x8, x21",
-        "ldr x17, [x20, #8]",
-        "add x20, x21, #0x8 (8)",
+        "ldr x21, [x20]",
+        "add x22, x20, #0x8 (8)",
+        "mov x8, x22",
+        "mov x29, x21",
+        "ldr x21, [x20, #8]",
+        "add x20, x22, #0x8 (8)",
         "mov x8, x20",
-        "ldr x16, [x21, #8]",
-        "add x21, x20, #0x8 (8)",
-        "mov x8, x21",
-        "ldr x11, [x20, #8]",
-        "add x20, x21, #0x8 (8)",
+        "mov x19, x21",
+        "ldr x21, [x22, #8]",
+        "add x22, x20, #0x8 (8)",
+        "mov x8, x22",
+        "mov x17, x21",
+        "ldr x21, [x20, #8]",
+        "add x20, x22, #0x8 (8)",
         "mov x8, x20",
-        "ldr x10, [x21, #8]",
-        "add x21, x20, #0x8 (8)",
-        "mov x8, x21",
-        "ldr x9, [x20, #8]",
-        "add x8, x21, #0x8 (8)",
-        "ldr x7, [x21, #8]",
-        "add x8, x8, #0x8 (8)"
+        "mov x16, x21",
+        "ldr x21, [x22, #8]",
+        "add x22, x20, #0x8 (8)",
+        "mov x8, x22",
+        "mov x11, x21",
+        "ldr x21, [x20, #8]",
+        "add x20, x22, #0x8 (8)",
+        "mov x8, x20",
+        "mov x10, x21",
+        "ldr x21, [x22, #8]",
+        "add x22, x20, #0x8 (8)",
+        "mov x8, x22",
+        "mov x9, x21",
+        "ldr x21, [x20, #8]",
+        "add x20, x22, #0x8 (8)",
+        "mov x8, x20",
+        "mov x7, x21",
+        "add sp, sp, #0x100 (256)"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/FEXOpt/MultiInst_AFP.json b/unittests/InstructionCountCI/FEXOpt/MultiInst_AFP.json
index 5644759ae7..9b23f4a55f 100644
--- a/unittests/InstructionCountCI/FEXOpt/MultiInst_AFP.json
+++ b/unittests/InstructionCountCI/FEXOpt/MultiInst_AFP.json
@@ -15,7 +15,7 @@
   ],
   "Instructions": {
     "adds xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Redundant scalar operations should get eliminated with AFP"
       ],
@@ -24,8 +24,15 @@
         "addss xmm0, xmm2"
       ],
       "ExpectedArm64ASM": [
-        "fadd s16, s16, s17",
-        "fadd s16, s16, s18"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fadd s4, s2, s3",
+        "mov v16.16b, v4.16b",
+        "mov v2.16b, v18.16b",
+        "mov v3.16b, v4.16b",
+        "fadd s3, s4, s2",
+        "mov v16.16b, v3.16b"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/FEXOpt/libnss.json b/unittests/InstructionCountCI/FEXOpt/libnss.json
index 0fd3a1537a..ccbbab2dcc 100644
--- a/unittests/InstructionCountCI/FEXOpt/libnss.json
+++ b/unittests/InstructionCountCI/FEXOpt/libnss.json
@@ -16,7 +16,7 @@
   "Comment": [],
   "Instructions": {
     "libnss3 sha": {
-      "ExpectedInstructionCount": 2391,
+      "ExpectedInstructionCount": 2671,
       "Comment": [
         "This block of code comes from libnss3 which causes panic spilling in FEX's RA.",
         "This code is hit in steamwebhelper calling in to this function.",
@@ -193,2397 +193,2677 @@
         "movups  [rdi+0x110], xmm4"
       ],
       "ExpectedArm64ASM": [
-        "ldr q18, [x11, #256]",
-        "ldr q19, [x11, #272]",
-        "ldr q24, [x11]",
-        "ldr q23, [x11, #16]",
+        "sub sp, sp, #0x120 (288)",
+        "mov x20, x11",
+        "ldr q2, [x20, #256]",
+        "mov v18.16b, v2.16b",
+        "ldr q3, [x20, #272]",
+        "mov v19.16b, v3.16b",
+        "ldr q4, [x20]",
+        "mov v24.16b, v4.16b",
+        "ldr q5, [x20, #16]",
+        "mov v23.16b, v5.16b",
         "ldr x0, [x28, #1760]",
-        "ldr q2, [x0, #2832]",
-        "tbl v16.16b, {v18.16b}, v2.16b",
+        "ldr q6, [x0, #2832]",
+        "tbl v7.16b, {v2.16b}, v6.16b",
+        "mov v16.16b, v7.16b",
         "ldr x0, [x28, #1760]",
-        "ldr q3, [x0, #432]",
-        "tbl v18.16b, {v19.16b}, v3.16b",
-        "ldr q22, [x11, #32]",
-        "ldr q21, [x11, #48]",
-        "mov v19.16b, v16.16b",
-        "ext v19.16b, v18.16b, v16.16b, #8",
-        "mov v18.d[1], v16.d[1]",
-        "mov w20, #0x1000",
-        "movk w20, #0x1, lsl #16",
-        "ldr q16, [x29, x20, sxtx]",
-        "mov v20.16b, v18.16b",
-        "mov v17.16b, v19.16b",
-        "movi v4.16b, #0x8f",
-        "and v4.16b, v16.16b, v4.16b",
-        "tbl v21.16b, {v21.16b}, v4.16b",
-        "movi v4.16b, #0x8f",
-        "and v4.16b, v16.16b, v4.16b",
-        "tbl v22.16b, {v22.16b}, v4.16b",
-        "movi v4.16b, #0x8f",
-        "and v4.16b, v16.16b, v4.16b",
-        "tbl v23.16b, {v23.16b}, v4.16b",
-        "movi v4.16b, #0x8f",
-        "and v4.16b, v16.16b, v4.16b",
-        "tbl v24.16b, {v24.16b}, v4.16b",
-        "ldr q16, [x29, x20, sxtx]",
-        "add v16.4s, v16.4s, v24.4s",
+        "ldr q2, [x0, #432]",
+        "tbl v8.16b, {v3.16b}, v2.16b",
+        "mov v18.16b, v8.16b",
+        "ldr q3, [x20, #32]",
+        "mov v22.16b, v3.16b",
+        "ldr q9, [x20, #48]",
+        "mov v21.16b, v9.16b",
+        "mov v19.16b, v7.16b",
+        "ext v10.16b, v8.16b, v7.16b, #8",
+        "mov v19.16b, v10.16b",
+        "mov v11.16b, v8.16b",
+        "mov v11.d[1], v7.d[1]",
+        "mov v18.16b, v11.16b",
+        "mov x21, x29",
+        "mov w22, #0x1000",
+        "movk w22, #0x1, lsl #16",
+        "ldr q7, [x21, x22, sxtx]",
+        "mov v16.16b, v7.16b",
+        "mov v20.16b, v11.16b",
+        "mov v17.16b, v10.16b",
+        "movi v8.16b, #0x8f",
+        "and v12.16b, v7.16b, v8.16b",
+        "tbl v8.16b, {v9.16b}, v12.16b",
+        "mov v21.16b, v8.16b",
+        "movi v9.16b, #0x8f",
+        "and v12.16b, v7.16b, v9.16b",
+        "tbl v9.16b, {v3.16b}, v12.16b",
+        "mov v22.16b, v9.16b",
+        "movi v3.16b, #0x8f",
+        "and v12.16b, v7.16b, v3.16b",
+        "tbl v3.16b, {v5.16b}, v12.16b",
+        "mov v23.16b, v3.16b",
+        "movi v5.16b, #0x8f",
+        "and v12.16b, v7.16b, v5.16b",
+        "tbl v5.16b, {v4.16b}, v12.16b",
+        "mov v24.16b, v5.16b",
+        "ldr q4, [x21, x22, sxtx]",
+        "mov v16.16b, v4.16b",
+        "add v7.4s, v4.4s, v5.4s",
+        "mov v16.16b, v7.16b",
+        "mov v0.16b, v5.16b",
         "unimplemented (Unimplemented)",
-        "mov w20, v19.s[1]",
-        "mov w21, v19.s[0]",
-        "mov w22, v18.s[1]",
-        "and w23, w20, w21",
-        "bic w22, w22, w20",
-        "eor w22, w23, w22",
-        "ror w23, w20, #6",
-        "eor w23, w23, w20, ror #11",
-        "eor w23, w23, w20, ror #25",
-        "add w22, w22, w23",
-        "mov w23, v16.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v18.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v19.s[3]",
-        "mov w24, v19.s[2]",
-        "mov w25, v18.s[3]",
-        "and w30, w24, w25",
-        "orr w25, w24, w25",
-        "and w25, w23, w25",
-        "orr w25, w25, w30",
-        "add w25, w22, w25",
-        "ror w30, w23, #2",
-        "eor w30, w30, w23, ror #13",
-        "eor w30, w30, w23, ror #22",
-        "add w25, w25, w30",
-        "mov w30, v18.s[2]",
-        "add w22, w22, w30",
-        "and w20, w22, w20",
-        "bic w21, w21, w22",
-        "eor w20, w20, w21",
-        "ror w21, w22, #6",
-        "eor w21, w21, w22, ror #11",
-        "eor w21, w21, w22, ror #25",
-        "add w20, w20, w21",
-        "mov w21, v16.s[1]",
-        "add w20, w20, w21",
-        "mov w21, v18.s[1]",
-        "add w20, w20, w21",
-        "and w21, w23, w24",
-        "orr w23, w23, w24",
-        "and w23, w25, w23",
-        "orr w21, w23, w21",
-        "add w21, w20, w21",
-        "ror w23, w25, #2",
-        "eor w23, w23, w25, ror #13",
-        "eor w23, w23, w25, ror #22",
-        "add w21, w21, w23",
-        "mov w23, v18.s[3]",
-        "add w20, w20, w23",
-        "mov v4.16b, v18.16b",
-        "mov v4.s[3], w21",
-        "mov v4.s[2], w25",
-        "mov v4.s[1], w20",
-        "mov v20.16b, v4.16b",
-        "mov v20.s[0], w22",
+        "mov v4.16b, v0.16b",
+        "mov v24.16b, v4.16b",
+        "mov w23, v10.s[1]",
+        "mov w24, v10.s[0]",
+        "mov w25, v11.s[1]",
+        "and w30, w23, w24",
+        "bic w18, w25, w23",
+        "eor w25, w30, w18",
+        "ror w30, w23, #6",
+        "eor w18, w30, w23, ror #11",
+        "eor w30, w18, w23, ror #25",
+        "add w18, w25, w30",
+        "mov w25, v7.s[0]",
+        "add w30, w18, w25",
+        "mov w25, v11.s[0]",
+        "add w18, w30, w25",
+        "mov w25, v10.s[3]",
+        "mov w30, v10.s[2]",
+        "str x20, [sp]",
+        "mov w20, v11.s[3]",
+        "str x21, [sp, #32]",
+        "and w21, w30, w20",
+        "str x22, [sp, #64]",
+        "orr w22, w30, w20",
+        "and w20, w25, w22",
+        "orr w22, w20, w21",
+        "add w20, w18, w22",
+        "ror w21, w25, #2",
+        "eor w22, w21, w25, ror #13",
+        "eor w21, w22, w25, ror #22",
+        "add w22, w20, w21",
+        "mov w20, v11.s[2]",
+        "add w21, w18, w20",
+        "and w20, w21, w23",
+        "bic w23, w24, w21",
+        "eor w24, w20, w23",
+        "ror w20, w21, #6",
+        "eor w23, w20, w21, ror #11",
+        "eor w20, w23, w21, ror #25",
+        "add w23, w24, w20",
+        "mov w20, v7.s[1]",
+        "add w24, w23, w20",
+        "mov w20, v11.s[1]",
+        "add w23, w24, w20",
+        "and w20, w25, w30",
+        "orr w24, w25, w30",
+        "and w25, w22, w24",
+        "orr w24, w25, w20",
+        "add w20, w23, w24",
+        "ror w24, w22, #2",
+        "eor w25, w24, w22, ror #13",
+        "eor w24, w25, w22, ror #22",
+        "add w25, w20, w24",
+        "mov w20, v11.s[3]",
+        "add w24, w23, w20",
+        "mov v5.16b, v11.16b",
+        "mov v5.s[3], w25",
+        "mov v12.16b, v5.16b",
+        "mov v12.s[2], w22",
+        "mov v5.16b, v12.16b",
+        "mov v5.s[1], w24",
+        "mov v12.16b, v5.16b",
+        "mov v12.s[0], w21",
+        "mov v20.16b, v12.16b",
         "ldr x0, [x28, #1760]",
-        "ldr q4, [x0, #224]",
-        "tbl v16.16b, {v16.16b}, v4.16b",
-        "mov w20, v20.s[1]",
-        "mov w21, v20.s[0]",
-        "mov w22, v19.s[1]",
+        "ldr q5, [x0, #224]",
+        "tbl v13.16b, {v7.16b}, v5.16b",
+        "mov v16.16b, v13.16b",
+        "mov w20, v12.s[1]",
+        "mov w21, v12.s[0]",
+        "mov w22, v10.s[1]",
         "and w23, w20, w21",
-        "bic w22, w22, w20",
-        "eor w22, w23, w22",
+        "bic w24, w22, w20",
+        "eor w22, w23, w24",
         "ror w23, w20, #6",
-        "eor w23, w23, w20, ror #11",
-        "eor w23, w23, w20, ror #25",
-        "add w22, w22, w23",
-        "mov w23, v16.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v19.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v20.s[3]",
-        "mov w24, v20.s[2]",
-        "mov w25, v19.s[3]",
-        "and w30, w24, w25",
-        "orr w25, w24, w25",
-        "and w25, w23, w25",
-        "orr w25, w25, w30",
-        "add w25, w22, w25",
-        "ror w30, w23, #2",
-        "eor w30, w30, w23, ror #13",
-        "eor w30, w30, w23, ror #22",
-        "add w25, w25, w30",
-        "mov w30, v19.s[2]",
-        "add w22, w22, w30",
-        "and w20, w22, w20",
-        "bic w21, w21, w22",
-        "eor w20, w20, w21",
-        "ror w21, w22, #6",
-        "eor w21, w21, w22, ror #11",
-        "eor w21, w21, w22, ror #25",
-        "add w20, w20, w21",
-        "mov w21, v16.s[1]",
-        "add w20, w20, w21",
-        "mov w21, v19.s[1]",
-        "add w20, w20, w21",
-        "and w21, w23, w24",
-        "orr w23, w23, w24",
-        "and w23, w25, w23",
-        "orr w21, w23, w21",
-        "add w21, w20, w21",
-        "ror w23, w25, #2",
-        "eor w23, w23, w25, ror #13",
-        "eor w23, w23, w25, ror #22",
-        "add w21, w21, w23",
-        "mov w23, v19.s[3]",
-        "add w20, w20, w23",
-        "mov v5.16b, v19.16b",
-        "mov v5.s[3], w21",
-        "mov v5.s[2], w25",
-        "mov v5.s[1], w20",
-        "mov v17.16b, v5.16b",
-        "mov v17.s[0], w22",
-        "mov v16.16b, v21.16b",
-        "ext v16.16b, v22.16b, v21.16b, #4",
-        "add v24.4s, v24.4s, v16.4s",
-        "mov w20, #0x1000",
-        "movk w20, #0x1, lsl #16",
-        "ldr q16, [x29, x20, sxtx]",
-        "mov w20, v21.s[2]",
-        "mov w21, v21.s[3]",
-        "mov w22, v24.s[0]",
-        "ror w23, w20, #17",
-        "ror w24, w20, #19",
-        "eor w23, w23, w24",
-        "lsr w20, w20, #10",
-        "eor w20, w23, w20",
-        "add w20, w22, w20",
-        "mov w22, v24.s[1]",
-        "ror w23, w21, #17",
-        "ror w24, w21, #19",
-        "eor w23, w23, w24",
-        "lsr w21, w21, #10",
-        "eor w21, w23, w21",
-        "add w21, w22, w21",
-        "mov w22, v24.s[2]",
-        "ror w23, w20, #17",
-        "ror w24, w20, #19",
-        "eor w23, w23, w24",
-        "lsr w24, w20, #10",
-        "eor w23, w23, w24",
-        "add w22, w22, w23",
-        "mov w23, v24.s[3]",
+        "eor w24, w23, w20, ror #11",
+        "eor w23, w24, w20, ror #25",
+        "add w24, w22, w23",
+        "mov w22, v13.s[0]",
+        "add w23, w24, w22",
+        "mov w22, v10.s[0]",
+        "add w24, w23, w22",
+        "mov w22, v12.s[3]",
+        "mov w23, v12.s[2]",
+        "mov w25, v10.s[3]",
+        "and w30, w23, w25",
+        "orr w18, w23, w25",
+        "and w25, w22, w18",
+        "orr w18, w25, w30",
+        "add w25, w24, w18",
+        "ror w30, w22, #2",
+        "eor w18, w30, w22, ror #13",
+        "eor w30, w18, w22, ror #22",
+        "add w18, w25, w30",
+        "mov w25, v10.s[2]",
+        "add w30, w24, w25",
+        "and w24, w30, w20",
+        "bic w20, w21, w30",
+        "eor w21, w24, w20",
+        "ror w20, w30, #6",
+        "eor w24, w20, w30, ror #11",
+        "eor w20, w24, w30, ror #25",
+        "add w24, w21, w20",
+        "mov w20, v13.s[1]",
+        "add w21, w24, w20",
+        "mov w20, v10.s[1]",
+        "add w24, w21, w20",
+        "and w20, w22, w23",
+        "orr w21, w22, w23",
+        "and w22, w18, w21",
+        "orr w21, w22, w20",
+        "add w20, w24, w21",
+        "ror w21, w18, #2",
+        "eor w22, w21, w18, ror #13",
+        "eor w21, w22, w18, ror #22",
+        "add w22, w20, w21",
+        "mov w20, v10.s[3]",
+        "add w21, w24, w20",
+        "mov v7.16b, v10.16b",
+        "mov v7.s[3], w22",
+        "mov v13.16b, v7.16b",
+        "mov v13.s[2], w18",
+        "mov v7.16b, v13.16b",
+        "mov v7.s[1], w21",
+        "mov v13.16b, v7.16b",
+        "mov v13.s[0], w30",
+        "mov v17.16b, v13.16b",
+        "mov v16.16b, v8.16b",
+        "ext v7.16b, v9.16b, v8.16b, #4",
+        "mov v16.16b, v7.16b",
+        "add v14.4s, v4.4s, v7.4s",
+        "mov v24.16b, v14.16b",
+        "ldr x20, [sp, #32]",
+        "ldr x21, [sp, #64]",
+        "ldr q4, [x20, x21, sxtx]",
+        "mov v16.16b, v4.16b",
+        "mov w21, v8.s[2]",
+        "mov w22, v8.s[3]",
+        "mov w23, v14.s[0]",
         "ror w24, w21, #17",
         "ror w25, w21, #19",
-        "eor w24, w24, w25",
-        "lsr w25, w21, #10",
-        "eor w24, w24, w25",
-        "add w23, w23, w24",
-        "mov v5.16b, v24.16b",
-        "mov v5.s[3], w23",
-        "mov v5.s[2], w22",
-        "mov v5.s[1], w21",
-        "mov v24.16b, v5.16b",
-        "mov v24.s[0], w20",
-        "add v16.4s, v16.4s, v23.4s",
+        "eor w30, w24, w25",
+        "lsr w24, w21, #10",
+        "eor w21, w30, w24",
+        "add w24, w23, w21",
+        "mov w21, v14.s[1]",
+        "ror w23, w22, #17",
+        "ror w25, w22, #19",
+        "eor w30, w23, w25",
+        "lsr w23, w22, #10",
+        "eor w22, w30, w23",
+        "add w23, w21, w22",
+        "mov w21, v14.s[2]",
+        "ror w22, w24, #17",
+        "ror w25, w24, #19",
+        "eor w30, w22, w25",
+        "lsr w22, w24, #10",
+        "eor w25, w30, w22",
+        "add w22, w21, w25",
+        "mov w21, v14.s[3]",
+        "ror w25, w23, #17",
+        "ror w30, w23, #19",
+        "eor w18, w25, w30",
+        "lsr w25, w23, #10",
+        "eor w30, w18, w25",
+        "add w25, w21, w30",
+        "mov v7.16b, v14.16b",
+        "mov v7.s[3], w25",
+        "mov v14.16b, v7.16b",
+        "mov v14.s[2], w22",
+        "mov v7.16b, v14.16b",
+        "mov v7.s[1], w23",
+        "mov v14.16b, v7.16b",
+        "mov v14.s[0], w24",
+        "mov v24.16b, v14.16b",
+        "add v7.4s, v4.4s, v3.4s",
+        "mov v16.16b, v7.16b",
+        "mov v0.16b, v3.16b",
         "unimplemented (Unimplemented)",
-        "mov w20, v17.s[1]",
-        "mov w21, v17.s[0]",
-        "mov w22, v20.s[1]",
-        "and w23, w20, w21",
-        "bic w22, w22, w20",
-        "eor w22, w23, w22",
-        "ror w23, w20, #6",
-        "eor w23, w23, w20, ror #11",
-        "eor w23, w23, w20, ror #25",
-        "add w22, w22, w23",
-        "mov w23, v16.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v20.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v17.s[3]",
-        "mov w24, v17.s[2]",
-        "mov w25, v20.s[3]",
-        "and w30, w24, w25",
-        "orr w25, w24, w25",
-        "and w25, w23, w25",
-        "orr w25, w25, w30",
-        "add w25, w22, w25",
-        "ror w30, w23, #2",
-        "eor w30, w30, w23, ror #13",
-        "eor w30, w30, w23, ror #22",
-        "add w25, w25, w30",
-        "mov w30, v20.s[2]",
-        "add w22, w22, w30",
-        "and w20, w22, w20",
-        "bic w21, w21, w22",
-        "eor w20, w20, w21",
-        "ror w21, w22, #6",
-        "eor w21, w21, w22, ror #11",
-        "eor w21, w21, w22, ror #25",
-        "add w20, w20, w21",
-        "mov w21, v16.s[1]",
-        "add w20, w20, w21",
-        "mov w21, v20.s[1]",
-        "add w20, w20, w21",
-        "and w21, w23, w24",
-        "orr w23, w23, w24",
-        "and w23, w25, w23",
-        "orr w21, w23, w21",
-        "add w21, w20, w21",
-        "ror w23, w25, #2",
-        "eor w23, w23, w25, ror #13",
-        "eor w23, w23, w25, ror #22",
-        "add w21, w21, w23",
-        "mov w23, v20.s[3]",
-        "add w20, w20, w23",
-        "mov v5.16b, v20.16b",
-        "mov v5.s[3], w21",
-        "mov v5.s[2], w25",
-        "mov v5.s[1], w20",
-        "mov v20.16b, v5.16b",
-        "mov v20.s[0], w22",
-        "tbl v16.16b, {v16.16b}, v4.16b",
-        "mov w20, v20.s[1]",
-        "mov w21, v20.s[0]",
-        "mov w22, v17.s[1]",
+        "mov v4.16b, v0.16b",
+        "mov v23.16b, v4.16b",
+        "mov w21, v13.s[1]",
+        "mov w22, v13.s[0]",
+        "mov w23, v12.s[1]",
+        "and w24, w21, w22",
+        "bic w25, w23, w21",
+        "eor w23, w24, w25",
+        "ror w24, w21, #6",
+        "eor w25, w24, w21, ror #11",
+        "eor w24, w25, w21, ror #25",
+        "add w25, w23, w24",
+        "mov w23, v7.s[0]",
+        "add w24, w25, w23",
+        "mov w23, v12.s[0]",
+        "add w25, w24, w23",
+        "mov w23, v13.s[3]",
+        "mov w24, v13.s[2]",
+        "mov w30, v12.s[3]",
+        "and w18, w24, w30",
+        "orr w20, w24, w30",
+        "and w30, w23, w20",
+        "orr w20, w30, w18",
+        "add w30, w25, w20",
+        "ror w20, w23, #2",
+        "eor w18, w20, w23, ror #13",
+        "eor w20, w18, w23, ror #22",
+        "add w18, w30, w20",
+        "mov w20, v12.s[2]",
+        "add w30, w25, w20",
+        "and w20, w30, w21",
+        "bic w21, w22, w30",
+        "eor w22, w20, w21",
+        "ror w20, w30, #6",
+        "eor w21, w20, w30, ror #11",
+        "eor w20, w21, w30, ror #25",
+        "add w21, w22, w20",
+        "mov w20, v7.s[1]",
+        "add w22, w21, w20",
+        "mov w20, v12.s[1]",
+        "add w21, w22, w20",
+        "and w20, w23, w24",
+        "orr w22, w23, w24",
+        "and w23, w18, w22",
+        "orr w22, w23, w20",
+        "add w20, w21, w22",
+        "ror w22, w18, #2",
+        "eor w23, w22, w18, ror #13",
+        "eor w22, w23, w18, ror #22",
+        "add w23, w20, w22",
+        "mov w20, v12.s[3]",
+        "add w22, w21, w20",
+        "mov v3.16b, v12.16b",
+        "mov v3.s[3], w23",
+        "mov v12.16b, v3.16b",
+        "mov v12.s[2], w18",
+        "mov v3.16b, v12.16b",
+        "mov v3.s[1], w22",
+        "mov v12.16b, v3.16b",
+        "mov v12.s[0], w30",
+        "mov v20.16b, v12.16b",
+        "tbl v3.16b, {v7.16b}, v5.16b",
+        "mov v16.16b, v3.16b",
+        "mov w20, v12.s[1]",
+        "mov w21, v12.s[0]",
+        "mov w22, v13.s[1]",
         "and w23, w20, w21",
-        "bic w22, w22, w20",
-        "eor w22, w23, w22",
+        "bic w24, w22, w20",
+        "eor w22, w23, w24",
         "ror w23, w20, #6",
-        "eor w23, w23, w20, ror #11",
-        "eor w23, w23, w20, ror #25",
-        "add w22, w22, w23",
-        "mov w23, v16.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v17.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v20.s[3]",
-        "mov w24, v20.s[2]",
-        "mov w25, v17.s[3]",
-        "and w30, w24, w25",
-        "orr w25, w24, w25",
-        "and w25, w23, w25",
-        "orr w25, w25, w30",
-        "add w25, w22, w25",
-        "ror w30, w23, #2",
-        "eor w30, w30, w23, ror #13",
-        "eor w30, w30, w23, ror #22",
-        "add w25, w25, w30",
-        "mov w30, v17.s[2]",
-        "add w22, w22, w30",
-        "and w20, w22, w20",
-        "bic w21, w21, w22",
-        "eor w20, w20, w21",
-        "ror w21, w22, #6",
-        "eor w21, w21, w22, ror #11",
-        "eor w21, w21, w22, ror #25",
-        "add w20, w20, w21",
-        "mov w21, v16.s[1]",
-        "add w20, w20, w21",
-        "mov w21, v17.s[1]",
-        "add w20, w20, w21",
-        "and w21, w23, w24",
-        "orr w23, w23, w24",
-        "and w23, w25, w23",
-        "orr w21, w23, w21",
-        "add w21, w20, w21",
-        "ror w23, w25, #2",
-        "eor w23, w23, w25, ror #13",
-        "eor w23, w23, w25, ror #22",
-        "add w21, w21, w23",
-        "mov w23, v17.s[3]",
-        "add w20, w20, w23",
-        "mov v5.16b, v17.16b",
-        "mov v5.s[3], w21",
-        "mov v5.s[2], w25",
-        "mov v5.s[1], w20",
-        "mov v17.16b, v5.16b",
-        "mov v17.s[0], w22",
-        "mov v16.16b, v24.16b",
-        "ext v16.16b, v21.16b, v24.16b, #4",
-        "add v23.4s, v23.4s, v16.4s",
+        "eor w24, w23, w20, ror #11",
+        "eor w23, w24, w20, ror #25",
+        "add w24, w22, w23",
+        "mov w22, v3.s[0]",
+        "add w23, w24, w22",
+        "mov w22, v13.s[0]",
+        "add w24, w23, w22",
+        "mov w22, v12.s[3]",
+        "mov w23, v12.s[2]",
+        "mov w25, v13.s[3]",
+        "and w30, w23, w25",
+        "orr w18, w23, w25",
+        "and w25, w22, w18",
+        "orr w18, w25, w30",
+        "add w25, w24, w18",
+        "ror w30, w22, #2",
+        "eor w18, w30, w22, ror #13",
+        "eor w30, w18, w22, ror #22",
+        "add w18, w25, w30",
+        "mov w25, v13.s[2]",
+        "add w30, w24, w25",
+        "and w24, w30, w20",
+        "bic w20, w21, w30",
+        "eor w21, w24, w20",
+        "ror w20, w30, #6",
+        "eor w24, w20, w30, ror #11",
+        "eor w20, w24, w30, ror #25",
+        "add w24, w21, w20",
+        "mov w20, v3.s[1]",
+        "add w21, w24, w20",
+        "mov w20, v13.s[1]",
+        "add w24, w21, w20",
+        "and w20, w22, w23",
+        "orr w21, w22, w23",
+        "and w22, w18, w21",
+        "orr w21, w22, w20",
+        "add w20, w24, w21",
+        "ror w21, w18, #2",
+        "eor w22, w21, w18, ror #13",
+        "eor w21, w22, w18, ror #22",
+        "add w22, w20, w21",
+        "mov w20, v13.s[3]",
+        "add w21, w24, w20",
+        "mov v3.16b, v13.16b",
+        "mov v3.s[3], w22",
+        "mov v7.16b, v3.16b",
+        "mov v7.s[2], w18",
+        "mov v3.16b, v7.16b",
+        "mov v3.s[1], w21",
+        "mov v7.16b, v3.16b",
+        "mov v7.s[0], w30",
+        "mov v17.16b, v7.16b",
+        "mov v16.16b, v14.16b",
+        "ext v3.16b, v8.16b, v14.16b, #4",
+        "mov v16.16b, v3.16b",
+        "add v13.4s, v4.4s, v3.4s",
+        "mov v23.16b, v13.16b",
         "mov w20, #0x1000",
         "movk w20, #0x1, lsl #16",
-        "ldr q16, [x29, x20, sxtx]",
-        "mov w20, v24.s[2]",
-        "mov w21, v24.s[3]",
-        "mov w22, v23.s[0]",
-        "ror w23, w20, #17",
-        "ror w24, w20, #19",
-        "eor w23, w23, w24",
-        "lsr w20, w20, #10",
-        "eor w20, w23, w20",
-        "add w20, w22, w20",
-        "mov w22, v23.s[1]",
-        "ror w23, w21, #17",
-        "ror w24, w21, #19",
-        "eor w23, w23, w24",
-        "lsr w21, w21, #10",
-        "eor w21, w23, w21",
-        "add w21, w22, w21",
-        "mov w22, v23.s[2]",
-        "ror w23, w20, #17",
-        "ror w24, w20, #19",
-        "eor w23, w23, w24",
-        "lsr w24, w20, #10",
-        "eor w23, w23, w24",
-        "add w22, w22, w23",
-        "mov w23, v23.s[3]",
-        "ror w24, w21, #17",
-        "ror w25, w21, #19",
-        "eor w24, w24, w25",
-        "lsr w25, w21, #10",
-        "eor w24, w24, w25",
-        "add w23, w23, w24",
-        "mov v5.16b, v23.16b",
-        "mov v5.s[3], w23",
-        "mov v5.s[2], w22",
-        "mov v5.s[1], w21",
-        "mov v23.16b, v5.16b",
-        "mov v23.s[0], w20",
-        "add v16.4s, v16.4s, v22.4s",
+        "ldr x21, [sp, #32]",
+        "ldr q3, [x21, x20, sxtx]",
+        "mov v16.16b, v3.16b",
+        "mov w22, v14.s[2]",
+        "mov w23, v14.s[3]",
+        "mov w24, v13.s[0]",
+        "ror w25, w22, #17",
+        "ror w30, w22, #19",
+        "eor w18, w25, w30",
+        "lsr w25, w22, #10",
+        "eor w22, w18, w25",
+        "add w25, w24, w22",
+        "mov w22, v13.s[1]",
+        "ror w24, w23, #17",
+        "ror w30, w23, #19",
+        "eor w18, w24, w30",
+        "lsr w24, w23, #10",
+        "eor w23, w18, w24",
+        "add w24, w22, w23",
+        "mov w22, v13.s[2]",
+        "ror w23, w25, #17",
+        "ror w30, w25, #19",
+        "eor w18, w23, w30",
+        "lsr w23, w25, #10",
+        "eor w30, w18, w23",
+        "add w23, w22, w30",
+        "mov w22, v13.s[3]",
+        "ror w30, w24, #17",
+        "ror w18, w24, #19",
+        "str x20, [sp, #96]",
+        "eor w20, w30, w18",
+        "lsr w30, w24, #10",
+        "eor w18, w20, w30",
+        "add w20, w22, w18",
+        "mov v4.16b, v13.16b",
+        "mov v4.s[3], w20",
+        "mov v13.16b, v4.16b",
+        "mov v13.s[2], w23",
+        "mov v4.16b, v13.16b",
+        "mov v4.s[1], w24",
+        "mov v13.16b, v4.16b",
+        "mov v13.s[0], w25",
+        "mov v23.16b, v13.16b",
+        "add v4.4s, v3.4s, v9.4s",
+        "mov v16.16b, v4.16b",
+        "mov v0.16b, v9.16b",
         "unimplemented (Unimplemented)",
-        "mov w20, v17.s[1]",
-        "mov w21, v17.s[0]",
-        "mov w22, v20.s[1]",
-        "and w23, w20, w21",
-        "bic w22, w22, w20",
-        "eor w22, w23, w22",
-        "ror w23, w20, #6",
-        "eor w23, w23, w20, ror #11",
-        "eor w23, w23, w20, ror #25",
-        "add w22, w22, w23",
-        "mov w23, v16.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v20.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v17.s[3]",
-        "mov w24, v17.s[2]",
-        "mov w25, v20.s[3]",
-        "and w30, w24, w25",
-        "orr w25, w24, w25",
-        "and w25, w23, w25",
-        "orr w25, w25, w30",
-        "add w25, w22, w25",
-        "ror w30, w23, #2",
-        "eor w30, w30, w23, ror #13",
-        "eor w30, w30, w23, ror #22",
-        "add w25, w25, w30",
-        "mov w30, v20.s[2]",
-        "add w22, w22, w30",
-        "and w20, w22, w20",
-        "bic w21, w21, w22",
-        "eor w20, w20, w21",
-        "ror w21, w22, #6",
-        "eor w21, w21, w22, ror #11",
-        "eor w21, w21, w22, ror #25",
-        "add w20, w20, w21",
-        "mov w21, v16.s[1]",
-        "add w20, w20, w21",
-        "mov w21, v20.s[1]",
-        "add w20, w20, w21",
-        "and w21, w23, w24",
-        "orr w23, w23, w24",
-        "and w23, w25, w23",
-        "orr w21, w23, w21",
-        "add w21, w20, w21",
-        "ror w23, w25, #2",
-        "eor w23, w23, w25, ror #13",
-        "eor w23, w23, w25, ror #22",
-        "add w21, w21, w23",
-        "mov w23, v20.s[3]",
-        "add w20, w20, w23",
-        "mov v5.16b, v20.16b",
-        "mov v5.s[3], w21",
-        "mov v5.s[2], w25",
-        "mov v5.s[1], w20",
-        "mov v20.16b, v5.16b",
-        "mov v20.s[0], w22",
-        "tbl v16.16b, {v16.16b}, v4.16b",
-        "mov w20, v20.s[1]",
-        "mov w21, v20.s[0]",
-        "mov w22, v17.s[1]",
+        "mov v3.16b, v0.16b",
+        "mov v22.16b, v3.16b",
+        "mov w20, v7.s[1]",
+        "mov w22, v7.s[0]",
+        "mov w23, v12.s[1]",
+        "and w24, w20, w22",
+        "bic w25, w23, w20",
+        "eor w23, w24, w25",
+        "ror w24, w20, #6",
+        "eor w25, w24, w20, ror #11",
+        "eor w24, w25, w20, ror #25",
+        "add w25, w23, w24",
+        "mov w23, v4.s[0]",
+        "add w24, w25, w23",
+        "mov w23, v12.s[0]",
+        "add w25, w24, w23",
+        "mov w23, v7.s[3]",
+        "mov w24, v7.s[2]",
+        "mov w30, v12.s[3]",
+        "and w18, w24, w30",
+        "orr w21, w24, w30",
+        "and w30, w23, w21",
+        "orr w21, w30, w18",
+        "add w30, w25, w21",
+        "ror w21, w23, #2",
+        "eor w18, w21, w23, ror #13",
+        "eor w21, w18, w23, ror #22",
+        "add w18, w30, w21",
+        "mov w21, v12.s[2]",
+        "add w30, w25, w21",
+        "and w21, w30, w20",
+        "bic w20, w22, w30",
+        "eor w22, w21, w20",
+        "ror w20, w30, #6",
+        "eor w21, w20, w30, ror #11",
+        "eor w20, w21, w30, ror #25",
+        "add w21, w22, w20",
+        "mov w20, v4.s[1]",
+        "add w22, w21, w20",
+        "mov w20, v12.s[1]",
+        "add w21, w22, w20",
+        "and w20, w23, w24",
+        "orr w22, w23, w24",
+        "and w23, w18, w22",
+        "orr w22, w23, w20",
+        "add w20, w21, w22",
+        "ror w22, w18, #2",
+        "eor w23, w22, w18, ror #13",
+        "eor w22, w23, w18, ror #22",
+        "add w23, w20, w22",
+        "mov w20, v12.s[3]",
+        "add w22, w21, w20",
+        "mov v9.16b, v12.16b",
+        "mov v9.s[3], w23",
+        "mov v12.16b, v9.16b",
+        "mov v12.s[2], w18",
+        "mov v9.16b, v12.16b",
+        "mov v9.s[1], w22",
+        "mov v12.16b, v9.16b",
+        "mov v12.s[0], w30",
+        "mov v20.16b, v12.16b",
+        "tbl v9.16b, {v4.16b}, v5.16b",
+        "mov v16.16b, v9.16b",
+        "mov w20, v12.s[1]",
+        "mov w21, v12.s[0]",
+        "mov w22, v7.s[1]",
         "and w23, w20, w21",
-        "bic w22, w22, w20",
-        "eor w22, w23, w22",
+        "bic w24, w22, w20",
+        "eor w22, w23, w24",
         "ror w23, w20, #6",
-        "eor w23, w23, w20, ror #11",
-        "eor w23, w23, w20, ror #25",
-        "add w22, w22, w23",
-        "mov w23, v16.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v17.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v20.s[3]",
-        "mov w24, v20.s[2]",
-        "mov w25, v17.s[3]",
-        "and w30, w24, w25",
-        "orr w25, w24, w25",
-        "and w25, w23, w25",
-        "orr w25, w25, w30",
-        "add w25, w22, w25",
-        "ror w30, w23, #2",
-        "eor w30, w30, w23, ror #13",
-        "eor w30, w30, w23, ror #22",
-        "add w25, w25, w30",
-        "mov w30, v17.s[2]",
-        "add w22, w22, w30",
-        "and w20, w22, w20",
-        "bic w21, w21, w22",
-        "eor w20, w20, w21",
-        "ror w21, w22, #6",
-        "eor w21, w21, w22, ror #11",
-        "eor w21, w21, w22, ror #25",
-        "add w20, w20, w21",
-        "mov w21, v16.s[1]",
-        "add w20, w20, w21",
-        "mov w21, v17.s[1]",
-        "add w20, w20, w21",
-        "and w21, w23, w24",
-        "orr w23, w23, w24",
-        "and w23, w25, w23",
-        "orr w21, w23, w21",
-        "add w21, w20, w21",
-        "ror w23, w25, #2",
-        "eor w23, w23, w25, ror #13",
-        "eor w23, w23, w25, ror #22",
-        "add w21, w21, w23",
-        "mov w23, v17.s[3]",
-        "add w20, w20, w23",
-        "mov v5.16b, v17.16b",
-        "mov v5.s[3], w21",
-        "mov v5.s[2], w25",
-        "mov v5.s[1], w20",
-        "mov v17.16b, v5.16b",
-        "mov v17.s[0], w22",
-        "mov v16.16b, v23.16b",
-        "ext v16.16b, v24.16b, v23.16b, #4",
-        "add v22.4s, v22.4s, v16.4s",
-        "mov w20, #0x1000",
-        "movk w20, #0x1, lsl #16",
-        "ldr q16, [x29, x20, sxtx]",
-        "mov w20, v23.s[2]",
-        "mov w21, v23.s[3]",
-        "mov w22, v22.s[0]",
-        "ror w23, w20, #17",
-        "ror w24, w20, #19",
-        "eor w23, w23, w24",
-        "lsr w20, w20, #10",
-        "eor w20, w23, w20",
-        "add w20, w22, w20",
-        "mov w22, v22.s[1]",
-        "ror w23, w21, #17",
-        "ror w24, w21, #19",
-        "eor w23, w23, w24",
-        "lsr w21, w21, #10",
-        "eor w21, w23, w21",
-        "add w21, w22, w21",
-        "mov w22, v22.s[2]",
-        "ror w23, w20, #17",
-        "ror w24, w20, #19",
-        "eor w23, w23, w24",
-        "lsr w24, w20, #10",
-        "eor w23, w23, w24",
-        "add w22, w22, w23",
-        "mov w23, v22.s[3]",
+        "eor w24, w23, w20, ror #11",
+        "eor w23, w24, w20, ror #25",
+        "add w24, w22, w23",
+        "mov w22, v9.s[0]",
+        "add w23, w24, w22",
+        "mov w22, v7.s[0]",
+        "add w24, w23, w22",
+        "mov w22, v12.s[3]",
+        "mov w23, v12.s[2]",
+        "mov w25, v7.s[3]",
+        "and w30, w23, w25",
+        "orr w18, w23, w25",
+        "and w25, w22, w18",
+        "orr w18, w25, w30",
+        "add w25, w24, w18",
+        "ror w30, w22, #2",
+        "eor w18, w30, w22, ror #13",
+        "eor w30, w18, w22, ror #22",
+        "add w18, w25, w30",
+        "mov w25, v7.s[2]",
+        "add w30, w24, w25",
+        "and w24, w30, w20",
+        "bic w20, w21, w30",
+        "eor w21, w24, w20",
+        "ror w20, w30, #6",
+        "eor w24, w20, w30, ror #11",
+        "eor w20, w24, w30, ror #25",
+        "add w24, w21, w20",
+        "mov w20, v9.s[1]",
+        "add w21, w24, w20",
+        "mov w20, v7.s[1]",
+        "add w24, w21, w20",
+        "and w20, w22, w23",
+        "orr w21, w22, w23",
+        "and w22, w18, w21",
+        "orr w21, w22, w20",
+        "add w20, w24, w21",
+        "ror w21, w18, #2",
+        "eor w22, w21, w18, ror #13",
+        "eor w21, w22, w18, ror #22",
+        "add w22, w20, w21",
+        "mov w20, v7.s[3]",
+        "add w21, w24, w20",
+        "mov v4.16b, v7.16b",
+        "mov v4.s[3], w22",
+        "mov v7.16b, v4.16b",
+        "mov v7.s[2], w18",
+        "mov v4.16b, v7.16b",
+        "mov v4.s[1], w21",
+        "mov v7.16b, v4.16b",
+        "mov v7.s[0], w30",
+        "mov v17.16b, v7.16b",
+        "mov v16.16b, v13.16b",
+        "ext v4.16b, v14.16b, v13.16b, #4",
+        "mov v16.16b, v4.16b",
+        "add v9.4s, v3.4s, v4.4s",
+        "mov v22.16b, v9.16b",
+        "ldr x20, [sp, #32]",
+        "ldr x21, [sp, #96]",
+        "ldr q3, [x20, x21, sxtx]",
+        "mov v16.16b, v3.16b",
+        "mov w21, v13.s[2]",
+        "mov w22, v13.s[3]",
+        "mov w23, v9.s[0]",
         "ror w24, w21, #17",
         "ror w25, w21, #19",
-        "eor w24, w24, w25",
-        "lsr w25, w21, #10",
-        "eor w24, w24, w25",
-        "add w23, w23, w24",
-        "mov v5.16b, v22.16b",
-        "mov v5.s[3], w23",
-        "mov v5.s[2], w22",
-        "mov v5.s[1], w21",
-        "mov v22.16b, v5.16b",
-        "mov v22.s[0], w20",
-        "add v16.4s, v16.4s, v21.4s",
+        "eor w30, w24, w25",
+        "lsr w24, w21, #10",
+        "eor w21, w30, w24",
+        "add w24, w23, w21",
+        "mov w21, v9.s[1]",
+        "ror w23, w22, #17",
+        "ror w25, w22, #19",
+        "eor w30, w23, w25",
+        "lsr w23, w22, #10",
+        "eor w22, w30, w23",
+        "add w23, w21, w22",
+        "mov w21, v9.s[2]",
+        "ror w22, w24, #17",
+        "ror w25, w24, #19",
+        "eor w30, w22, w25",
+        "lsr w22, w24, #10",
+        "eor w25, w30, w22",
+        "add w22, w21, w25",
+        "mov w21, v9.s[3]",
+        "ror w25, w23, #17",
+        "ror w30, w23, #19",
+        "eor w18, w25, w30",
+        "lsr w25, w23, #10",
+        "eor w30, w18, w25",
+        "add w25, w21, w30",
+        "mov v4.16b, v9.16b",
+        "mov v4.s[3], w25",
+        "mov v9.16b, v4.16b",
+        "mov v9.s[2], w22",
+        "mov v4.16b, v9.16b",
+        "mov v4.s[1], w23",
+        "mov v9.16b, v4.16b",
+        "mov v9.s[0], w24",
+        "mov v22.16b, v9.16b",
+        "add v4.4s, v3.4s, v8.4s",
+        "mov v16.16b, v4.16b",
+        "mov v0.16b, v8.16b",
         "unimplemented (Unimplemented)",
-        "mov w20, v17.s[1]",
-        "mov w21, v17.s[0]",
-        "mov w22, v20.s[1]",
+        "mov v3.16b, v0.16b",
+        "mov v21.16b, v3.16b",
+        "mov w21, v7.s[1]",
+        "mov w22, v7.s[0]",
+        "mov w23, v12.s[1]",
+        "and w24, w21, w22",
+        "bic w25, w23, w21",
+        "eor w23, w24, w25",
+        "ror w24, w21, #6",
+        "eor w25, w24, w21, ror #11",
+        "eor w24, w25, w21, ror #25",
+        "add w25, w23, w24",
+        "mov w23, v4.s[0]",
+        "add w24, w25, w23",
+        "mov w23, v12.s[0]",
+        "add w25, w24, w23",
+        "mov w23, v7.s[3]",
+        "mov w24, v7.s[2]",
+        "mov w30, v12.s[3]",
+        "and w18, w24, w30",
+        "orr w20, w24, w30",
+        "and w30, w23, w20",
+        "orr w20, w30, w18",
+        "add w30, w25, w20",
+        "ror w20, w23, #2",
+        "eor w18, w20, w23, ror #13",
+        "eor w20, w18, w23, ror #22",
+        "add w18, w30, w20",
+        "mov w20, v12.s[2]",
+        "add w30, w25, w20",
+        "and w20, w30, w21",
+        "bic w21, w22, w30",
+        "eor w22, w20, w21",
+        "ror w20, w30, #6",
+        "eor w21, w20, w30, ror #11",
+        "eor w20, w21, w30, ror #25",
+        "add w21, w22, w20",
+        "mov w20, v4.s[1]",
+        "add w22, w21, w20",
+        "mov w20, v12.s[1]",
+        "add w21, w22, w20",
+        "and w20, w23, w24",
+        "orr w22, w23, w24",
+        "and w23, w18, w22",
+        "orr w22, w23, w20",
+        "add w20, w21, w22",
+        "ror w22, w18, #2",
+        "eor w23, w22, w18, ror #13",
+        "eor w22, w23, w18, ror #22",
+        "add w23, w20, w22",
+        "mov w20, v12.s[3]",
+        "add w22, w21, w20",
+        "mov v8.16b, v12.16b",
+        "mov v8.s[3], w23",
+        "mov v12.16b, v8.16b",
+        "mov v12.s[2], w18",
+        "mov v8.16b, v12.16b",
+        "mov v8.s[1], w22",
+        "mov v12.16b, v8.16b",
+        "mov v12.s[0], w30",
+        "mov v20.16b, v12.16b",
+        "tbl v8.16b, {v4.16b}, v5.16b",
+        "mov v16.16b, v8.16b",
+        "mov w20, v12.s[1]",
+        "mov w21, v12.s[0]",
+        "mov w22, v7.s[1]",
         "and w23, w20, w21",
-        "bic w22, w22, w20",
-        "eor w22, w23, w22",
+        "bic w24, w22, w20",
+        "eor w22, w23, w24",
         "ror w23, w20, #6",
-        "eor w23, w23, w20, ror #11",
-        "eor w23, w23, w20, ror #25",
-        "add w22, w22, w23",
-        "mov w23, v16.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v20.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v17.s[3]",
-        "mov w24, v17.s[2]",
-        "mov w25, v20.s[3]",
-        "and w30, w24, w25",
-        "orr w25, w24, w25",
-        "and w25, w23, w25",
-        "orr w25, w25, w30",
-        "add w25, w22, w25",
-        "ror w30, w23, #2",
-        "eor w30, w30, w23, ror #13",
-        "eor w30, w30, w23, ror #22",
-        "add w25, w25, w30",
-        "mov w30, v20.s[2]",
-        "add w22, w22, w30",
-        "and w20, w22, w20",
-        "bic w21, w21, w22",
-        "eor w20, w20, w21",
-        "ror w21, w22, #6",
-        "eor w21, w21, w22, ror #11",
-        "eor w21, w21, w22, ror #25",
-        "add w20, w20, w21",
-        "mov w21, v16.s[1]",
-        "add w20, w20, w21",
-        "mov w21, v20.s[1]",
-        "add w20, w20, w21",
-        "and w21, w23, w24",
-        "orr w23, w23, w24",
-        "and w23, w25, w23",
-        "orr w21, w23, w21",
-        "add w21, w20, w21",
-        "ror w23, w25, #2",
-        "eor w23, w23, w25, ror #13",
-        "eor w23, w23, w25, ror #22",
-        "add w21, w21, w23",
-        "mov w23, v20.s[3]",
-        "add w20, w20, w23",
-        "mov v5.16b, v20.16b",
-        "mov v5.s[3], w21",
-        "mov v5.s[2], w25",
-        "mov v5.s[1], w20",
-        "mov v20.16b, v5.16b",
-        "mov v20.s[0], w22",
-        "tbl v16.16b, {v16.16b}, v4.16b",
-        "mov w20, v20.s[1]",
-        "mov w21, v20.s[0]",
-        "mov w22, v17.s[1]",
-        "and w23, w20, w21",
-        "bic w22, w22, w20",
-        "eor w22, w23, w22",
-        "ror w23, w20, #6",
-        "eor w23, w23, w20, ror #11",
-        "eor w23, w23, w20, ror #25",
-        "add w22, w22, w23",
-        "mov w23, v16.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v17.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v20.s[3]",
-        "mov w24, v20.s[2]",
-        "mov w25, v17.s[3]",
-        "and w30, w24, w25",
-        "orr w25, w24, w25",
-        "and w25, w23, w25",
-        "orr w25, w25, w30",
-        "add w25, w22, w25",
-        "ror w30, w23, #2",
-        "eor w30, w30, w23, ror #13",
-        "eor w30, w30, w23, ror #22",
-        "add w25, w25, w30",
-        "mov w30, v17.s[2]",
-        "add w22, w22, w30",
-        "and w20, w22, w20",
-        "bic w21, w21, w22",
-        "eor w20, w20, w21",
-        "ror w21, w22, #6",
-        "eor w21, w21, w22, ror #11",
-        "eor w21, w21, w22, ror #25",
-        "add w20, w20, w21",
-        "mov w21, v16.s[1]",
-        "add w20, w20, w21",
-        "mov w21, v17.s[1]",
-        "add w20, w20, w21",
-        "and w21, w23, w24",
-        "orr w23, w23, w24",
-        "and w23, w25, w23",
-        "orr w21, w23, w21",
-        "add w21, w20, w21",
-        "ror w23, w25, #2",
-        "eor w23, w23, w25, ror #13",
-        "eor w23, w23, w25, ror #22",
-        "add w21, w21, w23",
-        "mov w23, v17.s[3]",
-        "add w20, w20, w23",
-        "mov v5.16b, v17.16b",
-        "mov v5.s[3], w21",
-        "mov v5.s[2], w25",
-        "mov v5.s[1], w20",
-        "mov v17.16b, v5.16b",
-        "mov v17.s[0], w22",
-        "mov v16.16b, v22.16b",
-        "ext v16.16b, v23.16b, v22.16b, #4",
-        "add v21.4s, v21.4s, v16.4s",
+        "eor w24, w23, w20, ror #11",
+        "eor w23, w24, w20, ror #25",
+        "add w24, w22, w23",
+        "mov w22, v8.s[0]",
+        "add w23, w24, w22",
+        "mov w22, v7.s[0]",
+        "add w24, w23, w22",
+        "mov w22, v12.s[3]",
+        "mov w23, v12.s[2]",
+        "mov w25, v7.s[3]",
+        "and w30, w23, w25",
+        "orr w18, w23, w25",
+        "and w25, w22, w18",
+        "orr w18, w25, w30",
+        "add w25, w24, w18",
+        "ror w30, w22, #2",
+        "eor w18, w30, w22, ror #13",
+        "eor w30, w18, w22, ror #22",
+        "add w18, w25, w30",
+        "mov w25, v7.s[2]",
+        "add w30, w24, w25",
+        "and w24, w30, w20",
+        "bic w20, w21, w30",
+        "eor w21, w24, w20",
+        "ror w20, w30, #6",
+        "eor w24, w20, w30, ror #11",
+        "eor w20, w24, w30, ror #25",
+        "add w24, w21, w20",
+        "mov w20, v8.s[1]",
+        "add w21, w24, w20",
+        "mov w20, v7.s[1]",
+        "add w24, w21, w20",
+        "and w20, w22, w23",
+        "orr w21, w22, w23",
+        "and w22, w18, w21",
+        "orr w21, w22, w20",
+        "add w20, w24, w21",
+        "ror w21, w18, #2",
+        "eor w22, w21, w18, ror #13",
+        "eor w21, w22, w18, ror #22",
+        "add w22, w20, w21",
+        "mov w20, v7.s[3]",
+        "add w21, w24, w20",
+        "mov v4.16b, v7.16b",
+        "mov v4.s[3], w22",
+        "mov v7.16b, v4.16b",
+        "mov v7.s[2], w18",
+        "mov v4.16b, v7.16b",
+        "mov v4.s[1], w21",
+        "mov v7.16b, v4.16b",
+        "mov v7.s[0], w30",
+        "mov v17.16b, v7.16b",
+        "mov v16.16b, v9.16b",
+        "ext v4.16b, v13.16b, v9.16b, #4",
+        "mov v16.16b, v4.16b",
+        "add v8.4s, v3.4s, v4.4s",
+        "mov v21.16b, v8.16b",
         "mov w20, #0x1000",
         "movk w20, #0x1, lsl #16",
-        "ldr q16, [x29, x20, sxtx]",
-        "mov w20, v22.s[2]",
-        "mov w21, v22.s[3]",
-        "mov w22, v21.s[0]",
-        "ror w23, w20, #17",
-        "ror w24, w20, #19",
-        "eor w23, w23, w24",
-        "lsr w20, w20, #10",
-        "eor w20, w23, w20",
-        "add w20, w22, w20",
-        "mov w22, v21.s[1]",
-        "ror w23, w21, #17",
-        "ror w24, w21, #19",
-        "eor w23, w23, w24",
-        "lsr w21, w21, #10",
-        "eor w21, w23, w21",
-        "add w21, w22, w21",
-        "mov w22, v21.s[2]",
-        "ror w23, w20, #17",
-        "ror w24, w20, #19",
-        "eor w23, w23, w24",
-        "lsr w24, w20, #10",
-        "eor w23, w23, w24",
-        "add w22, w22, w23",
-        "mov w23, v21.s[3]",
-        "ror w24, w21, #17",
-        "ror w25, w21, #19",
-        "eor w24, w24, w25",
-        "lsr w25, w21, #10",
-        "eor w24, w24, w25",
-        "add w23, w23, w24",
-        "mov v5.16b, v21.16b",
-        "mov v5.s[3], w23",
-        "mov v5.s[2], w22",
-        "mov v5.s[1], w21",
-        "mov v21.16b, v5.16b",
-        "mov v21.s[0], w20",
-        "add v16.4s, v16.4s, v24.4s",
+        "ldr x21, [sp, #32]",
+        "ldr q3, [x21, x20, sxtx]",
+        "mov v16.16b, v3.16b",
+        "mov w22, v9.s[2]",
+        "mov w23, v9.s[3]",
+        "mov w24, v8.s[0]",
+        "ror w25, w22, #17",
+        "ror w30, w22, #19",
+        "eor w18, w25, w30",
+        "lsr w25, w22, #10",
+        "eor w22, w18, w25",
+        "add w25, w24, w22",
+        "mov w22, v8.s[1]",
+        "ror w24, w23, #17",
+        "ror w30, w23, #19",
+        "eor w18, w24, w30",
+        "lsr w24, w23, #10",
+        "eor w23, w18, w24",
+        "add w24, w22, w23",
+        "mov w22, v8.s[2]",
+        "ror w23, w25, #17",
+        "ror w30, w25, #19",
+        "eor w18, w23, w30",
+        "lsr w23, w25, #10",
+        "eor w30, w18, w23",
+        "add w23, w22, w30",
+        "mov w22, v8.s[3]",
+        "ror w30, w24, #17",
+        "ror w18, w24, #19",
+        "str x20, [sp, #128]",
+        "eor w20, w30, w18",
+        "lsr w30, w24, #10",
+        "eor w18, w20, w30",
+        "add w20, w22, w18",
+        "mov v4.16b, v8.16b",
+        "mov v4.s[3], w20",
+        "mov v8.16b, v4.16b",
+        "mov v8.s[2], w23",
+        "mov v4.16b, v8.16b",
+        "mov v4.s[1], w24",
+        "mov v8.16b, v4.16b",
+        "mov v8.s[0], w25",
+        "mov v21.16b, v8.16b",
+        "add v4.4s, v3.4s, v14.4s",
+        "mov v16.16b, v4.16b",
+        "mov v0.16b, v14.16b",
         "unimplemented (Unimplemented)",
-        "mov w20, v17.s[1]",
-        "mov w21, v17.s[0]",
-        "mov w22, v20.s[1]",
+        "mov v3.16b, v0.16b",
+        "mov v24.16b, v3.16b",
+        "mov w20, v7.s[1]",
+        "mov w22, v7.s[0]",
+        "mov w23, v12.s[1]",
+        "and w24, w20, w22",
+        "bic w25, w23, w20",
+        "eor w23, w24, w25",
+        "ror w24, w20, #6",
+        "eor w25, w24, w20, ror #11",
+        "eor w24, w25, w20, ror #25",
+        "add w25, w23, w24",
+        "mov w23, v4.s[0]",
+        "add w24, w25, w23",
+        "mov w23, v12.s[0]",
+        "add w25, w24, w23",
+        "mov w23, v7.s[3]",
+        "mov w24, v7.s[2]",
+        "mov w30, v12.s[3]",
+        "and w18, w24, w30",
+        "orr w21, w24, w30",
+        "and w30, w23, w21",
+        "orr w21, w30, w18",
+        "add w30, w25, w21",
+        "ror w21, w23, #2",
+        "eor w18, w21, w23, ror #13",
+        "eor w21, w18, w23, ror #22",
+        "add w18, w30, w21",
+        "mov w21, v12.s[2]",
+        "add w30, w25, w21",
+        "and w21, w30, w20",
+        "bic w20, w22, w30",
+        "eor w22, w21, w20",
+        "ror w20, w30, #6",
+        "eor w21, w20, w30, ror #11",
+        "eor w20, w21, w30, ror #25",
+        "add w21, w22, w20",
+        "mov w20, v4.s[1]",
+        "add w22, w21, w20",
+        "mov w20, v12.s[1]",
+        "add w21, w22, w20",
+        "and w20, w23, w24",
+        "orr w22, w23, w24",
+        "and w23, w18, w22",
+        "orr w22, w23, w20",
+        "add w20, w21, w22",
+        "ror w22, w18, #2",
+        "eor w23, w22, w18, ror #13",
+        "eor w22, w23, w18, ror #22",
+        "add w23, w20, w22",
+        "mov w20, v12.s[3]",
+        "add w22, w21, w20",
+        "mov v14.16b, v12.16b",
+        "mov v14.s[3], w23",
+        "mov v12.16b, v14.16b",
+        "mov v12.s[2], w18",
+        "mov v14.16b, v12.16b",
+        "mov v14.s[1], w22",
+        "mov v12.16b, v14.16b",
+        "mov v12.s[0], w30",
+        "mov v20.16b, v12.16b",
+        "tbl v14.16b, {v4.16b}, v5.16b",
+        "mov v16.16b, v14.16b",
+        "mov w20, v12.s[1]",
+        "mov w21, v12.s[0]",
+        "mov w22, v7.s[1]",
         "and w23, w20, w21",
-        "bic w22, w22, w20",
-        "eor w22, w23, w22",
+        "bic w24, w22, w20",
+        "eor w22, w23, w24",
         "ror w23, w20, #6",
-        "eor w23, w23, w20, ror #11",
-        "eor w23, w23, w20, ror #25",
-        "add w22, w22, w23",
-        "mov w23, v16.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v20.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v17.s[3]",
-        "mov w24, v17.s[2]",
-        "mov w25, v20.s[3]",
-        "and w30, w24, w25",
-        "orr w25, w24, w25",
-        "and w25, w23, w25",
-        "orr w25, w25, w30",
-        "add w25, w22, w25",
-        "ror w30, w23, #2",
-        "eor w30, w30, w23, ror #13",
-        "eor w30, w30, w23, ror #22",
-        "add w25, w25, w30",
-        "mov w30, v20.s[2]",
-        "add w22, w22, w30",
-        "and w20, w22, w20",
-        "bic w21, w21, w22",
-        "eor w20, w20, w21",
-        "ror w21, w22, #6",
-        "eor w21, w21, w22, ror #11",
-        "eor w21, w21, w22, ror #25",
-        "add w20, w20, w21",
-        "mov w21, v16.s[1]",
-        "add w20, w20, w21",
-        "mov w21, v20.s[1]",
-        "add w20, w20, w21",
-        "and w21, w23, w24",
-        "orr w23, w23, w24",
-        "and w23, w25, w23",
-        "orr w21, w23, w21",
-        "add w21, w20, w21",
-        "ror w23, w25, #2",
-        "eor w23, w23, w25, ror #13",
-        "eor w23, w23, w25, ror #22",
-        "add w21, w21, w23",
-        "mov w23, v20.s[3]",
-        "add w20, w20, w23",
-        "mov v5.16b, v20.16b",
-        "mov v5.s[3], w21",
-        "mov v5.s[2], w25",
-        "mov v5.s[1], w20",
-        "mov v20.16b, v5.16b",
-        "mov v20.s[0], w22",
-        "tbl v16.16b, {v16.16b}, v4.16b",
-        "mov w20, v20.s[1]",
-        "mov w21, v20.s[0]",
-        "mov w22, v17.s[1]",
-        "and w23, w20, w21",
-        "bic w22, w22, w20",
-        "eor w22, w23, w22",
-        "ror w23, w20, #6",
-        "eor w23, w23, w20, ror #11",
-        "eor w23, w23, w20, ror #25",
-        "add w22, w22, w23",
-        "mov w23, v16.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v17.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v20.s[3]",
-        "mov w24, v20.s[2]",
-        "mov w25, v17.s[3]",
-        "and w30, w24, w25",
-        "orr w25, w24, w25",
-        "and w25, w23, w25",
-        "orr w25, w25, w30",
-        "add w25, w22, w25",
-        "ror w30, w23, #2",
-        "eor w30, w30, w23, ror #13",
-        "eor w30, w30, w23, ror #22",
-        "add w25, w25, w30",
-        "mov w30, v17.s[2]",
-        "add w22, w22, w30",
-        "and w20, w22, w20",
-        "bic w21, w21, w22",
-        "eor w20, w20, w21",
-        "ror w21, w22, #6",
-        "eor w21, w21, w22, ror #11",
-        "eor w21, w21, w22, ror #25",
-        "add w20, w20, w21",
-        "mov w21, v16.s[1]",
-        "add w20, w20, w21",
-        "mov w21, v17.s[1]",
-        "add w20, w20, w21",
-        "and w21, w23, w24",
-        "orr w23, w23, w24",
-        "and w23, w25, w23",
-        "orr w21, w23, w21",
-        "add w21, w20, w21",
-        "ror w23, w25, #2",
-        "eor w23, w23, w25, ror #13",
-        "eor w23, w23, w25, ror #22",
-        "add w21, w21, w23",
-        "mov w23, v17.s[3]",
-        "add w20, w20, w23",
-        "mov v5.16b, v17.16b",
-        "mov v5.s[3], w21",
-        "mov v5.s[2], w25",
-        "mov v5.s[1], w20",
-        "mov v17.16b, v5.16b",
-        "mov v17.s[0], w22",
-        "mov v16.16b, v21.16b",
-        "ext v16.16b, v22.16b, v21.16b, #4",
-        "add v24.4s, v24.4s, v16.4s",
-        "mov w20, #0x1000",
-        "movk w20, #0x1, lsl #16",
-        "ldr q16, [x29, x20, sxtx]",
-        "mov w20, v21.s[2]",
-        "mov w21, v21.s[3]",
-        "mov w22, v24.s[0]",
-        "ror w23, w20, #17",
-        "ror w24, w20, #19",
-        "eor w23, w23, w24",
-        "lsr w20, w20, #10",
-        "eor w20, w23, w20",
-        "add w20, w22, w20",
-        "mov w22, v24.s[1]",
-        "ror w23, w21, #17",
-        "ror w24, w21, #19",
-        "eor w23, w23, w24",
-        "lsr w21, w21, #10",
-        "eor w21, w23, w21",
-        "add w21, w22, w21",
-        "mov w22, v24.s[2]",
-        "ror w23, w20, #17",
-        "ror w24, w20, #19",
-        "eor w23, w23, w24",
-        "lsr w24, w20, #10",
-        "eor w23, w23, w24",
-        "add w22, w22, w23",
-        "mov w23, v24.s[3]",
+        "eor w24, w23, w20, ror #11",
+        "eor w23, w24, w20, ror #25",
+        "add w24, w22, w23",
+        "mov w22, v14.s[0]",
+        "add w23, w24, w22",
+        "mov w22, v7.s[0]",
+        "add w24, w23, w22",
+        "mov w22, v12.s[3]",
+        "mov w23, v12.s[2]",
+        "mov w25, v7.s[3]",
+        "and w30, w23, w25",
+        "orr w18, w23, w25",
+        "and w25, w22, w18",
+        "orr w18, w25, w30",
+        "add w25, w24, w18",
+        "ror w30, w22, #2",
+        "eor w18, w30, w22, ror #13",
+        "eor w30, w18, w22, ror #22",
+        "add w18, w25, w30",
+        "mov w25, v7.s[2]",
+        "add w30, w24, w25",
+        "and w24, w30, w20",
+        "bic w20, w21, w30",
+        "eor w21, w24, w20",
+        "ror w20, w30, #6",
+        "eor w24, w20, w30, ror #11",
+        "eor w20, w24, w30, ror #25",
+        "add w24, w21, w20",
+        "mov w20, v14.s[1]",
+        "add w21, w24, w20",
+        "mov w20, v7.s[1]",
+        "add w24, w21, w20",
+        "and w20, w22, w23",
+        "orr w21, w22, w23",
+        "and w22, w18, w21",
+        "orr w21, w22, w20",
+        "add w20, w24, w21",
+        "ror w21, w18, #2",
+        "eor w22, w21, w18, ror #13",
+        "eor w21, w22, w18, ror #22",
+        "add w22, w20, w21",
+        "mov w20, v7.s[3]",
+        "add w21, w24, w20",
+        "mov v4.16b, v7.16b",
+        "mov v4.s[3], w22",
+        "mov v7.16b, v4.16b",
+        "mov v7.s[2], w18",
+        "mov v4.16b, v7.16b",
+        "mov v4.s[1], w21",
+        "mov v7.16b, v4.16b",
+        "mov v7.s[0], w30",
+        "mov v17.16b, v7.16b",
+        "mov v16.16b, v8.16b",
+        "ext v4.16b, v9.16b, v8.16b, #4",
+        "mov v16.16b, v4.16b",
+        "add v14.4s, v3.4s, v4.4s",
+        "mov v24.16b, v14.16b",
+        "ldr x20, [sp, #32]",
+        "ldr x21, [sp, #128]",
+        "ldr q3, [x20, x21, sxtx]",
+        "mov v16.16b, v3.16b",
+        "mov w21, v8.s[2]",
+        "mov w22, v8.s[3]",
+        "mov w23, v14.s[0]",
         "ror w24, w21, #17",
         "ror w25, w21, #19",
-        "eor w24, w24, w25",
-        "lsr w25, w21, #10",
-        "eor w24, w24, w25",
-        "add w23, w23, w24",
-        "mov v5.16b, v24.16b",
-        "mov v5.s[3], w23",
-        "mov v5.s[2], w22",
-        "mov v5.s[1], w21",
-        "mov v24.16b, v5.16b",
-        "mov v24.s[0], w20",
-        "add v16.4s, v16.4s, v23.4s",
+        "eor w30, w24, w25",
+        "lsr w24, w21, #10",
+        "eor w21, w30, w24",
+        "add w24, w23, w21",
+        "mov w21, v14.s[1]",
+        "ror w23, w22, #17",
+        "ror w25, w22, #19",
+        "eor w30, w23, w25",
+        "lsr w23, w22, #10",
+        "eor w22, w30, w23",
+        "add w23, w21, w22",
+        "mov w21, v14.s[2]",
+        "ror w22, w24, #17",
+        "ror w25, w24, #19",
+        "eor w30, w22, w25",
+        "lsr w22, w24, #10",
+        "eor w25, w30, w22",
+        "add w22, w21, w25",
+        "mov w21, v14.s[3]",
+        "ror w25, w23, #17",
+        "ror w30, w23, #19",
+        "eor w18, w25, w30",
+        "lsr w25, w23, #10",
+        "eor w30, w18, w25",
+        "add w25, w21, w30",
+        "mov v4.16b, v14.16b",
+        "mov v4.s[3], w25",
+        "mov v14.16b, v4.16b",
+        "mov v14.s[2], w22",
+        "mov v4.16b, v14.16b",
+        "mov v4.s[1], w23",
+        "mov v14.16b, v4.16b",
+        "mov v14.s[0], w24",
+        "mov v24.16b, v14.16b",
+        "add v4.4s, v3.4s, v13.4s",
+        "mov v16.16b, v4.16b",
+        "mov v0.16b, v13.16b",
         "unimplemented (Unimplemented)",
-        "mov w20, v17.s[1]",
-        "mov w21, v17.s[0]",
-        "mov w22, v20.s[1]",
-        "and w23, w20, w21",
-        "bic w22, w22, w20",
-        "eor w22, w23, w22",
-        "ror w23, w20, #6",
-        "eor w23, w23, w20, ror #11",
-        "eor w23, w23, w20, ror #25",
-        "add w22, w22, w23",
-        "mov w23, v16.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v20.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v17.s[3]",
-        "mov w24, v17.s[2]",
-        "mov w25, v20.s[3]",
-        "and w30, w24, w25",
-        "orr w25, w24, w25",
-        "and w25, w23, w25",
-        "orr w25, w25, w30",
-        "add w25, w22, w25",
-        "ror w30, w23, #2",
-        "eor w30, w30, w23, ror #13",
-        "eor w30, w30, w23, ror #22",
-        "add w25, w25, w30",
-        "mov w30, v20.s[2]",
-        "add w22, w22, w30",
-        "and w20, w22, w20",
-        "bic w21, w21, w22",
-        "eor w20, w20, w21",
-        "ror w21, w22, #6",
-        "eor w21, w21, w22, ror #11",
-        "eor w21, w21, w22, ror #25",
-        "add w20, w20, w21",
-        "mov w21, v16.s[1]",
-        "add w20, w20, w21",
-        "mov w21, v20.s[1]",
-        "add w20, w20, w21",
-        "and w21, w23, w24",
-        "orr w23, w23, w24",
-        "and w23, w25, w23",
-        "orr w21, w23, w21",
-        "add w21, w20, w21",
-        "ror w23, w25, #2",
-        "eor w23, w23, w25, ror #13",
-        "eor w23, w23, w25, ror #22",
-        "add w21, w21, w23",
-        "mov w23, v20.s[3]",
-        "add w20, w20, w23",
-        "mov v5.16b, v20.16b",
-        "mov v5.s[3], w21",
-        "mov v5.s[2], w25",
-        "mov v5.s[1], w20",
-        "mov v20.16b, v5.16b",
-        "mov v20.s[0], w22",
-        "tbl v16.16b, {v16.16b}, v4.16b",
-        "mov w20, v20.s[1]",
-        "mov w21, v20.s[0]",
-        "mov w22, v17.s[1]",
+        "mov v3.16b, v0.16b",
+        "mov v23.16b, v3.16b",
+        "mov w21, v7.s[1]",
+        "mov w22, v7.s[0]",
+        "mov w23, v12.s[1]",
+        "and w24, w21, w22",
+        "bic w25, w23, w21",
+        "eor w23, w24, w25",
+        "ror w24, w21, #6",
+        "eor w25, w24, w21, ror #11",
+        "eor w24, w25, w21, ror #25",
+        "add w25, w23, w24",
+        "mov w23, v4.s[0]",
+        "add w24, w25, w23",
+        "mov w23, v12.s[0]",
+        "add w25, w24, w23",
+        "mov w23, v7.s[3]",
+        "mov w24, v7.s[2]",
+        "mov w30, v12.s[3]",
+        "and w18, w24, w30",
+        "orr w20, w24, w30",
+        "and w30, w23, w20",
+        "orr w20, w30, w18",
+        "add w30, w25, w20",
+        "ror w20, w23, #2",
+        "eor w18, w20, w23, ror #13",
+        "eor w20, w18, w23, ror #22",
+        "add w18, w30, w20",
+        "mov w20, v12.s[2]",
+        "add w30, w25, w20",
+        "and w20, w30, w21",
+        "bic w21, w22, w30",
+        "eor w22, w20, w21",
+        "ror w20, w30, #6",
+        "eor w21, w20, w30, ror #11",
+        "eor w20, w21, w30, ror #25",
+        "add w21, w22, w20",
+        "mov w20, v4.s[1]",
+        "add w22, w21, w20",
+        "mov w20, v12.s[1]",
+        "add w21, w22, w20",
+        "and w20, w23, w24",
+        "orr w22, w23, w24",
+        "and w23, w18, w22",
+        "orr w22, w23, w20",
+        "add w20, w21, w22",
+        "ror w22, w18, #2",
+        "eor w23, w22, w18, ror #13",
+        "eor w22, w23, w18, ror #22",
+        "add w23, w20, w22",
+        "mov w20, v12.s[3]",
+        "add w22, w21, w20",
+        "mov v13.16b, v12.16b",
+        "mov v13.s[3], w23",
+        "mov v12.16b, v13.16b",
+        "mov v12.s[2], w18",
+        "mov v13.16b, v12.16b",
+        "mov v13.s[1], w22",
+        "mov v12.16b, v13.16b",
+        "mov v12.s[0], w30",
+        "mov v20.16b, v12.16b",
+        "tbl v13.16b, {v4.16b}, v5.16b",
+        "mov v16.16b, v13.16b",
+        "mov w20, v12.s[1]",
+        "mov w21, v12.s[0]",
+        "mov w22, v7.s[1]",
         "and w23, w20, w21",
-        "bic w22, w22, w20",
-        "eor w22, w23, w22",
+        "bic w24, w22, w20",
+        "eor w22, w23, w24",
         "ror w23, w20, #6",
-        "eor w23, w23, w20, ror #11",
-        "eor w23, w23, w20, ror #25",
-        "add w22, w22, w23",
-        "mov w23, v16.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v17.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v20.s[3]",
-        "mov w24, v20.s[2]",
-        "mov w25, v17.s[3]",
-        "and w30, w24, w25",
-        "orr w25, w24, w25",
-        "and w25, w23, w25",
-        "orr w25, w25, w30",
-        "add w25, w22, w25",
-        "ror w30, w23, #2",
-        "eor w30, w30, w23, ror #13",
-        "eor w30, w30, w23, ror #22",
-        "add w25, w25, w30",
-        "mov w30, v17.s[2]",
-        "add w22, w22, w30",
-        "and w20, w22, w20",
-        "bic w21, w21, w22",
-        "eor w20, w20, w21",
-        "ror w21, w22, #6",
-        "eor w21, w21, w22, ror #11",
-        "eor w21, w21, w22, ror #25",
-        "add w20, w20, w21",
-        "mov w21, v16.s[1]",
-        "add w20, w20, w21",
-        "mov w21, v17.s[1]",
-        "add w20, w20, w21",
-        "and w21, w23, w24",
-        "orr w23, w23, w24",
-        "and w23, w25, w23",
-        "orr w21, w23, w21",
-        "add w21, w20, w21",
-        "ror w23, w25, #2",
-        "eor w23, w23, w25, ror #13",
-        "eor w23, w23, w25, ror #22",
-        "add w21, w21, w23",
-        "mov w23, v17.s[3]",
-        "add w20, w20, w23",
-        "mov v5.16b, v17.16b",
-        "mov v5.s[3], w21",
-        "mov v5.s[2], w25",
-        "mov v5.s[1], w20",
-        "mov v17.16b, v5.16b",
-        "mov v17.s[0], w22",
-        "mov v16.16b, v24.16b",
-        "ext v16.16b, v21.16b, v24.16b, #4",
-        "add v23.4s, v23.4s, v16.4s",
+        "eor w24, w23, w20, ror #11",
+        "eor w23, w24, w20, ror #25",
+        "add w24, w22, w23",
+        "mov w22, v13.s[0]",
+        "add w23, w24, w22",
+        "mov w22, v7.s[0]",
+        "add w24, w23, w22",
+        "mov w22, v12.s[3]",
+        "mov w23, v12.s[2]",
+        "mov w25, v7.s[3]",
+        "and w30, w23, w25",
+        "orr w18, w23, w25",
+        "and w25, w22, w18",
+        "orr w18, w25, w30",
+        "add w25, w24, w18",
+        "ror w30, w22, #2",
+        "eor w18, w30, w22, ror #13",
+        "eor w30, w18, w22, ror #22",
+        "add w18, w25, w30",
+        "mov w25, v7.s[2]",
+        "add w30, w24, w25",
+        "and w24, w30, w20",
+        "bic w20, w21, w30",
+        "eor w21, w24, w20",
+        "ror w20, w30, #6",
+        "eor w24, w20, w30, ror #11",
+        "eor w20, w24, w30, ror #25",
+        "add w24, w21, w20",
+        "mov w20, v13.s[1]",
+        "add w21, w24, w20",
+        "mov w20, v7.s[1]",
+        "add w24, w21, w20",
+        "and w20, w22, w23",
+        "orr w21, w22, w23",
+        "and w22, w18, w21",
+        "orr w21, w22, w20",
+        "add w20, w24, w21",
+        "ror w21, w18, #2",
+        "eor w22, w21, w18, ror #13",
+        "eor w21, w22, w18, ror #22",
+        "add w22, w20, w21",
+        "mov w20, v7.s[3]",
+        "add w21, w24, w20",
+        "mov v4.16b, v7.16b",
+        "mov v4.s[3], w22",
+        "mov v7.16b, v4.16b",
+        "mov v7.s[2], w18",
+        "mov v4.16b, v7.16b",
+        "mov v4.s[1], w21",
+        "mov v7.16b, v4.16b",
+        "mov v7.s[0], w30",
+        "mov v17.16b, v7.16b",
+        "mov v16.16b, v14.16b",
+        "ext v4.16b, v8.16b, v14.16b, #4",
+        "mov v16.16b, v4.16b",
+        "add v13.4s, v3.4s, v4.4s",
+        "mov v23.16b, v13.16b",
         "mov w20, #0x1000",
         "movk w20, #0x1, lsl #16",
-        "ldr q16, [x29, x20, sxtx]",
-        "mov w20, v24.s[2]",
-        "mov w21, v24.s[3]",
-        "mov w22, v23.s[0]",
-        "ror w23, w20, #17",
-        "ror w24, w20, #19",
-        "eor w23, w23, w24",
-        "lsr w20, w20, #10",
-        "eor w20, w23, w20",
-        "add w20, w22, w20",
-        "mov w22, v23.s[1]",
-        "ror w23, w21, #17",
-        "ror w24, w21, #19",
-        "eor w23, w23, w24",
-        "lsr w21, w21, #10",
-        "eor w21, w23, w21",
-        "add w21, w22, w21",
-        "mov w22, v23.s[2]",
-        "ror w23, w20, #17",
-        "ror w24, w20, #19",
-        "eor w23, w23, w24",
-        "lsr w24, w20, #10",
-        "eor w23, w23, w24",
-        "add w22, w22, w23",
-        "mov w23, v23.s[3]",
-        "ror w24, w21, #17",
-        "ror w25, w21, #19",
-        "eor w24, w24, w25",
-        "lsr w25, w21, #10",
-        "eor w24, w24, w25",
-        "add w23, w23, w24",
-        "mov v5.16b, v23.16b",
-        "mov v5.s[3], w23",
-        "mov v5.s[2], w22",
-        "mov v5.s[1], w21",
-        "mov v23.16b, v5.16b",
-        "mov v23.s[0], w20",
-        "add v16.4s, v16.4s, v22.4s",
+        "ldr x21, [sp, #32]",
+        "ldr q3, [x21, x20, sxtx]",
+        "mov v16.16b, v3.16b",
+        "mov w22, v14.s[2]",
+        "mov w23, v14.s[3]",
+        "mov w24, v13.s[0]",
+        "ror w25, w22, #17",
+        "ror w30, w22, #19",
+        "eor w18, w25, w30",
+        "lsr w25, w22, #10",
+        "eor w22, w18, w25",
+        "add w25, w24, w22",
+        "mov w22, v13.s[1]",
+        "ror w24, w23, #17",
+        "ror w30, w23, #19",
+        "eor w18, w24, w30",
+        "lsr w24, w23, #10",
+        "eor w23, w18, w24",
+        "add w24, w22, w23",
+        "mov w22, v13.s[2]",
+        "ror w23, w25, #17",
+        "ror w30, w25, #19",
+        "eor w18, w23, w30",
+        "lsr w23, w25, #10",
+        "eor w30, w18, w23",
+        "add w23, w22, w30",
+        "mov w22, v13.s[3]",
+        "ror w30, w24, #17",
+        "ror w18, w24, #19",
+        "str x20, [sp, #160]",
+        "eor w20, w30, w18",
+        "lsr w30, w24, #10",
+        "eor w18, w20, w30",
+        "add w20, w22, w18",
+        "mov v4.16b, v13.16b",
+        "mov v4.s[3], w20",
+        "mov v13.16b, v4.16b",
+        "mov v13.s[2], w23",
+        "mov v4.16b, v13.16b",
+        "mov v4.s[1], w24",
+        "mov v13.16b, v4.16b",
+        "mov v13.s[0], w25",
+        "mov v23.16b, v13.16b",
+        "add v4.4s, v3.4s, v9.4s",
+        "mov v16.16b, v4.16b",
+        "mov v0.16b, v9.16b",
         "unimplemented (Unimplemented)",
-        "mov w20, v17.s[1]",
-        "mov w21, v17.s[0]",
-        "mov w22, v20.s[1]",
+        "mov v3.16b, v0.16b",
+        "mov v22.16b, v3.16b",
+        "mov w20, v7.s[1]",
+        "mov w22, v7.s[0]",
+        "mov w23, v12.s[1]",
+        "and w24, w20, w22",
+        "bic w25, w23, w20",
+        "eor w23, w24, w25",
+        "ror w24, w20, #6",
+        "eor w25, w24, w20, ror #11",
+        "eor w24, w25, w20, ror #25",
+        "add w25, w23, w24",
+        "mov w23, v4.s[0]",
+        "add w24, w25, w23",
+        "mov w23, v12.s[0]",
+        "add w25, w24, w23",
+        "mov w23, v7.s[3]",
+        "mov w24, v7.s[2]",
+        "mov w30, v12.s[3]",
+        "and w18, w24, w30",
+        "orr w21, w24, w30",
+        "and w30, w23, w21",
+        "orr w21, w30, w18",
+        "add w30, w25, w21",
+        "ror w21, w23, #2",
+        "eor w18, w21, w23, ror #13",
+        "eor w21, w18, w23, ror #22",
+        "add w18, w30, w21",
+        "mov w21, v12.s[2]",
+        "add w30, w25, w21",
+        "and w21, w30, w20",
+        "bic w20, w22, w30",
+        "eor w22, w21, w20",
+        "ror w20, w30, #6",
+        "eor w21, w20, w30, ror #11",
+        "eor w20, w21, w30, ror #25",
+        "add w21, w22, w20",
+        "mov w20, v4.s[1]",
+        "add w22, w21, w20",
+        "mov w20, v12.s[1]",
+        "add w21, w22, w20",
+        "and w20, w23, w24",
+        "orr w22, w23, w24",
+        "and w23, w18, w22",
+        "orr w22, w23, w20",
+        "add w20, w21, w22",
+        "ror w22, w18, #2",
+        "eor w23, w22, w18, ror #13",
+        "eor w22, w23, w18, ror #22",
+        "add w23, w20, w22",
+        "mov w20, v12.s[3]",
+        "add w22, w21, w20",
+        "mov v9.16b, v12.16b",
+        "mov v9.s[3], w23",
+        "mov v12.16b, v9.16b",
+        "mov v12.s[2], w18",
+        "mov v9.16b, v12.16b",
+        "mov v9.s[1], w22",
+        "mov v12.16b, v9.16b",
+        "mov v12.s[0], w30",
+        "mov v20.16b, v12.16b",
+        "tbl v9.16b, {v4.16b}, v5.16b",
+        "mov v16.16b, v9.16b",
+        "mov w20, v12.s[1]",
+        "mov w21, v12.s[0]",
+        "mov w22, v7.s[1]",
         "and w23, w20, w21",
-        "bic w22, w22, w20",
-        "eor w22, w23, w22",
+        "bic w24, w22, w20",
+        "eor w22, w23, w24",
         "ror w23, w20, #6",
-        "eor w23, w23, w20, ror #11",
-        "eor w23, w23, w20, ror #25",
-        "add w22, w22, w23",
-        "mov w23, v16.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v20.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v17.s[3]",
-        "mov w24, v17.s[2]",
-        "mov w25, v20.s[3]",
-        "and w30, w24, w25",
-        "orr w25, w24, w25",
-        "and w25, w23, w25",
-        "orr w25, w25, w30",
-        "add w25, w22, w25",
-        "ror w30, w23, #2",
-        "eor w30, w30, w23, ror #13",
-        "eor w30, w30, w23, ror #22",
-        "add w25, w25, w30",
-        "mov w30, v20.s[2]",
-        "add w22, w22, w30",
-        "and w20, w22, w20",
-        "bic w21, w21, w22",
-        "eor w20, w20, w21",
-        "ror w21, w22, #6",
-        "eor w21, w21, w22, ror #11",
-        "eor w21, w21, w22, ror #25",
-        "add w20, w20, w21",
-        "mov w21, v16.s[1]",
-        "add w20, w20, w21",
-        "mov w21, v20.s[1]",
-        "add w20, w20, w21",
-        "and w21, w23, w24",
-        "orr w23, w23, w24",
-        "and w23, w25, w23",
-        "orr w21, w23, w21",
-        "add w21, w20, w21",
-        "ror w23, w25, #2",
-        "eor w23, w23, w25, ror #13",
-        "eor w23, w23, w25, ror #22",
-        "add w21, w21, w23",
-        "mov w23, v20.s[3]",
-        "add w20, w20, w23",
-        "mov v5.16b, v20.16b",
-        "mov v5.s[3], w21",
-        "mov v5.s[2], w25",
-        "mov v5.s[1], w20",
-        "mov v20.16b, v5.16b",
-        "mov v20.s[0], w22",
-        "tbl v16.16b, {v16.16b}, v4.16b",
-        "mov w20, v20.s[1]",
-        "mov w21, v20.s[0]",
-        "mov w22, v17.s[1]",
-        "and w23, w20, w21",
-        "bic w22, w22, w20",
-        "eor w22, w23, w22",
-        "ror w23, w20, #6",
-        "eor w23, w23, w20, ror #11",
-        "eor w23, w23, w20, ror #25",
-        "add w22, w22, w23",
-        "mov w23, v16.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v17.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v20.s[3]",
-        "mov w24, v20.s[2]",
-        "mov w25, v17.s[3]",
-        "and w30, w24, w25",
-        "orr w25, w24, w25",
-        "and w25, w23, w25",
-        "orr w25, w25, w30",
-        "add w25, w22, w25",
-        "ror w30, w23, #2",
-        "eor w30, w30, w23, ror #13",
-        "eor w30, w30, w23, ror #22",
-        "add w25, w25, w30",
-        "mov w30, v17.s[2]",
-        "add w22, w22, w30",
-        "and w20, w22, w20",
-        "bic w21, w21, w22",
-        "eor w20, w20, w21",
-        "ror w21, w22, #6",
-        "eor w21, w21, w22, ror #11",
-        "eor w21, w21, w22, ror #25",
-        "add w20, w20, w21",
-        "mov w21, v16.s[1]",
-        "add w20, w20, w21",
-        "mov w21, v17.s[1]",
-        "add w20, w20, w21",
-        "and w21, w23, w24",
-        "orr w23, w23, w24",
-        "and w23, w25, w23",
-        "orr w21, w23, w21",
-        "add w21, w20, w21",
-        "ror w23, w25, #2",
-        "eor w23, w23, w25, ror #13",
-        "eor w23, w23, w25, ror #22",
-        "add w21, w21, w23",
-        "mov w23, v17.s[3]",
-        "add w20, w20, w23",
-        "mov v5.16b, v17.16b",
-        "mov v5.s[3], w21",
-        "mov v5.s[2], w25",
-        "mov v5.s[1], w20",
-        "mov v17.16b, v5.16b",
-        "mov v17.s[0], w22",
-        "mov v16.16b, v23.16b",
-        "ext v16.16b, v24.16b, v23.16b, #4",
-        "add v22.4s, v22.4s, v16.4s",
-        "mov w20, #0x1000",
-        "movk w20, #0x1, lsl #16",
-        "ldr q16, [x29, x20, sxtx]",
-        "mov w20, v23.s[2]",
-        "mov w21, v23.s[3]",
-        "mov w22, v22.s[0]",
-        "ror w23, w20, #17",
-        "ror w24, w20, #19",
-        "eor w23, w23, w24",
-        "lsr w20, w20, #10",
-        "eor w20, w23, w20",
-        "add w20, w22, w20",
-        "mov w22, v22.s[1]",
-        "ror w23, w21, #17",
-        "ror w24, w21, #19",
-        "eor w23, w23, w24",
-        "lsr w21, w21, #10",
-        "eor w21, w23, w21",
-        "add w21, w22, w21",
-        "mov w22, v22.s[2]",
-        "ror w23, w20, #17",
-        "ror w24, w20, #19",
-        "eor w23, w23, w24",
-        "lsr w24, w20, #10",
-        "eor w23, w23, w24",
-        "add w22, w22, w23",
-        "mov w23, v22.s[3]",
+        "eor w24, w23, w20, ror #11",
+        "eor w23, w24, w20, ror #25",
+        "add w24, w22, w23",
+        "mov w22, v9.s[0]",
+        "add w23, w24, w22",
+        "mov w22, v7.s[0]",
+        "add w24, w23, w22",
+        "mov w22, v12.s[3]",
+        "mov w23, v12.s[2]",
+        "mov w25, v7.s[3]",
+        "and w30, w23, w25",
+        "orr w18, w23, w25",
+        "and w25, w22, w18",
+        "orr w18, w25, w30",
+        "add w25, w24, w18",
+        "ror w30, w22, #2",
+        "eor w18, w30, w22, ror #13",
+        "eor w30, w18, w22, ror #22",
+        "add w18, w25, w30",
+        "mov w25, v7.s[2]",
+        "add w30, w24, w25",
+        "and w24, w30, w20",
+        "bic w20, w21, w30",
+        "eor w21, w24, w20",
+        "ror w20, w30, #6",
+        "eor w24, w20, w30, ror #11",
+        "eor w20, w24, w30, ror #25",
+        "add w24, w21, w20",
+        "mov w20, v9.s[1]",
+        "add w21, w24, w20",
+        "mov w20, v7.s[1]",
+        "add w24, w21, w20",
+        "and w20, w22, w23",
+        "orr w21, w22, w23",
+        "and w22, w18, w21",
+        "orr w21, w22, w20",
+        "add w20, w24, w21",
+        "ror w21, w18, #2",
+        "eor w22, w21, w18, ror #13",
+        "eor w21, w22, w18, ror #22",
+        "add w22, w20, w21",
+        "mov w20, v7.s[3]",
+        "add w21, w24, w20",
+        "mov v4.16b, v7.16b",
+        "mov v4.s[3], w22",
+        "mov v7.16b, v4.16b",
+        "mov v7.s[2], w18",
+        "mov v4.16b, v7.16b",
+        "mov v4.s[1], w21",
+        "mov v7.16b, v4.16b",
+        "mov v7.s[0], w30",
+        "mov v17.16b, v7.16b",
+        "mov v16.16b, v13.16b",
+        "ext v4.16b, v14.16b, v13.16b, #4",
+        "mov v16.16b, v4.16b",
+        "add v9.4s, v3.4s, v4.4s",
+        "mov v22.16b, v9.16b",
+        "ldr x20, [sp, #32]",
+        "ldr x21, [sp, #160]",
+        "ldr q3, [x20, x21, sxtx]",
+        "mov v16.16b, v3.16b",
+        "mov w21, v13.s[2]",
+        "mov w22, v13.s[3]",
+        "mov w23, v9.s[0]",
         "ror w24, w21, #17",
         "ror w25, w21, #19",
-        "eor w24, w24, w25",
-        "lsr w25, w21, #10",
-        "eor w24, w24, w25",
-        "add w23, w23, w24",
-        "mov v5.16b, v22.16b",
-        "mov v5.s[3], w23",
-        "mov v5.s[2], w22",
-        "mov v5.s[1], w21",
-        "mov v22.16b, v5.16b",
-        "mov v22.s[0], w20",
-        "add v16.4s, v16.4s, v21.4s",
+        "eor w30, w24, w25",
+        "lsr w24, w21, #10",
+        "eor w21, w30, w24",
+        "add w24, w23, w21",
+        "mov w21, v9.s[1]",
+        "ror w23, w22, #17",
+        "ror w25, w22, #19",
+        "eor w30, w23, w25",
+        "lsr w23, w22, #10",
+        "eor w22, w30, w23",
+        "add w23, w21, w22",
+        "mov w21, v9.s[2]",
+        "ror w22, w24, #17",
+        "ror w25, w24, #19",
+        "eor w30, w22, w25",
+        "lsr w22, w24, #10",
+        "eor w25, w30, w22",
+        "add w22, w21, w25",
+        "mov w21, v9.s[3]",
+        "ror w25, w23, #17",
+        "ror w30, w23, #19",
+        "eor w18, w25, w30",
+        "lsr w25, w23, #10",
+        "eor w30, w18, w25",
+        "add w25, w21, w30",
+        "mov v4.16b, v9.16b",
+        "mov v4.s[3], w25",
+        "mov v9.16b, v4.16b",
+        "mov v9.s[2], w22",
+        "mov v4.16b, v9.16b",
+        "mov v4.s[1], w23",
+        "mov v9.16b, v4.16b",
+        "mov v9.s[0], w24",
+        "mov v22.16b, v9.16b",
+        "add v4.4s, v3.4s, v8.4s",
+        "mov v16.16b, v4.16b",
+        "mov v0.16b, v8.16b",
         "unimplemented (Unimplemented)",
-        "mov w20, v17.s[1]",
-        "mov w21, v17.s[0]",
-        "mov w22, v20.s[1]",
-        "and w23, w20, w21",
-        "bic w22, w22, w20",
-        "eor w22, w23, w22",
-        "ror w23, w20, #6",
-        "eor w23, w23, w20, ror #11",
-        "eor w23, w23, w20, ror #25",
-        "add w22, w22, w23",
-        "mov w23, v16.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v20.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v17.s[3]",
-        "mov w24, v17.s[2]",
-        "mov w25, v20.s[3]",
-        "and w30, w24, w25",
-        "orr w25, w24, w25",
-        "and w25, w23, w25",
-        "orr w25, w25, w30",
-        "add w25, w22, w25",
-        "ror w30, w23, #2",
-        "eor w30, w30, w23, ror #13",
-        "eor w30, w30, w23, ror #22",
-        "add w25, w25, w30",
-        "mov w30, v20.s[2]",
-        "add w22, w22, w30",
-        "and w20, w22, w20",
-        "bic w21, w21, w22",
-        "eor w20, w20, w21",
-        "ror w21, w22, #6",
-        "eor w21, w21, w22, ror #11",
-        "eor w21, w21, w22, ror #25",
-        "add w20, w20, w21",
-        "mov w21, v16.s[1]",
-        "add w20, w20, w21",
-        "mov w21, v20.s[1]",
-        "add w20, w20, w21",
-        "and w21, w23, w24",
-        "orr w23, w23, w24",
-        "and w23, w25, w23",
-        "orr w21, w23, w21",
-        "add w21, w20, w21",
-        "ror w23, w25, #2",
-        "eor w23, w23, w25, ror #13",
-        "eor w23, w23, w25, ror #22",
-        "add w21, w21, w23",
-        "mov w23, v20.s[3]",
-        "add w20, w20, w23",
-        "mov v5.16b, v20.16b",
-        "mov v5.s[3], w21",
-        "mov v5.s[2], w25",
-        "mov v5.s[1], w20",
-        "mov v20.16b, v5.16b",
-        "mov v20.s[0], w22",
-        "tbl v16.16b, {v16.16b}, v4.16b",
-        "mov w20, v20.s[1]",
-        "mov w21, v20.s[0]",
-        "mov w22, v17.s[1]",
+        "mov v3.16b, v0.16b",
+        "mov v21.16b, v3.16b",
+        "mov w21, v7.s[1]",
+        "mov w22, v7.s[0]",
+        "mov w23, v12.s[1]",
+        "and w24, w21, w22",
+        "bic w25, w23, w21",
+        "eor w23, w24, w25",
+        "ror w24, w21, #6",
+        "eor w25, w24, w21, ror #11",
+        "eor w24, w25, w21, ror #25",
+        "add w25, w23, w24",
+        "mov w23, v4.s[0]",
+        "add w24, w25, w23",
+        "mov w23, v12.s[0]",
+        "add w25, w24, w23",
+        "mov w23, v7.s[3]",
+        "mov w24, v7.s[2]",
+        "mov w30, v12.s[3]",
+        "and w18, w24, w30",
+        "orr w20, w24, w30",
+        "and w30, w23, w20",
+        "orr w20, w30, w18",
+        "add w30, w25, w20",
+        "ror w20, w23, #2",
+        "eor w18, w20, w23, ror #13",
+        "eor w20, w18, w23, ror #22",
+        "add w18, w30, w20",
+        "mov w20, v12.s[2]",
+        "add w30, w25, w20",
+        "and w20, w30, w21",
+        "bic w21, w22, w30",
+        "eor w22, w20, w21",
+        "ror w20, w30, #6",
+        "eor w21, w20, w30, ror #11",
+        "eor w20, w21, w30, ror #25",
+        "add w21, w22, w20",
+        "mov w20, v4.s[1]",
+        "add w22, w21, w20",
+        "mov w20, v12.s[1]",
+        "add w21, w22, w20",
+        "and w20, w23, w24",
+        "orr w22, w23, w24",
+        "and w23, w18, w22",
+        "orr w22, w23, w20",
+        "add w20, w21, w22",
+        "ror w22, w18, #2",
+        "eor w23, w22, w18, ror #13",
+        "eor w22, w23, w18, ror #22",
+        "add w23, w20, w22",
+        "mov w20, v12.s[3]",
+        "add w22, w21, w20",
+        "mov v8.16b, v12.16b",
+        "mov v8.s[3], w23",
+        "mov v12.16b, v8.16b",
+        "mov v12.s[2], w18",
+        "mov v8.16b, v12.16b",
+        "mov v8.s[1], w22",
+        "mov v12.16b, v8.16b",
+        "mov v12.s[0], w30",
+        "mov v20.16b, v12.16b",
+        "tbl v8.16b, {v4.16b}, v5.16b",
+        "mov v16.16b, v8.16b",
+        "mov w20, v12.s[1]",
+        "mov w21, v12.s[0]",
+        "mov w22, v7.s[1]",
         "and w23, w20, w21",
-        "bic w22, w22, w20",
-        "eor w22, w23, w22",
+        "bic w24, w22, w20",
+        "eor w22, w23, w24",
         "ror w23, w20, #6",
-        "eor w23, w23, w20, ror #11",
-        "eor w23, w23, w20, ror #25",
-        "add w22, w22, w23",
-        "mov w23, v16.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v17.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v20.s[3]",
-        "mov w24, v20.s[2]",
-        "mov w25, v17.s[3]",
-        "and w30, w24, w25",
-        "orr w25, w24, w25",
-        "and w25, w23, w25",
-        "orr w25, w25, w30",
-        "add w25, w22, w25",
-        "ror w30, w23, #2",
-        "eor w30, w30, w23, ror #13",
-        "eor w30, w30, w23, ror #22",
-        "add w25, w25, w30",
-        "mov w30, v17.s[2]",
-        "add w22, w22, w30",
-        "and w20, w22, w20",
-        "bic w21, w21, w22",
-        "eor w20, w20, w21",
-        "ror w21, w22, #6",
-        "eor w21, w21, w22, ror #11",
-        "eor w21, w21, w22, ror #25",
-        "add w20, w20, w21",
-        "mov w21, v16.s[1]",
-        "add w20, w20, w21",
-        "mov w21, v17.s[1]",
-        "add w20, w20, w21",
-        "and w21, w23, w24",
-        "orr w23, w23, w24",
-        "and w23, w25, w23",
-        "orr w21, w23, w21",
-        "add w21, w20, w21",
-        "ror w23, w25, #2",
-        "eor w23, w23, w25, ror #13",
-        "eor w23, w23, w25, ror #22",
-        "add w21, w21, w23",
-        "mov w23, v17.s[3]",
-        "add w20, w20, w23",
-        "mov v5.16b, v17.16b",
-        "mov v5.s[3], w21",
-        "mov v5.s[2], w25",
-        "mov v5.s[1], w20",
-        "mov v17.16b, v5.16b",
-        "mov v17.s[0], w22",
-        "mov v16.16b, v22.16b",
-        "ext v16.16b, v23.16b, v22.16b, #4",
-        "add v21.4s, v21.4s, v16.4s",
+        "eor w24, w23, w20, ror #11",
+        "eor w23, w24, w20, ror #25",
+        "add w24, w22, w23",
+        "mov w22, v8.s[0]",
+        "add w23, w24, w22",
+        "mov w22, v7.s[0]",
+        "add w24, w23, w22",
+        "mov w22, v12.s[3]",
+        "mov w23, v12.s[2]",
+        "mov w25, v7.s[3]",
+        "and w30, w23, w25",
+        "orr w18, w23, w25",
+        "and w25, w22, w18",
+        "orr w18, w25, w30",
+        "add w25, w24, w18",
+        "ror w30, w22, #2",
+        "eor w18, w30, w22, ror #13",
+        "eor w30, w18, w22, ror #22",
+        "add w18, w25, w30",
+        "mov w25, v7.s[2]",
+        "add w30, w24, w25",
+        "and w24, w30, w20",
+        "bic w20, w21, w30",
+        "eor w21, w24, w20",
+        "ror w20, w30, #6",
+        "eor w24, w20, w30, ror #11",
+        "eor w20, w24, w30, ror #25",
+        "add w24, w21, w20",
+        "mov w20, v8.s[1]",
+        "add w21, w24, w20",
+        "mov w20, v7.s[1]",
+        "add w24, w21, w20",
+        "and w20, w22, w23",
+        "orr w21, w22, w23",
+        "and w22, w18, w21",
+        "orr w21, w22, w20",
+        "add w20, w24, w21",
+        "ror w21, w18, #2",
+        "eor w22, w21, w18, ror #13",
+        "eor w21, w22, w18, ror #22",
+        "add w22, w20, w21",
+        "mov w20, v7.s[3]",
+        "add w21, w24, w20",
+        "mov v4.16b, v7.16b",
+        "mov v4.s[3], w22",
+        "mov v7.16b, v4.16b",
+        "mov v7.s[2], w18",
+        "mov v4.16b, v7.16b",
+        "mov v4.s[1], w21",
+        "mov v7.16b, v4.16b",
+        "mov v7.s[0], w30",
+        "mov v17.16b, v7.16b",
+        "mov v16.16b, v9.16b",
+        "ext v4.16b, v13.16b, v9.16b, #4",
+        "mov v16.16b, v4.16b",
+        "add v8.4s, v3.4s, v4.4s",
+        "mov v21.16b, v8.16b",
         "mov w20, #0x1000",
         "movk w20, #0x1, lsl #16",
-        "ldr q16, [x29, x20, sxtx]",
-        "mov w20, v22.s[2]",
-        "mov w21, v22.s[3]",
-        "mov w22, v21.s[0]",
-        "ror w23, w20, #17",
-        "ror w24, w20, #19",
-        "eor w23, w23, w24",
-        "lsr w20, w20, #10",
-        "eor w20, w23, w20",
-        "add w20, w22, w20",
-        "mov w22, v21.s[1]",
-        "ror w23, w21, #17",
-        "ror w24, w21, #19",
-        "eor w23, w23, w24",
-        "lsr w21, w21, #10",
-        "eor w21, w23, w21",
-        "add w21, w22, w21",
-        "mov w22, v21.s[2]",
-        "ror w23, w20, #17",
-        "ror w24, w20, #19",
-        "eor w23, w23, w24",
-        "lsr w24, w20, #10",
-        "eor w23, w23, w24",
-        "add w22, w22, w23",
-        "mov w23, v21.s[3]",
-        "ror w24, w21, #17",
-        "ror w25, w21, #19",
-        "eor w24, w24, w25",
-        "lsr w25, w21, #10",
-        "eor w24, w24, w25",
-        "add w23, w23, w24",
-        "mov v5.16b, v21.16b",
-        "mov v5.s[3], w23",
-        "mov v5.s[2], w22",
-        "mov v5.s[1], w21",
-        "mov v21.16b, v5.16b",
-        "mov v21.s[0], w20",
-        "add v16.4s, v16.4s, v24.4s",
+        "ldr x21, [sp, #32]",
+        "ldr q3, [x21, x20, sxtx]",
+        "mov v16.16b, v3.16b",
+        "mov w22, v9.s[2]",
+        "mov w23, v9.s[3]",
+        "mov w24, v8.s[0]",
+        "ror w25, w22, #17",
+        "ror w30, w22, #19",
+        "eor w18, w25, w30",
+        "lsr w25, w22, #10",
+        "eor w22, w18, w25",
+        "add w25, w24, w22",
+        "mov w22, v8.s[1]",
+        "ror w24, w23, #17",
+        "ror w30, w23, #19",
+        "eor w18, w24, w30",
+        "lsr w24, w23, #10",
+        "eor w23, w18, w24",
+        "add w24, w22, w23",
+        "mov w22, v8.s[2]",
+        "ror w23, w25, #17",
+        "ror w30, w25, #19",
+        "eor w18, w23, w30",
+        "lsr w23, w25, #10",
+        "eor w30, w18, w23",
+        "add w23, w22, w30",
+        "mov w22, v8.s[3]",
+        "ror w30, w24, #17",
+        "ror w18, w24, #19",
+        "str x20, [sp, #192]",
+        "eor w20, w30, w18",
+        "lsr w30, w24, #10",
+        "eor w18, w20, w30",
+        "add w20, w22, w18",
+        "mov v4.16b, v8.16b",
+        "mov v4.s[3], w20",
+        "mov v8.16b, v4.16b",
+        "mov v8.s[2], w23",
+        "mov v4.16b, v8.16b",
+        "mov v4.s[1], w24",
+        "mov v8.16b, v4.16b",
+        "mov v8.s[0], w25",
+        "mov v21.16b, v8.16b",
+        "add v4.4s, v3.4s, v14.4s",
+        "mov v16.16b, v4.16b",
+        "mov v0.16b, v14.16b",
         "unimplemented (Unimplemented)",
-        "mov w20, v17.s[1]",
-        "mov w21, v17.s[0]",
-        "mov w22, v20.s[1]",
+        "mov v3.16b, v0.16b",
+        "mov v24.16b, v3.16b",
+        "mov w20, v7.s[1]",
+        "mov w22, v7.s[0]",
+        "mov w23, v12.s[1]",
+        "and w24, w20, w22",
+        "bic w25, w23, w20",
+        "eor w23, w24, w25",
+        "ror w24, w20, #6",
+        "eor w25, w24, w20, ror #11",
+        "eor w24, w25, w20, ror #25",
+        "add w25, w23, w24",
+        "mov w23, v4.s[0]",
+        "add w24, w25, w23",
+        "mov w23, v12.s[0]",
+        "add w25, w24, w23",
+        "mov w23, v7.s[3]",
+        "mov w24, v7.s[2]",
+        "mov w30, v12.s[3]",
+        "and w18, w24, w30",
+        "orr w21, w24, w30",
+        "and w30, w23, w21",
+        "orr w21, w30, w18",
+        "add w30, w25, w21",
+        "ror w21, w23, #2",
+        "eor w18, w21, w23, ror #13",
+        "eor w21, w18, w23, ror #22",
+        "add w18, w30, w21",
+        "mov w21, v12.s[2]",
+        "add w30, w25, w21",
+        "and w21, w30, w20",
+        "bic w20, w22, w30",
+        "eor w22, w21, w20",
+        "ror w20, w30, #6",
+        "eor w21, w20, w30, ror #11",
+        "eor w20, w21, w30, ror #25",
+        "add w21, w22, w20",
+        "mov w20, v4.s[1]",
+        "add w22, w21, w20",
+        "mov w20, v12.s[1]",
+        "add w21, w22, w20",
+        "and w20, w23, w24",
+        "orr w22, w23, w24",
+        "and w23, w18, w22",
+        "orr w22, w23, w20",
+        "add w20, w21, w22",
+        "ror w22, w18, #2",
+        "eor w23, w22, w18, ror #13",
+        "eor w22, w23, w18, ror #22",
+        "add w23, w20, w22",
+        "mov w20, v12.s[3]",
+        "add w22, w21, w20",
+        "mov v14.16b, v12.16b",
+        "mov v14.s[3], w23",
+        "mov v12.16b, v14.16b",
+        "mov v12.s[2], w18",
+        "mov v14.16b, v12.16b",
+        "mov v14.s[1], w22",
+        "mov v12.16b, v14.16b",
+        "mov v12.s[0], w30",
+        "mov v20.16b, v12.16b",
+        "tbl v14.16b, {v4.16b}, v5.16b",
+        "mov v16.16b, v14.16b",
+        "mov w20, v12.s[1]",
+        "mov w21, v12.s[0]",
+        "mov w22, v7.s[1]",
         "and w23, w20, w21",
-        "bic w22, w22, w20",
-        "eor w22, w23, w22",
+        "bic w24, w22, w20",
+        "eor w22, w23, w24",
         "ror w23, w20, #6",
-        "eor w23, w23, w20, ror #11",
-        "eor w23, w23, w20, ror #25",
-        "add w22, w22, w23",
-        "mov w23, v16.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v20.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v17.s[3]",
-        "mov w24, v17.s[2]",
-        "mov w25, v20.s[3]",
-        "and w30, w24, w25",
-        "orr w25, w24, w25",
-        "and w25, w23, w25",
-        "orr w25, w25, w30",
-        "add w25, w22, w25",
-        "ror w30, w23, #2",
-        "eor w30, w30, w23, ror #13",
-        "eor w30, w30, w23, ror #22",
-        "add w25, w25, w30",
-        "mov w30, v20.s[2]",
-        "add w22, w22, w30",
-        "and w20, w22, w20",
-        "bic w21, w21, w22",
-        "eor w20, w20, w21",
-        "ror w21, w22, #6",
-        "eor w21, w21, w22, ror #11",
-        "eor w21, w21, w22, ror #25",
-        "add w20, w20, w21",
-        "mov w21, v16.s[1]",
-        "add w20, w20, w21",
-        "mov w21, v20.s[1]",
-        "add w20, w20, w21",
-        "and w21, w23, w24",
-        "orr w23, w23, w24",
-        "and w23, w25, w23",
-        "orr w21, w23, w21",
-        "add w21, w20, w21",
-        "ror w23, w25, #2",
-        "eor w23, w23, w25, ror #13",
-        "eor w23, w23, w25, ror #22",
-        "add w21, w21, w23",
-        "mov w23, v20.s[3]",
-        "add w20, w20, w23",
-        "mov v5.16b, v20.16b",
-        "mov v5.s[3], w21",
-        "mov v5.s[2], w25",
-        "mov v5.s[1], w20",
-        "mov v20.16b, v5.16b",
-        "mov v20.s[0], w22",
-        "tbl v16.16b, {v16.16b}, v4.16b",
-        "mov w20, v20.s[1]",
-        "mov w21, v20.s[0]",
-        "mov w22, v17.s[1]",
-        "and w23, w20, w21",
-        "bic w22, w22, w20",
-        "eor w22, w23, w22",
-        "ror w23, w20, #6",
-        "eor w23, w23, w20, ror #11",
-        "eor w23, w23, w20, ror #25",
-        "add w22, w22, w23",
-        "mov w23, v16.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v17.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v20.s[3]",
-        "mov w24, v20.s[2]",
-        "mov w25, v17.s[3]",
-        "and w30, w24, w25",
-        "orr w25, w24, w25",
-        "and w25, w23, w25",
-        "orr w25, w25, w30",
-        "add w25, w22, w25",
-        "ror w30, w23, #2",
-        "eor w30, w30, w23, ror #13",
-        "eor w30, w30, w23, ror #22",
-        "add w25, w25, w30",
-        "mov w30, v17.s[2]",
-        "add w22, w22, w30",
-        "and w20, w22, w20",
-        "bic w21, w21, w22",
-        "eor w20, w20, w21",
-        "ror w21, w22, #6",
-        "eor w21, w21, w22, ror #11",
-        "eor w21, w21, w22, ror #25",
-        "add w20, w20, w21",
-        "mov w21, v16.s[1]",
-        "add w20, w20, w21",
-        "mov w21, v17.s[1]",
-        "add w20, w20, w21",
-        "and w21, w23, w24",
-        "orr w23, w23, w24",
-        "and w23, w25, w23",
-        "orr w21, w23, w21",
-        "add w21, w20, w21",
-        "ror w23, w25, #2",
-        "eor w23, w23, w25, ror #13",
-        "eor w23, w23, w25, ror #22",
-        "add w21, w21, w23",
-        "mov w23, v17.s[3]",
-        "add w20, w20, w23",
-        "mov v5.16b, v17.16b",
-        "mov v5.s[3], w21",
-        "mov v5.s[2], w25",
-        "mov v5.s[1], w20",
-        "mov v17.16b, v5.16b",
-        "mov v17.s[0], w22",
-        "mov v16.16b, v21.16b",
-        "ext v16.16b, v22.16b, v21.16b, #4",
-        "add v24.4s, v24.4s, v16.4s",
-        "mov w20, #0x1000",
-        "movk w20, #0x1, lsl #16",
-        "ldr q16, [x29, x20, sxtx]",
-        "mov w20, v21.s[2]",
-        "mov w21, v21.s[3]",
-        "mov w22, v24.s[0]",
-        "ror w23, w20, #17",
-        "ror w24, w20, #19",
-        "eor w23, w23, w24",
-        "lsr w20, w20, #10",
-        "eor w20, w23, w20",
-        "add w20, w22, w20",
-        "mov w22, v24.s[1]",
-        "ror w23, w21, #17",
-        "ror w24, w21, #19",
-        "eor w23, w23, w24",
-        "lsr w21, w21, #10",
-        "eor w21, w23, w21",
-        "add w21, w22, w21",
-        "mov w22, v24.s[2]",
-        "ror w23, w20, #17",
-        "ror w24, w20, #19",
-        "eor w23, w23, w24",
-        "lsr w24, w20, #10",
-        "eor w23, w23, w24",
-        "add w22, w22, w23",
-        "mov w23, v24.s[3]",
+        "eor w24, w23, w20, ror #11",
+        "eor w23, w24, w20, ror #25",
+        "add w24, w22, w23",
+        "mov w22, v14.s[0]",
+        "add w23, w24, w22",
+        "mov w22, v7.s[0]",
+        "add w24, w23, w22",
+        "mov w22, v12.s[3]",
+        "mov w23, v12.s[2]",
+        "mov w25, v7.s[3]",
+        "and w30, w23, w25",
+        "orr w18, w23, w25",
+        "and w25, w22, w18",
+        "orr w18, w25, w30",
+        "add w25, w24, w18",
+        "ror w30, w22, #2",
+        "eor w18, w30, w22, ror #13",
+        "eor w30, w18, w22, ror #22",
+        "add w18, w25, w30",
+        "mov w25, v7.s[2]",
+        "add w30, w24, w25",
+        "and w24, w30, w20",
+        "bic w20, w21, w30",
+        "eor w21, w24, w20",
+        "ror w20, w30, #6",
+        "eor w24, w20, w30, ror #11",
+        "eor w20, w24, w30, ror #25",
+        "add w24, w21, w20",
+        "mov w20, v14.s[1]",
+        "add w21, w24, w20",
+        "mov w20, v7.s[1]",
+        "add w24, w21, w20",
+        "and w20, w22, w23",
+        "orr w21, w22, w23",
+        "and w22, w18, w21",
+        "orr w21, w22, w20",
+        "add w20, w24, w21",
+        "ror w21, w18, #2",
+        "eor w22, w21, w18, ror #13",
+        "eor w21, w22, w18, ror #22",
+        "add w22, w20, w21",
+        "mov w20, v7.s[3]",
+        "add w21, w24, w20",
+        "mov v4.16b, v7.16b",
+        "mov v4.s[3], w22",
+        "mov v7.16b, v4.16b",
+        "mov v7.s[2], w18",
+        "mov v4.16b, v7.16b",
+        "mov v4.s[1], w21",
+        "mov v7.16b, v4.16b",
+        "mov v7.s[0], w30",
+        "mov v17.16b, v7.16b",
+        "mov v16.16b, v8.16b",
+        "ext v4.16b, v9.16b, v8.16b, #4",
+        "mov v16.16b, v4.16b",
+        "add v14.4s, v3.4s, v4.4s",
+        "mov v24.16b, v14.16b",
+        "ldr x20, [sp, #32]",
+        "ldr x21, [sp, #192]",
+        "ldr q3, [x20, x21, sxtx]",
+        "mov v16.16b, v3.16b",
+        "mov w21, v8.s[2]",
+        "mov w22, v8.s[3]",
+        "mov w23, v14.s[0]",
         "ror w24, w21, #17",
         "ror w25, w21, #19",
-        "eor w24, w24, w25",
-        "lsr w25, w21, #10",
-        "eor w24, w24, w25",
-        "add w23, w23, w24",
-        "mov v5.16b, v24.16b",
-        "mov v5.s[3], w23",
-        "mov v5.s[2], w22",
-        "mov v5.s[1], w21",
-        "mov v24.16b, v5.16b",
-        "mov v24.s[0], w20",
-        "add v16.4s, v16.4s, v23.4s",
+        "eor w30, w24, w25",
+        "lsr w24, w21, #10",
+        "eor w21, w30, w24",
+        "add w24, w23, w21",
+        "mov w21, v14.s[1]",
+        "ror w23, w22, #17",
+        "ror w25, w22, #19",
+        "eor w30, w23, w25",
+        "lsr w23, w22, #10",
+        "eor w22, w30, w23",
+        "add w23, w21, w22",
+        "mov w21, v14.s[2]",
+        "ror w22, w24, #17",
+        "ror w25, w24, #19",
+        "eor w30, w22, w25",
+        "lsr w22, w24, #10",
+        "eor w25, w30, w22",
+        "add w22, w21, w25",
+        "mov w21, v14.s[3]",
+        "ror w25, w23, #17",
+        "ror w30, w23, #19",
+        "eor w18, w25, w30",
+        "lsr w25, w23, #10",
+        "eor w30, w18, w25",
+        "add w25, w21, w30",
+        "mov v4.16b, v14.16b",
+        "mov v4.s[3], w25",
+        "mov v14.16b, v4.16b",
+        "mov v14.s[2], w22",
+        "mov v4.16b, v14.16b",
+        "mov v4.s[1], w23",
+        "mov v14.16b, v4.16b",
+        "mov v14.s[0], w24",
+        "mov v24.16b, v14.16b",
+        "add v4.4s, v3.4s, v13.4s",
+        "mov v16.16b, v4.16b",
+        "mov v0.16b, v13.16b",
         "unimplemented (Unimplemented)",
-        "mov w20, v17.s[1]",
-        "mov w21, v17.s[0]",
-        "mov w22, v20.s[1]",
-        "and w23, w20, w21",
-        "bic w22, w22, w20",
-        "eor w22, w23, w22",
-        "ror w23, w20, #6",
-        "eor w23, w23, w20, ror #11",
-        "eor w23, w23, w20, ror #25",
-        "add w22, w22, w23",
-        "mov w23, v16.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v20.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v17.s[3]",
-        "mov w24, v17.s[2]",
-        "mov w25, v20.s[3]",
-        "and w30, w24, w25",
-        "orr w25, w24, w25",
-        "and w25, w23, w25",
-        "orr w25, w25, w30",
-        "add w25, w22, w25",
-        "ror w30, w23, #2",
-        "eor w30, w30, w23, ror #13",
-        "eor w30, w30, w23, ror #22",
-        "add w25, w25, w30",
-        "mov w30, v20.s[2]",
-        "add w22, w22, w30",
-        "and w20, w22, w20",
-        "bic w21, w21, w22",
-        "eor w20, w20, w21",
-        "ror w21, w22, #6",
-        "eor w21, w21, w22, ror #11",
-        "eor w21, w21, w22, ror #25",
-        "add w20, w20, w21",
-        "mov w21, v16.s[1]",
-        "add w20, w20, w21",
-        "mov w21, v20.s[1]",
-        "add w20, w20, w21",
-        "and w21, w23, w24",
-        "orr w23, w23, w24",
-        "and w23, w25, w23",
-        "orr w21, w23, w21",
-        "add w21, w20, w21",
-        "ror w23, w25, #2",
-        "eor w23, w23, w25, ror #13",
-        "eor w23, w23, w25, ror #22",
-        "add w21, w21, w23",
-        "mov w23, v20.s[3]",
-        "add w20, w20, w23",
-        "mov v5.16b, v20.16b",
-        "mov v5.s[3], w21",
-        "mov v5.s[2], w25",
-        "mov v5.s[1], w20",
-        "mov v20.16b, v5.16b",
-        "mov v20.s[0], w22",
-        "tbl v16.16b, {v16.16b}, v4.16b",
-        "mov w20, v20.s[1]",
-        "mov w21, v20.s[0]",
-        "mov w22, v17.s[1]",
+        "mov v3.16b, v0.16b",
+        "mov v23.16b, v3.16b",
+        "mov w21, v7.s[1]",
+        "mov w22, v7.s[0]",
+        "mov w23, v12.s[1]",
+        "and w24, w21, w22",
+        "bic w25, w23, w21",
+        "eor w23, w24, w25",
+        "ror w24, w21, #6",
+        "eor w25, w24, w21, ror #11",
+        "eor w24, w25, w21, ror #25",
+        "add w25, w23, w24",
+        "mov w23, v4.s[0]",
+        "add w24, w25, w23",
+        "mov w23, v12.s[0]",
+        "add w25, w24, w23",
+        "mov w23, v7.s[3]",
+        "mov w24, v7.s[2]",
+        "mov w30, v12.s[3]",
+        "and w18, w24, w30",
+        "orr w20, w24, w30",
+        "and w30, w23, w20",
+        "orr w20, w30, w18",
+        "add w30, w25, w20",
+        "ror w20, w23, #2",
+        "eor w18, w20, w23, ror #13",
+        "eor w20, w18, w23, ror #22",
+        "add w18, w30, w20",
+        "mov w20, v12.s[2]",
+        "add w30, w25, w20",
+        "and w20, w30, w21",
+        "bic w21, w22, w30",
+        "eor w22, w20, w21",
+        "ror w20, w30, #6",
+        "eor w21, w20, w30, ror #11",
+        "eor w20, w21, w30, ror #25",
+        "add w21, w22, w20",
+        "mov w20, v4.s[1]",
+        "add w22, w21, w20",
+        "mov w20, v12.s[1]",
+        "add w21, w22, w20",
+        "and w20, w23, w24",
+        "orr w22, w23, w24",
+        "and w23, w18, w22",
+        "orr w22, w23, w20",
+        "add w20, w21, w22",
+        "ror w22, w18, #2",
+        "eor w23, w22, w18, ror #13",
+        "eor w22, w23, w18, ror #22",
+        "add w23, w20, w22",
+        "mov w20, v12.s[3]",
+        "add w22, w21, w20",
+        "mov v13.16b, v12.16b",
+        "mov v13.s[3], w23",
+        "mov v12.16b, v13.16b",
+        "mov v12.s[2], w18",
+        "mov v13.16b, v12.16b",
+        "mov v13.s[1], w22",
+        "mov v12.16b, v13.16b",
+        "mov v12.s[0], w30",
+        "mov v20.16b, v12.16b",
+        "tbl v13.16b, {v4.16b}, v5.16b",
+        "mov v16.16b, v13.16b",
+        "mov w20, v12.s[1]",
+        "mov w21, v12.s[0]",
+        "mov w22, v7.s[1]",
         "and w23, w20, w21",
-        "bic w22, w22, w20",
-        "eor w22, w23, w22",
+        "bic w24, w22, w20",
+        "eor w22, w23, w24",
         "ror w23, w20, #6",
-        "eor w23, w23, w20, ror #11",
-        "eor w23, w23, w20, ror #25",
-        "add w22, w22, w23",
-        "mov w23, v16.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v17.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v20.s[3]",
-        "mov w24, v20.s[2]",
-        "mov w25, v17.s[3]",
-        "and w30, w24, w25",
-        "orr w25, w24, w25",
-        "and w25, w23, w25",
-        "orr w25, w25, w30",
-        "add w25, w22, w25",
-        "ror w30, w23, #2",
-        "eor w30, w30, w23, ror #13",
-        "eor w30, w30, w23, ror #22",
-        "add w25, w25, w30",
-        "mov w30, v17.s[2]",
-        "add w22, w22, w30",
-        "and w20, w22, w20",
-        "bic w21, w21, w22",
-        "eor w20, w20, w21",
-        "ror w21, w22, #6",
-        "eor w21, w21, w22, ror #11",
-        "eor w21, w21, w22, ror #25",
-        "add w20, w20, w21",
-        "mov w21, v16.s[1]",
-        "add w20, w20, w21",
-        "mov w21, v17.s[1]",
-        "add w20, w20, w21",
-        "and w21, w23, w24",
-        "orr w23, w23, w24",
-        "and w23, w25, w23",
-        "orr w21, w23, w21",
-        "add w21, w20, w21",
-        "ror w23, w25, #2",
-        "eor w23, w23, w25, ror #13",
-        "eor w23, w23, w25, ror #22",
-        "add w21, w21, w23",
-        "mov w23, v17.s[3]",
-        "add w20, w20, w23",
-        "mov v5.16b, v17.16b",
-        "mov v5.s[3], w21",
-        "mov v5.s[2], w25",
-        "mov v5.s[1], w20",
-        "mov v17.16b, v5.16b",
-        "mov v17.s[0], w22",
-        "mov v16.16b, v24.16b",
-        "ext v16.16b, v21.16b, v24.16b, #4",
-        "add v23.4s, v23.4s, v16.4s",
+        "eor w24, w23, w20, ror #11",
+        "eor w23, w24, w20, ror #25",
+        "add w24, w22, w23",
+        "mov w22, v13.s[0]",
+        "add w23, w24, w22",
+        "mov w22, v7.s[0]",
+        "add w24, w23, w22",
+        "mov w22, v12.s[3]",
+        "mov w23, v12.s[2]",
+        "mov w25, v7.s[3]",
+        "and w30, w23, w25",
+        "orr w18, w23, w25",
+        "and w25, w22, w18",
+        "orr w18, w25, w30",
+        "add w25, w24, w18",
+        "ror w30, w22, #2",
+        "eor w18, w30, w22, ror #13",
+        "eor w30, w18, w22, ror #22",
+        "add w18, w25, w30",
+        "mov w25, v7.s[2]",
+        "add w30, w24, w25",
+        "and w24, w30, w20",
+        "bic w20, w21, w30",
+        "eor w21, w24, w20",
+        "ror w20, w30, #6",
+        "eor w24, w20, w30, ror #11",
+        "eor w20, w24, w30, ror #25",
+        "add w24, w21, w20",
+        "mov w20, v13.s[1]",
+        "add w21, w24, w20",
+        "mov w20, v7.s[1]",
+        "add w24, w21, w20",
+        "and w20, w22, w23",
+        "orr w21, w22, w23",
+        "and w22, w18, w21",
+        "orr w21, w22, w20",
+        "add w20, w24, w21",
+        "ror w21, w18, #2",
+        "eor w22, w21, w18, ror #13",
+        "eor w21, w22, w18, ror #22",
+        "add w22, w20, w21",
+        "mov w20, v7.s[3]",
+        "add w21, w24, w20",
+        "mov v4.16b, v7.16b",
+        "mov v4.s[3], w22",
+        "mov v7.16b, v4.16b",
+        "mov v7.s[2], w18",
+        "mov v4.16b, v7.16b",
+        "mov v4.s[1], w21",
+        "mov v7.16b, v4.16b",
+        "mov v7.s[0], w30",
+        "mov v17.16b, v7.16b",
+        "mov v16.16b, v14.16b",
+        "ext v4.16b, v8.16b, v14.16b, #4",
+        "mov v16.16b, v4.16b",
+        "add v13.4s, v3.4s, v4.4s",
+        "mov v23.16b, v13.16b",
         "mov w20, #0x1000",
         "movk w20, #0x1, lsl #16",
-        "ldr q16, [x29, x20, sxtx]",
-        "mov w20, v24.s[2]",
-        "mov w21, v24.s[3]",
-        "mov w22, v23.s[0]",
-        "ror w23, w20, #17",
-        "ror w24, w20, #19",
-        "eor w23, w23, w24",
-        "lsr w20, w20, #10",
-        "eor w20, w23, w20",
-        "add w20, w22, w20",
-        "mov w22, v23.s[1]",
-        "ror w23, w21, #17",
-        "ror w24, w21, #19",
-        "eor w23, w23, w24",
-        "lsr w21, w21, #10",
-        "eor w21, w23, w21",
-        "add w21, w22, w21",
-        "mov w22, v23.s[2]",
-        "ror w23, w20, #17",
-        "ror w24, w20, #19",
-        "eor w23, w23, w24",
-        "lsr w24, w20, #10",
-        "eor w23, w23, w24",
-        "add w22, w22, w23",
-        "mov w23, v23.s[3]",
-        "ror w24, w21, #17",
-        "ror w25, w21, #19",
-        "eor w24, w24, w25",
-        "lsr w25, w21, #10",
-        "eor w24, w24, w25",
-        "add w23, w23, w24",
-        "mov v5.16b, v23.16b",
-        "mov v5.s[3], w23",
-        "mov v5.s[2], w22",
-        "mov v5.s[1], w21",
-        "mov v23.16b, v5.16b",
-        "mov v23.s[0], w20",
-        "add v16.4s, v16.4s, v22.4s",
+        "ldr x21, [sp, #32]",
+        "ldr q3, [x21, x20, sxtx]",
+        "mov v16.16b, v3.16b",
+        "mov w22, v14.s[2]",
+        "mov w23, v14.s[3]",
+        "mov w24, v13.s[0]",
+        "ror w25, w22, #17",
+        "ror w30, w22, #19",
+        "eor w18, w25, w30",
+        "lsr w25, w22, #10",
+        "eor w22, w18, w25",
+        "add w25, w24, w22",
+        "mov w22, v13.s[1]",
+        "ror w24, w23, #17",
+        "ror w30, w23, #19",
+        "eor w18, w24, w30",
+        "lsr w24, w23, #10",
+        "eor w23, w18, w24",
+        "add w24, w22, w23",
+        "mov w22, v13.s[2]",
+        "ror w23, w25, #17",
+        "ror w30, w25, #19",
+        "eor w18, w23, w30",
+        "lsr w23, w25, #10",
+        "eor w30, w18, w23",
+        "add w23, w22, w30",
+        "mov w22, v13.s[3]",
+        "ror w30, w24, #17",
+        "ror w18, w24, #19",
+        "str x20, [sp, #224]",
+        "eor w20, w30, w18",
+        "lsr w30, w24, #10",
+        "eor w18, w20, w30",
+        "add w20, w22, w18",
+        "mov v4.16b, v13.16b",
+        "mov v4.s[3], w20",
+        "mov v13.16b, v4.16b",
+        "mov v13.s[2], w23",
+        "mov v4.16b, v13.16b",
+        "mov v4.s[1], w24",
+        "mov v13.16b, v4.16b",
+        "mov v13.s[0], w25",
+        "mov v23.16b, v13.16b",
+        "add v4.4s, v3.4s, v9.4s",
+        "mov v16.16b, v4.16b",
+        "mov v0.16b, v9.16b",
         "unimplemented (Unimplemented)",
-        "mov w20, v17.s[1]",
-        "mov w21, v17.s[0]",
-        "mov w22, v20.s[1]",
+        "mov v3.16b, v0.16b",
+        "mov v22.16b, v3.16b",
+        "mov w20, v7.s[1]",
+        "mov w22, v7.s[0]",
+        "mov w23, v12.s[1]",
+        "and w24, w20, w22",
+        "bic w25, w23, w20",
+        "eor w23, w24, w25",
+        "ror w24, w20, #6",
+        "eor w25, w24, w20, ror #11",
+        "eor w24, w25, w20, ror #25",
+        "add w25, w23, w24",
+        "mov w23, v4.s[0]",
+        "add w24, w25, w23",
+        "mov w23, v12.s[0]",
+        "add w25, w24, w23",
+        "mov w23, v7.s[3]",
+        "mov w24, v7.s[2]",
+        "mov w30, v12.s[3]",
+        "and w18, w24, w30",
+        "orr w21, w24, w30",
+        "and w30, w23, w21",
+        "orr w21, w30, w18",
+        "add w30, w25, w21",
+        "ror w21, w23, #2",
+        "eor w18, w21, w23, ror #13",
+        "eor w21, w18, w23, ror #22",
+        "add w18, w30, w21",
+        "mov w21, v12.s[2]",
+        "add w30, w25, w21",
+        "and w21, w30, w20",
+        "bic w20, w22, w30",
+        "eor w22, w21, w20",
+        "ror w20, w30, #6",
+        "eor w21, w20, w30, ror #11",
+        "eor w20, w21, w30, ror #25",
+        "add w21, w22, w20",
+        "mov w20, v4.s[1]",
+        "add w22, w21, w20",
+        "mov w20, v12.s[1]",
+        "add w21, w22, w20",
+        "and w20, w23, w24",
+        "orr w22, w23, w24",
+        "and w23, w18, w22",
+        "orr w22, w23, w20",
+        "add w20, w21, w22",
+        "ror w22, w18, #2",
+        "eor w23, w22, w18, ror #13",
+        "eor w22, w23, w18, ror #22",
+        "add w23, w20, w22",
+        "mov w20, v12.s[3]",
+        "add w22, w21, w20",
+        "mov v9.16b, v12.16b",
+        "mov v9.s[3], w23",
+        "mov v12.16b, v9.16b",
+        "mov v12.s[2], w18",
+        "mov v9.16b, v12.16b",
+        "mov v9.s[1], w22",
+        "mov v12.16b, v9.16b",
+        "mov v12.s[0], w30",
+        "mov v20.16b, v12.16b",
+        "tbl v9.16b, {v4.16b}, v5.16b",
+        "mov v16.16b, v9.16b",
+        "mov w20, v12.s[1]",
+        "mov w21, v12.s[0]",
+        "mov w22, v7.s[1]",
         "and w23, w20, w21",
-        "bic w22, w22, w20",
-        "eor w22, w23, w22",
+        "bic w24, w22, w20",
+        "eor w22, w23, w24",
         "ror w23, w20, #6",
-        "eor w23, w23, w20, ror #11",
-        "eor w23, w23, w20, ror #25",
-        "add w22, w22, w23",
-        "mov w23, v16.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v20.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v17.s[3]",
-        "mov w24, v17.s[2]",
-        "mov w25, v20.s[3]",
-        "and w30, w24, w25",
-        "orr w25, w24, w25",
-        "and w25, w23, w25",
-        "orr w25, w25, w30",
-        "add w25, w22, w25",
-        "ror w30, w23, #2",
-        "eor w30, w30, w23, ror #13",
-        "eor w30, w30, w23, ror #22",
-        "add w25, w25, w30",
-        "mov w30, v20.s[2]",
-        "add w22, w22, w30",
-        "and w20, w22, w20",
-        "bic w21, w21, w22",
-        "eor w20, w20, w21",
-        "ror w21, w22, #6",
-        "eor w21, w21, w22, ror #11",
-        "eor w21, w21, w22, ror #25",
-        "add w20, w20, w21",
-        "mov w21, v16.s[1]",
-        "add w20, w20, w21",
-        "mov w21, v20.s[1]",
-        "add w20, w20, w21",
-        "and w21, w23, w24",
-        "orr w23, w23, w24",
-        "and w23, w25, w23",
-        "orr w21, w23, w21",
-        "add w21, w20, w21",
-        "ror w23, w25, #2",
-        "eor w23, w23, w25, ror #13",
-        "eor w23, w23, w25, ror #22",
-        "add w21, w21, w23",
-        "mov w23, v20.s[3]",
-        "add w20, w20, w23",
-        "mov v5.16b, v20.16b",
-        "mov v5.s[3], w21",
-        "mov v5.s[2], w25",
-        "mov v5.s[1], w20",
-        "mov v20.16b, v5.16b",
-        "mov v20.s[0], w22",
-        "tbl v16.16b, {v16.16b}, v4.16b",
-        "mov w20, v20.s[1]",
-        "mov w21, v20.s[0]",
-        "mov w22, v17.s[1]",
-        "and w23, w20, w21",
-        "bic w22, w22, w20",
-        "eor w22, w23, w22",
-        "ror w23, w20, #6",
-        "eor w23, w23, w20, ror #11",
-        "eor w23, w23, w20, ror #25",
-        "add w22, w22, w23",
-        "mov w23, v16.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v17.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v20.s[3]",
-        "mov w24, v20.s[2]",
-        "mov w25, v17.s[3]",
-        "and w30, w24, w25",
-        "orr w25, w24, w25",
-        "and w25, w23, w25",
-        "orr w25, w25, w30",
-        "add w25, w22, w25",
-        "ror w30, w23, #2",
-        "eor w30, w30, w23, ror #13",
-        "eor w30, w30, w23, ror #22",
-        "add w25, w25, w30",
-        "mov w30, v17.s[2]",
-        "add w22, w22, w30",
-        "and w20, w22, w20",
-        "bic w21, w21, w22",
-        "eor w20, w20, w21",
-        "ror w21, w22, #6",
-        "eor w21, w21, w22, ror #11",
-        "eor w21, w21, w22, ror #25",
-        "add w20, w20, w21",
-        "mov w21, v16.s[1]",
-        "add w20, w20, w21",
-        "mov w21, v17.s[1]",
-        "add w20, w20, w21",
-        "and w21, w23, w24",
-        "orr w23, w23, w24",
-        "and w23, w25, w23",
-        "orr w21, w23, w21",
-        "add w21, w20, w21",
-        "ror w23, w25, #2",
-        "eor w23, w23, w25, ror #13",
-        "eor w23, w23, w25, ror #22",
-        "add w21, w21, w23",
-        "mov w23, v17.s[3]",
-        "add w20, w20, w23",
-        "mov v5.16b, v17.16b",
-        "mov v5.s[3], w21",
-        "mov v5.s[2], w25",
-        "mov v5.s[1], w20",
-        "mov v17.16b, v5.16b",
-        "mov v17.s[0], w22",
-        "mov v16.16b, v23.16b",
-        "ext v16.16b, v24.16b, v23.16b, #4",
-        "add v22.4s, v22.4s, v16.4s",
-        "mov w20, #0x1000",
-        "movk w20, #0x1, lsl #16",
-        "ldr q16, [x29, x20, sxtx]",
-        "mov w20, v23.s[2]",
-        "mov w21, v23.s[3]",
-        "mov w22, v22.s[0]",
-        "ror w23, w20, #17",
-        "ror w24, w20, #19",
-        "eor w23, w23, w24",
-        "lsr w20, w20, #10",
-        "eor w20, w23, w20",
-        "add w20, w22, w20",
-        "mov w22, v22.s[1]",
-        "ror w23, w21, #17",
-        "ror w24, w21, #19",
-        "eor w23, w23, w24",
-        "lsr w21, w21, #10",
-        "eor w21, w23, w21",
-        "add w21, w22, w21",
-        "mov w22, v22.s[2]",
-        "ror w23, w20, #17",
-        "ror w24, w20, #19",
-        "eor w23, w23, w24",
-        "lsr w24, w20, #10",
-        "eor w23, w23, w24",
-        "add w22, w22, w23",
-        "mov w23, v22.s[3]",
+        "eor w24, w23, w20, ror #11",
+        "eor w23, w24, w20, ror #25",
+        "add w24, w22, w23",
+        "mov w22, v9.s[0]",
+        "add w23, w24, w22",
+        "mov w22, v7.s[0]",
+        "add w24, w23, w22",
+        "mov w22, v12.s[3]",
+        "mov w23, v12.s[2]",
+        "mov w25, v7.s[3]",
+        "and w30, w23, w25",
+        "orr w18, w23, w25",
+        "and w25, w22, w18",
+        "orr w18, w25, w30",
+        "add w25, w24, w18",
+        "ror w30, w22, #2",
+        "eor w18, w30, w22, ror #13",
+        "eor w30, w18, w22, ror #22",
+        "add w18, w25, w30",
+        "mov w25, v7.s[2]",
+        "add w30, w24, w25",
+        "and w24, w30, w20",
+        "bic w20, w21, w30",
+        "eor w21, w24, w20",
+        "ror w20, w30, #6",
+        "eor w24, w20, w30, ror #11",
+        "eor w20, w24, w30, ror #25",
+        "add w24, w21, w20",
+        "mov w20, v9.s[1]",
+        "add w21, w24, w20",
+        "mov w20, v7.s[1]",
+        "add w24, w21, w20",
+        "and w20, w22, w23",
+        "orr w21, w22, w23",
+        "and w22, w18, w21",
+        "orr w21, w22, w20",
+        "add w20, w24, w21",
+        "ror w21, w18, #2",
+        "eor w22, w21, w18, ror #13",
+        "eor w21, w22, w18, ror #22",
+        "add w22, w20, w21",
+        "mov w20, v7.s[3]",
+        "add w21, w24, w20",
+        "mov v4.16b, v7.16b",
+        "mov v4.s[3], w22",
+        "mov v7.16b, v4.16b",
+        "mov v7.s[2], w18",
+        "mov v4.16b, v7.16b",
+        "mov v4.s[1], w21",
+        "mov v7.16b, v4.16b",
+        "mov v7.s[0], w30",
+        "mov v17.16b, v7.16b",
+        "mov v16.16b, v13.16b",
+        "ext v4.16b, v14.16b, v13.16b, #4",
+        "mov v16.16b, v4.16b",
+        "add v9.4s, v3.4s, v4.4s",
+        "mov v22.16b, v9.16b",
+        "ldr x20, [sp, #32]",
+        "ldr x21, [sp, #224]",
+        "ldr q3, [x20, x21, sxtx]",
+        "mov v16.16b, v3.16b",
+        "mov w21, v13.s[2]",
+        "mov w22, v13.s[3]",
+        "mov w23, v9.s[0]",
         "ror w24, w21, #17",
         "ror w25, w21, #19",
-        "eor w24, w24, w25",
-        "lsr w25, w21, #10",
-        "eor w24, w24, w25",
-        "add w23, w23, w24",
-        "mov v5.16b, v22.16b",
-        "mov v5.s[3], w23",
-        "mov v5.s[2], w22",
-        "mov v5.s[1], w21",
-        "mov v22.16b, v5.16b",
-        "mov v22.s[0], w20",
-        "add v16.4s, v16.4s, v21.4s",
+        "eor w30, w24, w25",
+        "lsr w24, w21, #10",
+        "eor w21, w30, w24",
+        "add w24, w23, w21",
+        "mov w21, v9.s[1]",
+        "ror w23, w22, #17",
+        "ror w25, w22, #19",
+        "eor w30, w23, w25",
+        "lsr w23, w22, #10",
+        "eor w22, w30, w23",
+        "add w23, w21, w22",
+        "mov w21, v9.s[2]",
+        "ror w22, w24, #17",
+        "ror w25, w24, #19",
+        "eor w30, w22, w25",
+        "lsr w22, w24, #10",
+        "eor w25, w30, w22",
+        "add w22, w21, w25",
+        "mov w21, v9.s[3]",
+        "ror w25, w23, #17",
+        "ror w30, w23, #19",
+        "eor w18, w25, w30",
+        "lsr w25, w23, #10",
+        "eor w30, w18, w25",
+        "add w25, w21, w30",
+        "mov v4.16b, v9.16b",
+        "mov v4.s[3], w25",
+        "mov v9.16b, v4.16b",
+        "mov v9.s[2], w22",
+        "mov v4.16b, v9.16b",
+        "mov v4.s[1], w23",
+        "mov v9.16b, v4.16b",
+        "mov v9.s[0], w24",
+        "mov v22.16b, v9.16b",
+        "add v4.4s, v3.4s, v8.4s",
+        "mov v16.16b, v4.16b",
+        "mov v0.16b, v8.16b",
         "unimplemented (Unimplemented)",
-        "mov w20, v17.s[1]",
-        "mov w21, v17.s[0]",
-        "mov w22, v20.s[1]",
-        "and w23, w20, w21",
-        "bic w22, w22, w20",
-        "eor w22, w23, w22",
-        "ror w23, w20, #6",
-        "eor w23, w23, w20, ror #11",
-        "eor w23, w23, w20, ror #25",
-        "add w22, w22, w23",
-        "mov w23, v16.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v20.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v17.s[3]",
-        "mov w24, v17.s[2]",
-        "mov w25, v20.s[3]",
-        "and w30, w24, w25",
-        "orr w25, w24, w25",
-        "and w25, w23, w25",
-        "orr w25, w25, w30",
-        "add w25, w22, w25",
-        "ror w30, w23, #2",
-        "eor w30, w30, w23, ror #13",
-        "eor w30, w30, w23, ror #22",
-        "add w25, w25, w30",
-        "mov w30, v20.s[2]",
-        "add w22, w22, w30",
-        "and w20, w22, w20",
-        "bic w21, w21, w22",
-        "eor w20, w20, w21",
-        "ror w21, w22, #6",
-        "eor w21, w21, w22, ror #11",
-        "eor w21, w21, w22, ror #25",
-        "add w20, w20, w21",
-        "mov w21, v16.s[1]",
-        "add w20, w20, w21",
-        "mov w21, v20.s[1]",
-        "add w20, w20, w21",
-        "and w21, w23, w24",
-        "orr w23, w23, w24",
-        "and w23, w25, w23",
-        "orr w21, w23, w21",
-        "add w21, w20, w21",
-        "ror w23, w25, #2",
-        "eor w23, w23, w25, ror #13",
-        "eor w23, w23, w25, ror #22",
-        "add w21, w21, w23",
-        "mov w23, v20.s[3]",
-        "add w20, w20, w23",
-        "mov v5.16b, v20.16b",
-        "mov v5.s[3], w21",
-        "mov v5.s[2], w25",
-        "mov v5.s[1], w20",
-        "mov v20.16b, v5.16b",
-        "mov v20.s[0], w22",
-        "tbl v16.16b, {v16.16b}, v4.16b",
-        "mov w20, v20.s[1]",
-        "mov w21, v20.s[0]",
-        "mov w22, v17.s[1]",
+        "mov v3.16b, v0.16b",
+        "mov v21.16b, v3.16b",
+        "mov w21, v7.s[1]",
+        "mov w22, v7.s[0]",
+        "mov w23, v12.s[1]",
+        "and w24, w21, w22",
+        "bic w25, w23, w21",
+        "eor w23, w24, w25",
+        "ror w24, w21, #6",
+        "eor w25, w24, w21, ror #11",
+        "eor w24, w25, w21, ror #25",
+        "add w25, w23, w24",
+        "mov w23, v4.s[0]",
+        "add w24, w25, w23",
+        "mov w23, v12.s[0]",
+        "add w25, w24, w23",
+        "mov w23, v7.s[3]",
+        "mov w24, v7.s[2]",
+        "mov w30, v12.s[3]",
+        "and w18, w24, w30",
+        "orr w20, w24, w30",
+        "and w30, w23, w20",
+        "orr w20, w30, w18",
+        "add w30, w25, w20",
+        "ror w20, w23, #2",
+        "eor w18, w20, w23, ror #13",
+        "eor w20, w18, w23, ror #22",
+        "add w18, w30, w20",
+        "mov w20, v12.s[2]",
+        "add w30, w25, w20",
+        "and w20, w30, w21",
+        "bic w21, w22, w30",
+        "eor w22, w20, w21",
+        "ror w20, w30, #6",
+        "eor w21, w20, w30, ror #11",
+        "eor w20, w21, w30, ror #25",
+        "add w21, w22, w20",
+        "mov w20, v4.s[1]",
+        "add w22, w21, w20",
+        "mov w20, v12.s[1]",
+        "add w21, w22, w20",
+        "and w20, w23, w24",
+        "orr w22, w23, w24",
+        "and w23, w18, w22",
+        "orr w22, w23, w20",
+        "add w20, w21, w22",
+        "ror w22, w18, #2",
+        "eor w23, w22, w18, ror #13",
+        "eor w22, w23, w18, ror #22",
+        "add w23, w20, w22",
+        "mov w20, v12.s[3]",
+        "add w22, w21, w20",
+        "mov v8.16b, v12.16b",
+        "mov v8.s[3], w23",
+        "mov v12.16b, v8.16b",
+        "mov v12.s[2], w18",
+        "mov v8.16b, v12.16b",
+        "mov v8.s[1], w22",
+        "mov v12.16b, v8.16b",
+        "mov v12.s[0], w30",
+        "mov v20.16b, v12.16b",
+        "tbl v8.16b, {v4.16b}, v5.16b",
+        "mov v16.16b, v8.16b",
+        "mov w20, v12.s[1]",
+        "mov w21, v12.s[0]",
+        "mov w22, v7.s[1]",
         "and w23, w20, w21",
-        "bic w22, w22, w20",
-        "eor w22, w23, w22",
+        "bic w24, w22, w20",
+        "eor w22, w23, w24",
         "ror w23, w20, #6",
-        "eor w23, w23, w20, ror #11",
-        "eor w23, w23, w20, ror #25",
-        "add w22, w22, w23",
-        "mov w23, v16.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v17.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v20.s[3]",
-        "mov w24, v20.s[2]",
-        "mov w25, v17.s[3]",
-        "and w30, w24, w25",
-        "orr w25, w24, w25",
-        "and w25, w23, w25",
-        "orr w25, w25, w30",
-        "add w25, w22, w25",
-        "ror w30, w23, #2",
-        "eor w30, w30, w23, ror #13",
-        "eor w30, w30, w23, ror #22",
-        "add w25, w25, w30",
-        "mov w30, v17.s[2]",
-        "add w22, w22, w30",
-        "and w20, w22, w20",
-        "bic w21, w21, w22",
-        "eor w20, w20, w21",
-        "ror w21, w22, #6",
-        "eor w21, w21, w22, ror #11",
-        "eor w21, w21, w22, ror #25",
-        "add w20, w20, w21",
-        "mov w21, v16.s[1]",
-        "add w20, w20, w21",
-        "mov w21, v17.s[1]",
-        "add w20, w20, w21",
-        "and w21, w23, w24",
-        "orr w23, w23, w24",
-        "and w23, w25, w23",
-        "orr w21, w23, w21",
-        "add w21, w20, w21",
-        "ror w23, w25, #2",
-        "eor w23, w23, w25, ror #13",
-        "eor w23, w23, w25, ror #22",
-        "add w21, w21, w23",
-        "mov w23, v17.s[3]",
-        "add w20, w20, w23",
-        "mov v5.16b, v17.16b",
-        "mov v5.s[3], w21",
-        "mov v5.s[2], w25",
-        "mov v5.s[1], w20",
-        "mov v17.16b, v5.16b",
-        "mov v17.s[0], w22",
-        "mov v16.16b, v22.16b",
-        "ext v16.16b, v23.16b, v22.16b, #4",
+        "eor w24, w23, w20, ror #11",
+        "eor w23, w24, w20, ror #25",
+        "add w24, w22, w23",
+        "mov w22, v8.s[0]",
+        "add w23, w24, w22",
+        "mov w22, v7.s[0]",
+        "add w24, w23, w22",
+        "mov w22, v12.s[3]",
+        "mov w23, v12.s[2]",
+        "mov w25, v7.s[3]",
+        "and w30, w23, w25",
+        "orr w18, w23, w25",
+        "and w25, w22, w18",
+        "orr w18, w25, w30",
+        "add w25, w24, w18",
+        "ror w30, w22, #2",
+        "eor w18, w30, w22, ror #13",
+        "eor w30, w18, w22, ror #22",
+        "add w18, w25, w30",
+        "mov w25, v7.s[2]",
+        "add w30, w24, w25",
+        "and w24, w30, w20",
+        "bic w20, w21, w30",
+        "eor w21, w24, w20",
+        "ror w20, w30, #6",
+        "eor w24, w20, w30, ror #11",
+        "eor w20, w24, w30, ror #25",
+        "add w24, w21, w20",
+        "mov w20, v8.s[1]",
+        "add w21, w24, w20",
+        "mov w20, v7.s[1]",
+        "add w24, w21, w20",
+        "and w20, w22, w23",
+        "orr w21, w22, w23",
+        "and w22, w18, w21",
+        "orr w21, w22, w20",
+        "add w20, w24, w21",
+        "ror w21, w18, #2",
+        "eor w22, w21, w18, ror #13",
+        "eor w21, w22, w18, ror #22",
+        "add w22, w20, w21",
+        "mov w20, v7.s[3]",
+        "add w21, w24, w20",
+        "mov v4.16b, v7.16b",
+        "mov v4.s[3], w22",
+        "mov v7.16b, v4.16b",
+        "mov v7.s[2], w18",
+        "mov v4.16b, v7.16b",
+        "mov v4.s[1], w21",
+        "mov v7.16b, v4.16b",
+        "mov v7.s[0], w30",
+        "mov v17.16b, v7.16b",
+        "mov v16.16b, v9.16b",
+        "ext v4.16b, v13.16b, v9.16b, #4",
+        "mov v16.16b, v4.16b",
         "mov w20, #0x1000",
         "movk w20, #0x1, lsl #16",
-        "ldr q5, [x29, x20, sxtx]",
-        "add v23.4s, v23.4s, v5.4s",
-        "add v21.4s, v21.4s, v16.4s",
-        "ldr q16, [x29, x20, sxtx]",
-        "mov w21, v22.s[2]",
-        "mov w22, v22.s[3]",
-        "mov w23, v21.s[0]",
-        "ror w24, w21, #17",
-        "ror w25, w21, #19",
-        "eor w24, w24, w25",
-        "lsr w21, w21, #10",
-        "eor w21, w24, w21",
-        "add w21, w23, w21",
-        "mov w23, v21.s[1]",
-        "ror w24, w22, #17",
-        "ror w25, w22, #19",
-        "eor w24, w24, w25",
-        "lsr w22, w22, #10",
-        "eor w22, w24, w22",
-        "add w22, w23, w22",
-        "mov w23, v21.s[2]",
-        "ror w24, w21, #17",
-        "ror w25, w21, #19",
-        "eor w24, w24, w25",
-        "lsr w25, w21, #10",
-        "eor w24, w24, w25",
-        "add w23, w23, w24",
-        "mov w24, v21.s[3]",
+        "ldr x21, [sp, #32]",
+        "ldr q8, [x21, x20, sxtx]",
+        "add v15.4s, v13.4s, v8.4s",
+        "mov v23.16b, v15.16b",
+        "add v8.4s, v3.4s, v4.4s",
+        "mov v21.16b, v8.16b",
+        "ldr q3, [x21, x20, sxtx]",
+        "mov v16.16b, v3.16b",
+        "mov w22, v9.s[2]",
+        "mov w23, v9.s[3]",
+        "mov w24, v8.s[0]",
         "ror w25, w22, #17",
         "ror w30, w22, #19",
-        "eor w25, w25, w30",
-        "lsr w30, w22, #10",
-        "eor w25, w25, w30",
-        "add w24, w24, w25",
-        "mov v5.16b, v21.16b",
-        "mov v5.s[3], w24",
-        "mov v5.s[2], w23",
-        "mov v5.s[1], w22",
-        "mov v21.16b, v5.16b",
-        "mov v21.s[0], w21",
-        "ldr q5, [x29, x20, sxtx]",
-        "add v22.4s, v22.4s, v5.4s",
-        "ldr q5, [x29, x20, sxtx]",
-        "add v21.4s, v21.4s, v5.4s",
-        "add v16.4s, v16.4s, v24.4s",
-        "mov w20, v17.s[1]",
-        "mov w21, v17.s[0]",
-        "mov w22, v20.s[1]",
+        "eor w18, w25, w30",
+        "lsr w25, w22, #10",
+        "eor w22, w18, w25",
+        "add w25, w24, w22",
+        "mov w22, v8.s[1]",
+        "ror w24, w23, #17",
+        "ror w30, w23, #19",
+        "eor w18, w24, w30",
+        "lsr w24, w23, #10",
+        "eor w23, w18, w24",
+        "add w24, w22, w23",
+        "mov w22, v8.s[2]",
+        "ror w23, w25, #17",
+        "ror w30, w25, #19",
+        "eor w18, w23, w30",
+        "lsr w23, w25, #10",
+        "eor w30, w18, w23",
+        "add w23, w22, w30",
+        "mov w22, v8.s[3]",
+        "ror w30, w24, #17",
+        "ror w18, w24, #19",
+        "str x20, [sp, #256]",
+        "eor w20, w30, w18",
+        "lsr w30, w24, #10",
+        "eor w18, w20, w30",
+        "add w20, w22, w18",
+        "mov v4.16b, v8.16b",
+        "mov v4.s[3], w20",
+        "mov v8.16b, v4.16b",
+        "mov v8.s[2], w23",
+        "mov v4.16b, v8.16b",
+        "mov v4.s[1], w24",
+        "mov v8.16b, v4.16b",
+        "mov v8.s[0], w25",
+        "mov v21.16b, v8.16b",
+        "ldr x20, [sp, #256]",
+        "ldr q4, [x21, x20, sxtx]",
+        "add v13.4s, v9.4s, v4.4s",
+        "mov v22.16b, v13.16b",
+        "ldr q4, [x21, x20, sxtx]",
+        "add v9.4s, v8.4s, v4.4s",
+        "mov v21.16b, v9.16b",
+        "add v4.4s, v3.4s, v14.4s",
+        "mov v16.16b, v4.16b",
+        "mov w20, v7.s[1]",
+        "mov w21, v7.s[0]",
+        "mov w22, v12.s[1]",
         "and w23, w20, w21",
-        "bic w22, w22, w20",
-        "eor w22, w23, w22",
+        "bic w24, w22, w20",
+        "eor w22, w23, w24",
         "ror w23, w20, #6",
-        "eor w23, w23, w20, ror #11",
-        "eor w23, w23, w20, ror #25",
-        "add w22, w22, w23",
-        "mov w23, v16.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v20.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v17.s[3]",
-        "mov w24, v17.s[2]",
-        "mov w25, v20.s[3]",
-        "and w30, w24, w25",
-        "orr w25, w24, w25",
-        "and w25, w23, w25",
-        "orr w25, w25, w30",
-        "add w25, w22, w25",
-        "ror w30, w23, #2",
-        "eor w30, w30, w23, ror #13",
-        "eor w30, w30, w23, ror #22",
-        "add w25, w25, w30",
-        "mov w30, v20.s[2]",
-        "add w22, w22, w30",
-        "and w20, w22, w20",
-        "bic w21, w21, w22",
-        "eor w20, w20, w21",
-        "ror w21, w22, #6",
-        "eor w21, w21, w22, ror #11",
-        "eor w21, w21, w22, ror #25",
-        "add w20, w20, w21",
-        "mov w21, v16.s[1]",
-        "add w20, w20, w21",
-        "mov w21, v20.s[1]",
-        "add w20, w20, w21",
-        "and w21, w23, w24",
-        "orr w23, w23, w24",
-        "and w23, w25, w23",
-        "orr w21, w23, w21",
-        "add w21, w20, w21",
-        "ror w23, w25, #2",
-        "eor w23, w23, w25, ror #13",
-        "eor w23, w23, w25, ror #22",
-        "add w21, w21, w23",
-        "mov w23, v20.s[3]",
-        "add w20, w20, w23",
-        "mov v5.16b, v20.16b",
-        "mov v5.s[3], w21",
-        "mov v5.s[2], w25",
-        "mov v5.s[1], w20",
-        "mov v20.16b, v5.16b",
-        "mov v20.s[0], w22",
-        "tbl v16.16b, {v16.16b}, v4.16b",
-        "mov w20, v20.s[1]",
-        "mov w21, v20.s[0]",
-        "mov w22, v17.s[1]",
+        "eor w24, w23, w20, ror #11",
+        "eor w23, w24, w20, ror #25",
+        "add w24, w22, w23",
+        "mov w22, v4.s[0]",
+        "add w23, w24, w22",
+        "mov w22, v12.s[0]",
+        "add w24, w23, w22",
+        "mov w22, v7.s[3]",
+        "mov w23, v7.s[2]",
+        "mov w25, v12.s[3]",
+        "and w30, w23, w25",
+        "orr w18, w23, w25",
+        "and w25, w22, w18",
+        "orr w18, w25, w30",
+        "add w25, w24, w18",
+        "ror w30, w22, #2",
+        "eor w18, w30, w22, ror #13",
+        "eor w30, w18, w22, ror #22",
+        "add w18, w25, w30",
+        "mov w25, v12.s[2]",
+        "add w30, w24, w25",
+        "and w24, w30, w20",
+        "bic w20, w21, w30",
+        "eor w21, w24, w20",
+        "ror w20, w30, #6",
+        "eor w24, w20, w30, ror #11",
+        "eor w20, w24, w30, ror #25",
+        "add w24, w21, w20",
+        "mov w20, v4.s[1]",
+        "add w21, w24, w20",
+        "mov w20, v12.s[1]",
+        "add w24, w21, w20",
+        "and w20, w22, w23",
+        "orr w21, w22, w23",
+        "and w22, w18, w21",
+        "orr w21, w22, w20",
+        "add w20, w24, w21",
+        "ror w21, w18, #2",
+        "eor w22, w21, w18, ror #13",
+        "eor w21, w22, w18, ror #22",
+        "add w22, w20, w21",
+        "mov w20, v12.s[3]",
+        "add w21, w24, w20",
+        "mov v3.16b, v12.16b",
+        "mov v3.s[3], w22",
+        "mov v8.16b, v3.16b",
+        "mov v8.s[2], w18",
+        "mov v3.16b, v8.16b",
+        "mov v3.s[1], w21",
+        "mov v8.16b, v3.16b",
+        "mov v8.s[0], w30",
+        "mov v20.16b, v8.16b",
+        "tbl v3.16b, {v4.16b}, v5.16b",
+        "mov v16.16b, v3.16b",
+        "mov w20, v8.s[1]",
+        "mov w21, v8.s[0]",
+        "mov w22, v7.s[1]",
         "and w23, w20, w21",
-        "bic w22, w22, w20",
-        "eor w22, w23, w22",
+        "bic w24, w22, w20",
+        "eor w22, w23, w24",
         "ror w23, w20, #6",
-        "eor w23, w23, w20, ror #11",
-        "eor w23, w23, w20, ror #25",
-        "add w22, w22, w23",
-        "mov w23, v16.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v17.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v20.s[3]",
-        "mov w24, v20.s[2]",
-        "mov w25, v17.s[3]",
-        "and w30, w24, w25",
-        "orr w25, w24, w25",
-        "and w25, w23, w25",
-        "orr w25, w25, w30",
-        "add w25, w22, w25",
-        "ror w30, w23, #2",
-        "eor w30, w30, w23, ror #13",
-        "eor w30, w30, w23, ror #22",
-        "add w25, w25, w30",
-        "mov w30, v17.s[2]",
-        "add w22, w22, w30",
-        "and w20, w22, w20",
-        "bic w21, w21, w22",
-        "eor w20, w20, w21",
-        "ror w21, w22, #6",
-        "eor w21, w21, w22, ror #11",
-        "eor w21, w21, w22, ror #25",
-        "add w20, w20, w21",
-        "mov w21, v16.s[1]",
-        "add w20, w20, w21",
-        "mov w21, v17.s[1]",
-        "add w20, w20, w21",
-        "and w21, w23, w24",
-        "orr w23, w23, w24",
-        "and w23, w25, w23",
-        "orr w21, w23, w21",
-        "add w21, w20, w21",
-        "ror w23, w25, #2",
-        "eor w23, w23, w25, ror #13",
-        "eor w23, w23, w25, ror #22",
-        "add w21, w21, w23",
-        "mov w23, v17.s[3]",
-        "add w20, w20, w23",
-        "mov v5.16b, v17.16b",
-        "mov v5.s[3], w21",
-        "mov v5.s[2], w25",
-        "mov v5.s[1], w20",
-        "mov v17.16b, v5.16b",
-        "mov v17.s[0], w22",
-        "mov v16.16b, v23.16b",
-        "mov w20, v17.s[1]",
-        "mov w21, v17.s[0]",
-        "mov w22, v20.s[1]",
+        "eor w24, w23, w20, ror #11",
+        "eor w23, w24, w20, ror #25",
+        "add w24, w22, w23",
+        "mov w22, v3.s[0]",
+        "add w23, w24, w22",
+        "mov w22, v7.s[0]",
+        "add w24, w23, w22",
+        "mov w22, v8.s[3]",
+        "mov w23, v8.s[2]",
+        "mov w25, v7.s[3]",
+        "and w30, w23, w25",
+        "orr w18, w23, w25",
+        "and w25, w22, w18",
+        "orr w18, w25, w30",
+        "add w25, w24, w18",
+        "ror w30, w22, #2",
+        "eor w18, w30, w22, ror #13",
+        "eor w30, w18, w22, ror #22",
+        "add w18, w25, w30",
+        "mov w25, v7.s[2]",
+        "add w30, w24, w25",
+        "and w24, w30, w20",
+        "bic w20, w21, w30",
+        "eor w21, w24, w20",
+        "ror w20, w30, #6",
+        "eor w24, w20, w30, ror #11",
+        "eor w20, w24, w30, ror #25",
+        "add w24, w21, w20",
+        "mov w20, v3.s[1]",
+        "add w21, w24, w20",
+        "mov w20, v7.s[1]",
+        "add w24, w21, w20",
+        "and w20, w22, w23",
+        "orr w21, w22, w23",
+        "and w22, w18, w21",
+        "orr w21, w22, w20",
+        "add w20, w24, w21",
+        "ror w21, w18, #2",
+        "eor w22, w21, w18, ror #13",
+        "eor w21, w22, w18, ror #22",
+        "add w22, w20, w21",
+        "mov w20, v7.s[3]",
+        "add w21, w24, w20",
+        "mov v3.16b, v7.16b",
+        "mov v3.s[3], w22",
+        "mov v4.16b, v3.16b",
+        "mov v4.s[2], w18",
+        "mov v3.16b, v4.16b",
+        "mov v3.s[1], w21",
+        "mov v4.16b, v3.16b",
+        "mov v4.s[0], w30",
+        "mov v17.16b, v4.16b",
+        "mov v16.16b, v15.16b",
+        "mov w20, v4.s[1]",
+        "mov w21, v4.s[0]",
+        "mov w22, v8.s[1]",
         "and w23, w20, w21",
-        "bic w22, w22, w20",
-        "eor w22, w23, w22",
+        "bic w24, w22, w20",
+        "eor w22, w23, w24",
         "ror w23, w20, #6",
-        "eor w23, w23, w20, ror #11",
-        "eor w23, w23, w20, ror #25",
-        "add w22, w22, w23",
-        "mov w23, v23.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v20.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v17.s[3]",
-        "mov w24, v17.s[2]",
-        "mov w25, v20.s[3]",
-        "and w30, w24, w25",
-        "orr w25, w24, w25",
-        "and w25, w23, w25",
-        "orr w25, w25, w30",
-        "add w25, w22, w25",
-        "ror w30, w23, #2",
-        "eor w30, w30, w23, ror #13",
-        "eor w30, w30, w23, ror #22",
-        "add w25, w25, w30",
-        "mov w30, v20.s[2]",
-        "add w22, w22, w30",
-        "and w20, w22, w20",
-        "bic w21, w21, w22",
-        "eor w20, w20, w21",
-        "ror w21, w22, #6",
-        "eor w21, w21, w22, ror #11",
-        "eor w21, w21, w22, ror #25",
-        "add w20, w20, w21",
-        "mov w21, v23.s[1]",
-        "add w20, w20, w21",
-        "mov w21, v20.s[1]",
-        "add w20, w20, w21",
-        "and w21, w23, w24",
-        "orr w23, w23, w24",
-        "and w23, w25, w23",
-        "orr w21, w23, w21",
-        "add w21, w20, w21",
-        "ror w23, w25, #2",
-        "eor w23, w23, w25, ror #13",
-        "eor w23, w23, w25, ror #22",
-        "add w21, w21, w23",
-        "mov w23, v20.s[3]",
-        "add w20, w20, w23",
-        "mov v5.16b, v20.16b",
-        "mov v5.s[3], w21",
-        "mov v5.s[2], w25",
-        "mov v5.s[1], w20",
-        "mov v20.16b, v5.16b",
-        "mov v20.s[0], w22",
-        "tbl v16.16b, {v23.16b}, v4.16b",
-        "mov w20, v20.s[1]",
-        "mov w21, v20.s[0]",
-        "mov w22, v17.s[1]",
+        "eor w24, w23, w20, ror #11",
+        "eor w23, w24, w20, ror #25",
+        "add w24, w22, w23",
+        "mov w22, v15.s[0]",
+        "add w23, w24, w22",
+        "mov w22, v8.s[0]",
+        "add w24, w23, w22",
+        "mov w22, v4.s[3]",
+        "mov w23, v4.s[2]",
+        "mov w25, v8.s[3]",
+        "and w30, w23, w25",
+        "orr w18, w23, w25",
+        "and w25, w22, w18",
+        "orr w18, w25, w30",
+        "add w25, w24, w18",
+        "ror w30, w22, #2",
+        "eor w18, w30, w22, ror #13",
+        "eor w30, w18, w22, ror #22",
+        "add w18, w25, w30",
+        "mov w25, v8.s[2]",
+        "add w30, w24, w25",
+        "and w24, w30, w20",
+        "bic w20, w21, w30",
+        "eor w21, w24, w20",
+        "ror w20, w30, #6",
+        "eor w24, w20, w30, ror #11",
+        "eor w20, w24, w30, ror #25",
+        "add w24, w21, w20",
+        "mov w20, v15.s[1]",
+        "add w21, w24, w20",
+        "mov w20, v8.s[1]",
+        "add w24, w21, w20",
+        "and w20, w22, w23",
+        "orr w21, w22, w23",
+        "and w22, w18, w21",
+        "orr w21, w22, w20",
+        "add w20, w24, w21",
+        "ror w21, w18, #2",
+        "eor w22, w21, w18, ror #13",
+        "eor w21, w22, w18, ror #22",
+        "add w22, w20, w21",
+        "mov w20, v8.s[3]",
+        "add w21, w24, w20",
+        "mov v3.16b, v8.16b",
+        "mov v3.s[3], w22",
+        "mov v7.16b, v3.16b",
+        "mov v7.s[2], w18",
+        "mov v3.16b, v7.16b",
+        "mov v3.s[1], w21",
+        "mov v7.16b, v3.16b",
+        "mov v7.s[0], w30",
+        "mov v20.16b, v7.16b",
+        "tbl v3.16b, {v15.16b}, v5.16b",
+        "mov v16.16b, v3.16b",
+        "mov w20, v7.s[1]",
+        "mov w21, v7.s[0]",
+        "mov w22, v4.s[1]",
         "and w23, w20, w21",
-        "bic w22, w22, w20",
-        "eor w22, w23, w22",
+        "bic w24, w22, w20",
+        "eor w22, w23, w24",
         "ror w23, w20, #6",
-        "eor w23, w23, w20, ror #11",
-        "eor w23, w23, w20, ror #25",
-        "add w22, w22, w23",
-        "mov w23, v16.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v17.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v20.s[3]",
-        "mov w24, v20.s[2]",
-        "mov w25, v17.s[3]",
-        "and w30, w24, w25",
-        "orr w25, w24, w25",
-        "and w25, w23, w25",
-        "orr w25, w25, w30",
-        "add w25, w22, w25",
-        "ror w30, w23, #2",
-        "eor w30, w30, w23, ror #13",
-        "eor w30, w30, w23, ror #22",
-        "add w25, w25, w30",
-        "mov w30, v17.s[2]",
-        "add w22, w22, w30",
-        "and w20, w22, w20",
-        "bic w21, w21, w22",
-        "eor w20, w20, w21",
-        "ror w21, w22, #6",
-        "eor w21, w21, w22, ror #11",
-        "eor w21, w21, w22, ror #25",
-        "add w20, w20, w21",
-        "mov w21, v16.s[1]",
-        "add w20, w20, w21",
-        "mov w21, v17.s[1]",
-        "add w20, w20, w21",
-        "and w21, w23, w24",
-        "orr w23, w23, w24",
-        "and w23, w25, w23",
-        "orr w21, w23, w21",
-        "add w21, w20, w21",
-        "ror w23, w25, #2",
-        "eor w23, w23, w25, ror #13",
-        "eor w23, w23, w25, ror #22",
-        "add w21, w21, w23",
-        "mov w23, v17.s[3]",
-        "add w20, w20, w23",
-        "mov v5.16b, v17.16b",
-        "mov v5.s[3], w21",
-        "mov v5.s[2], w25",
-        "mov v5.s[1], w20",
-        "mov v17.16b, v5.16b",
-        "mov v17.s[0], w22",
-        "mov v16.16b, v22.16b",
-        "mov w20, v17.s[1]",
-        "mov w21, v17.s[0]",
-        "mov w22, v20.s[1]",
+        "eor w24, w23, w20, ror #11",
+        "eor w23, w24, w20, ror #25",
+        "add w24, w22, w23",
+        "mov w22, v3.s[0]",
+        "add w23, w24, w22",
+        "mov w22, v4.s[0]",
+        "add w24, w23, w22",
+        "mov w22, v7.s[3]",
+        "mov w23, v7.s[2]",
+        "mov w25, v4.s[3]",
+        "and w30, w23, w25",
+        "orr w18, w23, w25",
+        "and w25, w22, w18",
+        "orr w18, w25, w30",
+        "add w25, w24, w18",
+        "ror w30, w22, #2",
+        "eor w18, w30, w22, ror #13",
+        "eor w30, w18, w22, ror #22",
+        "add w18, w25, w30",
+        "mov w25, v4.s[2]",
+        "add w30, w24, w25",
+        "and w24, w30, w20",
+        "bic w20, w21, w30",
+        "eor w21, w24, w20",
+        "ror w20, w30, #6",
+        "eor w24, w20, w30, ror #11",
+        "eor w20, w24, w30, ror #25",
+        "add w24, w21, w20",
+        "mov w20, v3.s[1]",
+        "add w21, w24, w20",
+        "mov w20, v4.s[1]",
+        "add w24, w21, w20",
+        "and w20, w22, w23",
+        "orr w21, w22, w23",
+        "and w22, w18, w21",
+        "orr w21, w22, w20",
+        "add w20, w24, w21",
+        "ror w21, w18, #2",
+        "eor w22, w21, w18, ror #13",
+        "eor w21, w22, w18, ror #22",
+        "add w22, w20, w21",
+        "mov w20, v4.s[3]",
+        "add w21, w24, w20",
+        "mov v3.16b, v4.16b",
+        "mov v3.s[3], w22",
+        "mov v4.16b, v3.16b",
+        "mov v4.s[2], w18",
+        "mov v3.16b, v4.16b",
+        "mov v3.s[1], w21",
+        "mov v4.16b, v3.16b",
+        "mov v4.s[0], w30",
+        "mov v17.16b, v4.16b",
+        "mov v16.16b, v13.16b",
+        "mov w20, v4.s[1]",
+        "mov w21, v4.s[0]",
+        "mov w22, v7.s[1]",
         "and w23, w20, w21",
-        "bic w22, w22, w20",
-        "eor w22, w23, w22",
+        "bic w24, w22, w20",
+        "eor w22, w23, w24",
         "ror w23, w20, #6",
-        "eor w23, w23, w20, ror #11",
-        "eor w23, w23, w20, ror #25",
-        "add w22, w22, w23",
-        "mov w23, v22.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v20.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v17.s[3]",
-        "mov w24, v17.s[2]",
-        "mov w25, v20.s[3]",
-        "and w30, w24, w25",
-        "orr w25, w24, w25",
-        "and w25, w23, w25",
-        "orr w25, w25, w30",
-        "add w25, w22, w25",
-        "ror w30, w23, #2",
-        "eor w30, w30, w23, ror #13",
-        "eor w30, w30, w23, ror #22",
-        "add w25, w25, w30",
-        "mov w30, v20.s[2]",
-        "add w22, w22, w30",
-        "and w20, w22, w20",
-        "bic w21, w21, w22",
-        "eor w20, w20, w21",
-        "ror w21, w22, #6",
-        "eor w21, w21, w22, ror #11",
-        "eor w21, w21, w22, ror #25",
-        "add w20, w20, w21",
-        "mov w21, v22.s[1]",
-        "add w20, w20, w21",
-        "mov w21, v20.s[1]",
-        "add w20, w20, w21",
-        "and w21, w23, w24",
-        "orr w23, w23, w24",
-        "and w23, w25, w23",
-        "orr w21, w23, w21",
-        "add w21, w20, w21",
-        "ror w23, w25, #2",
-        "eor w23, w23, w25, ror #13",
-        "eor w23, w23, w25, ror #22",
-        "add w21, w21, w23",
-        "mov w23, v20.s[3]",
-        "add w20, w20, w23",
-        "mov v5.16b, v20.16b",
-        "mov v5.s[3], w21",
-        "mov v5.s[2], w25",
-        "mov v5.s[1], w20",
-        "mov v20.16b, v5.16b",
-        "mov v20.s[0], w22",
-        "tbl v16.16b, {v22.16b}, v4.16b",
-        "mov w20, v20.s[1]",
-        "mov w21, v20.s[0]",
-        "mov w22, v17.s[1]",
+        "eor w24, w23, w20, ror #11",
+        "eor w23, w24, w20, ror #25",
+        "add w24, w22, w23",
+        "mov w22, v13.s[0]",
+        "add w23, w24, w22",
+        "mov w22, v7.s[0]",
+        "add w24, w23, w22",
+        "mov w22, v4.s[3]",
+        "mov w23, v4.s[2]",
+        "mov w25, v7.s[3]",
+        "and w30, w23, w25",
+        "orr w18, w23, w25",
+        "and w25, w22, w18",
+        "orr w18, w25, w30",
+        "add w25, w24, w18",
+        "ror w30, w22, #2",
+        "eor w18, w30, w22, ror #13",
+        "eor w30, w18, w22, ror #22",
+        "add w18, w25, w30",
+        "mov w25, v7.s[2]",
+        "add w30, w24, w25",
+        "and w24, w30, w20",
+        "bic w20, w21, w30",
+        "eor w21, w24, w20",
+        "ror w20, w30, #6",
+        "eor w24, w20, w30, ror #11",
+        "eor w20, w24, w30, ror #25",
+        "add w24, w21, w20",
+        "mov w20, v13.s[1]",
+        "add w21, w24, w20",
+        "mov w20, v7.s[1]",
+        "add w24, w21, w20",
+        "and w20, w22, w23",
+        "orr w21, w22, w23",
+        "and w22, w18, w21",
+        "orr w21, w22, w20",
+        "add w20, w24, w21",
+        "ror w21, w18, #2",
+        "eor w22, w21, w18, ror #13",
+        "eor w21, w22, w18, ror #22",
+        "add w22, w20, w21",
+        "mov w20, v7.s[3]",
+        "add w21, w24, w20",
+        "mov v3.16b, v7.16b",
+        "mov v3.s[3], w22",
+        "mov v7.16b, v3.16b",
+        "mov v7.s[2], w18",
+        "mov v3.16b, v7.16b",
+        "mov v3.s[1], w21",
+        "mov v7.16b, v3.16b",
+        "mov v7.s[0], w30",
+        "mov v20.16b, v7.16b",
+        "tbl v3.16b, {v13.16b}, v5.16b",
+        "mov v16.16b, v3.16b",
+        "mov w20, v7.s[1]",
+        "mov w21, v7.s[0]",
+        "mov w22, v4.s[1]",
         "and w23, w20, w21",
-        "bic w22, w22, w20",
-        "eor w22, w23, w22",
+        "bic w24, w22, w20",
+        "eor w22, w23, w24",
         "ror w23, w20, #6",
-        "eor w23, w23, w20, ror #11",
-        "eor w23, w23, w20, ror #25",
-        "add w22, w22, w23",
-        "mov w23, v16.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v17.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v20.s[3]",
-        "mov w24, v20.s[2]",
-        "mov w25, v17.s[3]",
-        "and w30, w24, w25",
-        "orr w25, w24, w25",
-        "and w25, w23, w25",
-        "orr w25, w25, w30",
-        "add w25, w22, w25",
-        "ror w30, w23, #2",
-        "eor w30, w30, w23, ror #13",
-        "eor w30, w30, w23, ror #22",
-        "add w25, w25, w30",
-        "mov w30, v17.s[2]",
-        "add w22, w22, w30",
-        "and w20, w22, w20",
-        "bic w21, w21, w22",
-        "eor w20, w20, w21",
-        "ror w21, w22, #6",
-        "eor w21, w21, w22, ror #11",
-        "eor w21, w21, w22, ror #25",
-        "add w20, w20, w21",
-        "mov w21, v16.s[1]",
-        "add w20, w20, w21",
-        "mov w21, v17.s[1]",
-        "add w20, w20, w21",
-        "and w21, w23, w24",
-        "orr w23, w23, w24",
-        "and w23, w25, w23",
-        "orr w21, w23, w21",
-        "add w21, w20, w21",
-        "ror w23, w25, #2",
-        "eor w23, w23, w25, ror #13",
-        "eor w23, w23, w25, ror #22",
-        "add w21, w21, w23",
-        "mov w23, v17.s[3]",
-        "add w20, w20, w23",
-        "mov v5.16b, v17.16b",
-        "mov v5.s[3], w21",
-        "mov v5.s[2], w25",
-        "mov v5.s[1], w20",
-        "mov v17.16b, v5.16b",
-        "mov v17.s[0], w22",
-        "mov v16.16b, v21.16b",
-        "mov w20, v17.s[1]",
-        "mov w21, v17.s[0]",
-        "mov w22, v20.s[1]",
+        "eor w24, w23, w20, ror #11",
+        "eor w23, w24, w20, ror #25",
+        "add w24, w22, w23",
+        "mov w22, v3.s[0]",
+        "add w23, w24, w22",
+        "mov w22, v4.s[0]",
+        "add w24, w23, w22",
+        "mov w22, v7.s[3]",
+        "mov w23, v7.s[2]",
+        "mov w25, v4.s[3]",
+        "and w30, w23, w25",
+        "orr w18, w23, w25",
+        "and w25, w22, w18",
+        "orr w18, w25, w30",
+        "add w25, w24, w18",
+        "ror w30, w22, #2",
+        "eor w18, w30, w22, ror #13",
+        "eor w30, w18, w22, ror #22",
+        "add w18, w25, w30",
+        "mov w25, v4.s[2]",
+        "add w30, w24, w25",
+        "and w24, w30, w20",
+        "bic w20, w21, w30",
+        "eor w21, w24, w20",
+        "ror w20, w30, #6",
+        "eor w24, w20, w30, ror #11",
+        "eor w20, w24, w30, ror #25",
+        "add w24, w21, w20",
+        "mov w20, v3.s[1]",
+        "add w21, w24, w20",
+        "mov w20, v4.s[1]",
+        "add w24, w21, w20",
+        "and w20, w22, w23",
+        "orr w21, w22, w23",
+        "and w22, w18, w21",
+        "orr w21, w22, w20",
+        "add w20, w24, w21",
+        "ror w21, w18, #2",
+        "eor w22, w21, w18, ror #13",
+        "eor w21, w22, w18, ror #22",
+        "add w22, w20, w21",
+        "mov w20, v4.s[3]",
+        "add w21, w24, w20",
+        "mov v3.16b, v4.16b",
+        "mov v3.s[3], w22",
+        "mov v4.16b, v3.16b",
+        "mov v4.s[2], w18",
+        "mov v3.16b, v4.16b",
+        "mov v3.s[1], w21",
+        "mov v4.16b, v3.16b",
+        "mov v4.s[0], w30",
+        "mov v17.16b, v4.16b",
+        "mov v16.16b, v9.16b",
+        "mov w20, v4.s[1]",
+        "mov w21, v4.s[0]",
+        "mov w22, v7.s[1]",
         "and w23, w20, w21",
-        "bic w22, w22, w20",
-        "eor w22, w23, w22",
+        "bic w24, w22, w20",
+        "eor w22, w23, w24",
         "ror w23, w20, #6",
-        "eor w23, w23, w20, ror #11",
-        "eor w23, w23, w20, ror #25",
-        "add w22, w22, w23",
-        "mov w23, v21.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v20.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v17.s[3]",
-        "mov w24, v17.s[2]",
-        "mov w25, v20.s[3]",
-        "and w30, w24, w25",
-        "orr w25, w24, w25",
-        "and w25, w23, w25",
-        "orr w25, w25, w30",
-        "add w25, w22, w25",
-        "ror w30, w23, #2",
-        "eor w30, w30, w23, ror #13",
-        "eor w30, w30, w23, ror #22",
-        "add w25, w25, w30",
-        "mov w30, v20.s[2]",
-        "add w22, w22, w30",
-        "and w20, w22, w20",
-        "bic w21, w21, w22",
-        "eor w20, w20, w21",
-        "ror w21, w22, #6",
-        "eor w21, w21, w22, ror #11",
-        "eor w21, w21, w22, ror #25",
-        "add w20, w20, w21",
-        "mov w21, v21.s[1]",
-        "add w20, w20, w21",
-        "mov w21, v20.s[1]",
-        "add w20, w20, w21",
-        "and w21, w23, w24",
-        "orr w23, w23, w24",
-        "and w23, w25, w23",
-        "orr w21, w23, w21",
-        "add w21, w20, w21",
-        "ror w23, w25, #2",
-        "eor w23, w23, w25, ror #13",
-        "eor w23, w23, w25, ror #22",
-        "add w21, w21, w23",
-        "mov w23, v20.s[3]",
-        "add w20, w20, w23",
-        "mov v5.16b, v20.16b",
-        "mov v5.s[3], w21",
-        "mov v5.s[2], w25",
-        "mov v5.s[1], w20",
-        "mov v20.16b, v5.16b",
-        "mov v20.s[0], w22",
-        "tbl v16.16b, {v21.16b}, v4.16b",
-        "mov w20, v20.s[1]",
-        "mov w21, v20.s[0]",
-        "mov w22, v17.s[1]",
+        "eor w24, w23, w20, ror #11",
+        "eor w23, w24, w20, ror #25",
+        "add w24, w22, w23",
+        "mov w22, v9.s[0]",
+        "add w23, w24, w22",
+        "mov w22, v7.s[0]",
+        "add w24, w23, w22",
+        "mov w22, v4.s[3]",
+        "mov w23, v4.s[2]",
+        "mov w25, v7.s[3]",
+        "and w30, w23, w25",
+        "orr w18, w23, w25",
+        "and w25, w22, w18",
+        "orr w18, w25, w30",
+        "add w25, w24, w18",
+        "ror w30, w22, #2",
+        "eor w18, w30, w22, ror #13",
+        "eor w30, w18, w22, ror #22",
+        "add w18, w25, w30",
+        "mov w25, v7.s[2]",
+        "add w30, w24, w25",
+        "and w24, w30, w20",
+        "bic w20, w21, w30",
+        "eor w21, w24, w20",
+        "ror w20, w30, #6",
+        "eor w24, w20, w30, ror #11",
+        "eor w20, w24, w30, ror #25",
+        "add w24, w21, w20",
+        "mov w20, v9.s[1]",
+        "add w21, w24, w20",
+        "mov w20, v7.s[1]",
+        "add w24, w21, w20",
+        "and w20, w22, w23",
+        "orr w21, w22, w23",
+        "and w22, w18, w21",
+        "orr w21, w22, w20",
+        "add w20, w24, w21",
+        "ror w21, w18, #2",
+        "eor w22, w21, w18, ror #13",
+        "eor w21, w22, w18, ror #22",
+        "add w22, w20, w21",
+        "mov w20, v7.s[3]",
+        "add w21, w24, w20",
+        "mov v3.16b, v7.16b",
+        "mov v3.s[3], w22",
+        "mov v7.16b, v3.16b",
+        "mov v7.s[2], w18",
+        "mov v3.16b, v7.16b",
+        "mov v3.s[1], w21",
+        "mov v7.16b, v3.16b",
+        "mov v7.s[0], w30",
+        "mov v20.16b, v7.16b",
+        "tbl v3.16b, {v9.16b}, v5.16b",
+        "mov v16.16b, v3.16b",
+        "mov w20, v7.s[1]",
+        "mov w21, v7.s[0]",
+        "mov w22, v4.s[1]",
         "and w23, w20, w21",
-        "bic w22, w22, w20",
-        "eor w22, w23, w22",
+        "bic w24, w22, w20",
+        "eor w22, w23, w24",
         "ror w23, w20, #6",
-        "eor w23, w23, w20, ror #11",
-        "eor w23, w23, w20, ror #25",
-        "add w22, w22, w23",
-        "mov w23, v16.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v17.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v20.s[3]",
-        "mov w24, v20.s[2]",
-        "mov w25, v17.s[3]",
-        "and w30, w24, w25",
-        "orr w25, w24, w25",
-        "and w25, w23, w25",
-        "orr w25, w25, w30",
-        "add w25, w22, w25",
-        "ror w30, w23, #2",
-        "eor w30, w30, w23, ror #13",
-        "eor w30, w30, w23, ror #22",
-        "add w25, w25, w30",
-        "mov w30, v17.s[2]",
-        "add w22, w22, w30",
-        "and w20, w22, w20",
-        "bic w21, w21, w22",
-        "eor w20, w20, w21",
-        "ror w21, w22, #6",
-        "eor w21, w21, w22, ror #11",
-        "eor w21, w21, w22, ror #25",
-        "add w20, w20, w21",
-        "mov w21, v16.s[1]",
-        "add w20, w20, w21",
-        "mov w21, v17.s[1]",
-        "add w20, w20, w21",
-        "and w21, w23, w24",
-        "orr w23, w23, w24",
-        "and w23, w25, w23",
-        "orr w21, w23, w21",
-        "add w21, w20, w21",
-        "ror w23, w25, #2",
-        "eor w23, w23, w25, ror #13",
-        "eor w23, w23, w25, ror #22",
-        "add w21, w21, w23",
-        "mov w23, v17.s[3]",
-        "add w20, w20, w23",
-        "mov v4.16b, v17.16b",
-        "mov v4.s[3], w21",
-        "mov v4.s[2], w25",
-        "mov v4.s[1], w20",
+        "eor w24, w23, w20, ror #11",
+        "eor w23, w24, w20, ror #25",
+        "add w24, w22, w23",
+        "mov w22, v3.s[0]",
+        "add w23, w24, w22",
+        "mov w22, v4.s[0]",
+        "add w24, w23, w22",
+        "mov w22, v7.s[3]",
+        "mov w23, v7.s[2]",
+        "mov w25, v4.s[3]",
+        "and w30, w23, w25",
+        "orr w18, w23, w25",
+        "and w25, w22, w18",
+        "orr w18, w25, w30",
+        "add w25, w24, w18",
+        "ror w30, w22, #2",
+        "eor w18, w30, w22, ror #13",
+        "eor w30, w18, w22, ror #22",
+        "add w18, w25, w30",
+        "mov w25, v4.s[2]",
+        "add w30, w24, w25",
+        "and w24, w30, w20",
+        "bic w20, w21, w30",
+        "eor w21, w24, w20",
+        "ror w20, w30, #6",
+        "eor w24, w20, w30, ror #11",
+        "eor w20, w24, w30, ror #25",
+        "add w24, w21, w20",
+        "mov w20, v3.s[1]",
+        "add w21, w24, w20",
+        "mov w20, v4.s[1]",
+        "add w24, w21, w20",
+        "and w20, w22, w23",
+        "orr w21, w22, w23",
+        "and w22, w18, w21",
+        "orr w21, w22, w20",
+        "add w20, w24, w21",
+        "ror w21, w18, #2",
+        "eor w22, w21, w18, ror #13",
+        "eor w21, w22, w18, ror #22",
+        "add w22, w20, w21",
+        "mov w20, v4.s[3]",
+        "add w21, w24, w20",
+        "mov v3.16b, v4.16b",
+        "mov v3.s[3], w22",
+        "mov v4.16b, v3.16b",
+        "mov v4.s[2], w18",
+        "mov v3.16b, v4.16b",
+        "mov v3.s[1], w21",
+        "mov v4.16b, v3.16b",
+        "mov v4.s[0], w30",
         "mov v17.16b, v4.16b",
-        "mov v17.s[0], w22",
-        "add v20.4s, v20.4s, v18.4s",
-        "add v17.4s, v17.4s, v19.4s",
-        "tbl v20.16b, {v20.16b}, v2.16b",
-        "tbl v17.16b, {v17.16b}, v3.16b",
-        "mov v16.16b, v17.16b",
-        "mov v16.16b, v17.16b",
-        "mov v16.d[1], v20.d[1]",
-        "ext v20.16b, v17.16b, v20.16b, #8",
-        "str q16, [x11, #256]",
-        "str q20, [x11, #272]"
+        "add v3.4s, v7.4s, v11.4s",
+        "mov v20.16b, v3.16b",
+        "add v5.4s, v4.4s, v10.4s",
+        "mov v17.16b, v5.16b",
+        "tbl v4.16b, {v3.16b}, v6.16b",
+        "mov v20.16b, v4.16b",
+        "tbl v3.16b, {v5.16b}, v2.16b",
+        "mov v17.16b, v3.16b",
+        "mov v16.16b, v3.16b",
+        "mov v2.16b, v3.16b",
+        "mov v2.d[1], v4.d[1]",
+        "mov v16.16b, v2.16b",
+        "ext v5.16b, v3.16b, v4.16b, #8",
+        "mov v20.16b, v5.16b",
+        "ldr x20, [sp]",
+        "str q2, [x20, #256]",
+        "str q5, [x20, #272]",
+        "add sp, sp, #0x120 (288)"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/FlagM/Atomics.json b/unittests/InstructionCountCI/FlagM/Atomics.json
index d535a3386d..a17f2e3c39 100644
--- a/unittests/InstructionCountCI/FlagM/Atomics.json
+++ b/unittests/InstructionCountCI/FlagM/Atomics.json
@@ -12,1464 +12,1802 @@
   },
   "Instructions": {
     "lock add byte [rax], cl": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x00",
       "ExpectedArm64ASM": [
-        "ldaddalb w5, w20, [x4]",
-        "eor w27, w20, w5",
-        "lsl w0, w20, #24",
-        "cmn w0, w5, lsl #24",
-        "add w26, w20, w5"
+        "mov x20, x5",
+        "mov x21, x4",
+        "ldaddalb w20, w22, [x21]",
+        "eor w21, w22, w20",
+        "mov x27, x21",
+        "lsl w0, w22, #24",
+        "cmn w0, w20, lsl #24",
+        "add w21, w22, w20",
+        "mov x26, x21"
       ]
     },
     "lock add word [rax], cx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x01",
       "ExpectedArm64ASM": [
-        "ldaddalh w5, w20, [x4]",
-        "eor w27, w20, w5",
-        "lsl w0, w20, #16",
-        "cmn w0, w5, lsl #16",
-        "add w26, w20, w5"
+        "mov x20, x5",
+        "mov x21, x4",
+        "ldaddalh w20, w22, [x21]",
+        "eor w21, w22, w20",
+        "mov x27, x21",
+        "lsl w0, w22, #16",
+        "cmn w0, w20, lsl #16",
+        "add w21, w22, w20",
+        "mov x26, x21"
       ]
     },
     "lock add dword [rax], ecx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x01",
       "ExpectedArm64ASM": [
-        "ldaddal w5, w20, [x4]",
-        "eor w27, w20, w5",
-        "adds w26, w20, w5"
+        "mov x20, x5",
+        "mov x21, x4",
+        "ldaddal w20, w22, [x21]",
+        "eor w21, w22, w20",
+        "mov x27, x21",
+        "adds w21, w22, w20",
+        "mov x26, x21"
       ]
     },
     "lock or byte [rax], cl": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x08",
       "ExpectedArm64ASM": [
-        "ldsetalb w5, w20, [x4]",
-        "orr w26, w20, w5",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x5",
+        "mov x21, x4",
+        "ldsetalb w20, w22, [x21]",
+        "orr w21, w22, w20",
+        "mov x26, x21",
+        "cmn wzr, w21, lsl #24"
       ]
     },
     "lock or word [rax], cx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x09",
       "ExpectedArm64ASM": [
-        "ldsetalh w5, w20, [x4]",
-        "orr w26, w20, w5",
-        "cmn wzr, w26, lsl #16"
+        "mov x20, x5",
+        "mov x21, x4",
+        "ldsetalh w20, w22, [x21]",
+        "orr w21, w22, w20",
+        "mov x26, x21",
+        "cmn wzr, w21, lsl #16"
       ]
     },
     "lock or dword [rax], ecx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x09",
       "ExpectedArm64ASM": [
-        "ldsetal w5, w20, [x4]",
-        "orr w26, w20, w5",
-        "tst w26, w26"
+        "mov x20, x5",
+        "mov x21, x4",
+        "ldsetal w20, w22, [x21]",
+        "orr w21, w22, w20",
+        "mov x26, x21",
+        "tst w21, w21"
       ]
     },
     "lock adc byte [rax], cl": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 22,
       "Comment": "0x10",
       "ExpectedArm64ASM": [
-        "adc w20, wzr, w5",
-        "ldaddalb w20, w20, [x4]",
-        "eor w27, w20, w5",
+        "mov x20, x5",
+        "adc w21, wzr, w20",
+        "mov x22, x4",
+        "ldaddalb w21, w23, [x22]",
+        "eor w21, w23, w20",
+        "mov x27, x21",
         "cset w21, hs",
-        "adc w22, w20, w5",
-        "uxtb w26, w22",
-        "cmp x26, x5",
+        "adc w22, w23, w20",
+        "uxtb w24, w22",
+        "cmp x24, x20",
         "cset x22, lo",
-        "cmp x26, x5",
-        "cset x23, ls",
+        "cmp x24, x20",
+        "cset x25, ls",
         "cmp x21, #0x1 (1)",
-        "csel x21, x23, x22, eq",
-        "cmn wzr, w26, lsl #24",
-        "rmif x21, #63, #nzCv",
-        "eor w21, w20, w5",
-        "eor w20, w26, w20",
-        "bic w20, w20, w21",
-        "rmif x20, #7, #nzcV"
+        "csel x30, x25, x22, eq",
+        "cmn wzr, w24, lsl #24",
+        "rmif x30, #63, #nzCv",
+        "eor w21, w23, w20",
+        "eor w20, w24, w23",
+        "bic w22, w20, w21",
+        "rmif x22, #7, #nzcV",
+        "mov x26, x24"
       ]
     },
     "lock adc word [rax], cx": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 22,
       "Comment": "0x11",
       "ExpectedArm64ASM": [
-        "adc w20, wzr, w5",
-        "ldaddalh w20, w20, [x4]",
-        "eor w27, w20, w5",
+        "mov x20, x5",
+        "adc w21, wzr, w20",
+        "mov x22, x4",
+        "ldaddalh w21, w23, [x22]",
+        "eor w21, w23, w20",
+        "mov x27, x21",
         "cset w21, hs",
-        "adc w22, w20, w5",
-        "uxth w26, w22",
-        "cmp x26, x5",
+        "adc w22, w23, w20",
+        "uxth w24, w22",
+        "cmp x24, x20",
         "cset x22, lo",
-        "cmp x26, x5",
-        "cset x23, ls",
+        "cmp x24, x20",
+        "cset x25, ls",
         "cmp x21, #0x1 (1)",
-        "csel x21, x23, x22, eq",
-        "cmn wzr, w26, lsl #16",
-        "rmif x21, #63, #nzCv",
-        "eor w21, w20, w5",
-        "eor w20, w26, w20",
-        "bic w20, w20, w21",
-        "rmif x20, #15, #nzcV"
+        "csel x30, x25, x22, eq",
+        "cmn wzr, w24, lsl #16",
+        "rmif x30, #63, #nzCv",
+        "eor w21, w23, w20",
+        "eor w20, w24, w23",
+        "bic w22, w20, w21",
+        "rmif x22, #15, #nzcV",
+        "mov x26, x24"
       ]
     },
     "lock adc dword [rax], ecx": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x11",
       "ExpectedArm64ASM": [
-        "adc w20, wzr, w5",
-        "ldaddal w20, w20, [x4]",
-        "eor w27, w20, w5",
-        "adcs w26, w20, w5"
+        "mov x20, x5",
+        "adc w21, wzr, w20",
+        "mov x22, x4",
+        "ldaddal w21, w23, [x22]",
+        "eor w21, w23, w20",
+        "mov x27, x21",
+        "adcs w21, w23, w20",
+        "mov x26, x21"
       ]
     },
     "lock sbb byte [rax], cl": {
-      "ExpectedInstructionCount": 20,
+      "ExpectedInstructionCount": 24,
       "Comment": "0x18",
       "ExpectedArm64ASM": [
-        "adc w20, wzr, w5",
-        "neg w1, w20",
-        "ldaddalb w1, w20, [x4]",
-        "eor w27, w20, w5",
+        "mov x20, x5",
+        "mov x21, x4",
+        "adc w22, wzr, w20",
+        "neg w1, w22",
+        "ldaddalb w1, w23, [x21]",
+        "eor w21, w23, w20",
+        "mov x27, x21",
         "cset w21, hs",
-        "add w22, w5, w21",
-        "sub w22, w20, w22",
-        "uxtb w26, w22",
-        "cmp w26, w20",
-        "cset x22, hi",
-        "cmp w26, w20",
-        "cset x23, hs",
+        "add w22, w20, w21",
+        "sub w24, w23, w22",
+        "uxtb w22, w24",
+        "cmp w22, w23",
+        "cset x24, hi",
+        "cmp w22, w23",
+        "cset x25, hs",
         "cmp x21, #0x1 (1)",
-        "csel x21, x23, x22, eq",
-        "cmn wzr, w26, lsl #24",
-        "rmif x21, #63, #nzCv",
-        "eor w21, w20, w5",
-        "eor w20, w26, w20",
-        "and w20, w20, w21",
-        "rmif x20, #7, #nzcV"
+        "csel x30, x25, x24, eq",
+        "cmn wzr, w22, lsl #24",
+        "rmif x30, #63, #nzCv",
+        "eor w21, w23, w20",
+        "eor w20, w22, w23",
+        "and w23, w20, w21",
+        "rmif x23, #7, #nzcV",
+        "mov x26, x22"
       ]
     },
     "lock sbb word [rax], cx": {
-      "ExpectedInstructionCount": 20,
+      "ExpectedInstructionCount": 24,
       "Comment": "0x19",
       "ExpectedArm64ASM": [
-        "adc w20, wzr, w5",
-        "neg w1, w20",
-        "ldaddalh w1, w20, [x4]",
-        "eor w27, w20, w5",
+        "mov x20, x5",
+        "mov x21, x4",
+        "adc w22, wzr, w20",
+        "neg w1, w22",
+        "ldaddalh w1, w23, [x21]",
+        "eor w21, w23, w20",
+        "mov x27, x21",
         "cset w21, hs",
-        "add w22, w5, w21",
-        "sub w22, w20, w22",
-        "uxth w26, w22",
-        "cmp w26, w20",
-        "cset x22, hi",
-        "cmp w26, w20",
-        "cset x23, hs",
+        "add w22, w20, w21",
+        "sub w24, w23, w22",
+        "uxth w22, w24",
+        "cmp w22, w23",
+        "cset x24, hi",
+        "cmp w22, w23",
+        "cset x25, hs",
         "cmp x21, #0x1 (1)",
-        "csel x21, x23, x22, eq",
-        "cmn wzr, w26, lsl #16",
-        "rmif x21, #63, #nzCv",
-        "eor w21, w20, w5",
-        "eor w20, w26, w20",
-        "and w20, w20, w21",
-        "rmif x20, #15, #nzcV"
+        "csel x30, x25, x24, eq",
+        "cmn wzr, w22, lsl #16",
+        "rmif x30, #63, #nzCv",
+        "eor w21, w23, w20",
+        "eor w20, w22, w23",
+        "and w23, w20, w21",
+        "rmif x23, #15, #nzcV",
+        "mov x26, x22"
       ]
     },
     "lock sbb dword [rax], ecx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x19",
       "ExpectedArm64ASM": [
-        "adc w20, wzr, w5",
-        "neg w1, w20",
-        "ldaddal w1, w20, [x4]",
-        "eor w27, w20, w5",
+        "mov x20, x5",
+        "mov x21, x4",
+        "adc w22, wzr, w20",
+        "neg w1, w22",
+        "ldaddal w1, w23, [x21]",
+        "eor w21, w23, w20",
+        "mov x27, x21",
         "cfinv",
-        "sbcs w26, w20, w5",
-        "cfinv"
+        "sbcs w21, w23, w20",
+        "cfinv",
+        "mov x26, x21"
       ]
     },
     "lock and byte [rax], cl": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x20",
       "ExpectedArm64ASM": [
-        "mvn w1, w5",
-        "ldclralb w1, w20, [x4]",
-        "and w26, w20, w5",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x5",
+        "mov x21, x4",
+        "mvn w1, w20",
+        "ldclralb w1, w22, [x21]",
+        "and w21, w22, w20",
+        "cmn wzr, w21, lsl #24",
+        "mov x26, x21"
       ]
     },
     "lock and word [rax], cx": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x21",
       "ExpectedArm64ASM": [
-        "mvn w1, w5",
-        "ldclralh w1, w20, [x4]",
-        "and w26, w20, w5",
-        "cmn wzr, w26, lsl #16"
+        "mov x20, x5",
+        "mov x21, x4",
+        "mvn w1, w20",
+        "ldclralh w1, w22, [x21]",
+        "and w21, w22, w20",
+        "cmn wzr, w21, lsl #16",
+        "mov x26, x21"
       ]
     },
     "lock and dword [rax], ecx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x21",
       "ExpectedArm64ASM": [
-        "mvn w1, w5",
-        "ldclral w1, w20, [x4]",
-        "ands w26, w20, w5"
+        "mov x20, x5",
+        "mov x21, x4",
+        "mvn w1, w20",
+        "ldclral w1, w22, [x21]",
+        "ands w21, w22, w20",
+        "mov x26, x21"
       ]
     },
     "lock sub byte [rax], cl": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x28",
       "ExpectedArm64ASM": [
-        "neg w1, w5",
-        "ldaddalb w1, w20, [x4]",
-        "eor w27, w20, w5",
-        "lsl w0, w20, #24",
-        "cmp w0, w5, lsl #24",
-        "sub w26, w20, w5",
+        "mov x20, x5",
+        "mov x21, x4",
+        "neg w1, w20",
+        "ldaddalb w1, w22, [x21]",
+        "eor w21, w22, w20",
+        "mov x27, x21",
+        "lsl w0, w22, #24",
+        "cmp w0, w20, lsl #24",
+        "sub w21, w22, w20",
+        "mov x26, x21",
         "cfinv"
       ]
     },
     "lock sub word [rax], cx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x28",
       "ExpectedArm64ASM": [
-        "neg w1, w5",
-        "ldaddalh w1, w20, [x4]",
-        "eor w27, w20, w5",
-        "lsl w0, w20, #16",
-        "cmp w0, w5, lsl #16",
-        "sub w26, w20, w5",
+        "mov x20, x5",
+        "mov x21, x4",
+        "neg w1, w20",
+        "ldaddalh w1, w22, [x21]",
+        "eor w21, w22, w20",
+        "mov x27, x21",
+        "lsl w0, w22, #16",
+        "cmp w0, w20, lsl #16",
+        "sub w21, w22, w20",
+        "mov x26, x21",
         "cfinv"
       ]
     },
     "lock sub dword [rax], ecx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x29",
       "ExpectedArm64ASM": [
-        "neg w1, w5",
-        "ldaddal w1, w20, [x4]",
-        "eor w27, w20, w5",
-        "subs w26, w20, w5",
+        "mov x20, x5",
+        "mov x21, x4",
+        "neg w1, w20",
+        "ldaddal w1, w22, [x21]",
+        "eor w21, w22, w20",
+        "mov x27, x21",
+        "subs w21, w22, w20",
+        "mov x26, x21",
         "cfinv"
       ]
     },
     "lock xor byte [rax], cl": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x30",
       "ExpectedArm64ASM": [
-        "ldeoralb w5, w20, [x4]",
-        "eor w26, w20, w5",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x5",
+        "mov x21, x4",
+        "ldeoralb w20, w22, [x21]",
+        "eor w21, w22, w20",
+        "mov x26, x21",
+        "cmn wzr, w21, lsl #24"
       ]
     },
     "lock xor word [rax], cx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x31",
       "ExpectedArm64ASM": [
-        "ldeoralh w5, w20, [x4]",
-        "eor w26, w20, w5",
-        "cmn wzr, w26, lsl #16"
+        "mov x20, x5",
+        "mov x21, x4",
+        "ldeoralh w20, w22, [x21]",
+        "eor w21, w22, w20",
+        "mov x26, x21",
+        "cmn wzr, w21, lsl #16"
       ]
     },
     "lock xor dword [rax], ecx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x31",
       "ExpectedArm64ASM": [
-        "ldeoral w5, w20, [x4]",
-        "eor w26, w20, w5",
-        "tst w26, w26"
+        "mov x20, x5",
+        "mov x21, x4",
+        "ldeoral w20, w22, [x21]",
+        "eor w21, w22, w20",
+        "mov x26, x21",
+        "tst w21, w21"
       ]
     },
     "lock add qword [rax], rcx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x01",
       "ExpectedArm64ASM": [
-        "ldaddal x5, x20, [x4]",
-        "eor w27, w20, w5",
-        "adds x26, x20, x5"
+        "mov x20, x5",
+        "mov x21, x4",
+        "ldaddal x20, x22, [x21]",
+        "eor w21, w22, w20",
+        "mov x27, x21",
+        "adds x21, x22, x20",
+        "mov x26, x21"
       ]
     },
     "xadd byte [rax], bl": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x0f 0xc0",
       "ExpectedArm64ASM": [
-        "uxtb w20, w7",
-        "ldaddalb w20, w21, [x4]",
-        "bfxil x7, x21, #0, #8",
-        "eor w27, w21, w20",
-        "lsl w0, w21, #24",
-        "cmn w0, w20, lsl #24",
-        "add w26, w21, w20"
+        "mov x20, x4",
+        "mov x21, x7",
+        "uxtb w22, w21",
+        "ldaddalb w22, w23, [x20]",
+        "mov x20, x21",
+        "bfxil x20, x23, #0, #8",
+        "mov x7, x20",
+        "eor w20, w23, w22",
+        "mov x27, x20",
+        "lsl w0, w23, #24",
+        "cmn w0, w22, lsl #24",
+        "add w20, w23, w22",
+        "mov x26, x20"
       ]
     },
     "xadd word [rax], bx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x0f 0xc1",
       "ExpectedArm64ASM": [
-        "uxth w20, w7",
-        "ldaddalh w20, w21, [x4]",
-        "bfxil x7, x21, #0, #16",
-        "eor w27, w21, w20",
-        "lsl w0, w21, #16",
-        "cmn w0, w20, lsl #16",
-        "add w26, w21, w20"
+        "mov x20, x4",
+        "mov x21, x7",
+        "uxth w22, w21",
+        "ldaddalh w22, w23, [x20]",
+        "mov x20, x21",
+        "bfxil x20, x23, #0, #16",
+        "mov x7, x20",
+        "eor w20, w23, w22",
+        "mov x27, x20",
+        "lsl w0, w23, #16",
+        "cmn w0, w22, lsl #16",
+        "add w20, w23, w22",
+        "mov x26, x20"
       ]
     },
     "xadd dword [rax], ebx": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x0f 0xc1",
       "ExpectedArm64ASM": [
-        "mov w20, w7",
-        "ldaddal w20, w7, [x4]",
-        "eor w27, w7, w20",
-        "adds w26, w7, w20"
+        "mov x20, x4",
+        "mov x21, x7",
+        "mov w22, w21",
+        "ldaddal w22, w21, [x20]",
+        "mov x7, x21",
+        "eor w20, w21, w22",
+        "mov x27, x20",
+        "adds w20, w21, w22",
+        "mov x26, x20"
       ]
     },
     "xadd qword [rax], rbx": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x0f 0xc1",
       "ExpectedArm64ASM": [
-        "mov x20, x7",
-        "ldaddal x20, x7, [x4]",
-        "eor w27, w7, w20",
-        "adds x26, x7, x20"
+        "mov x20, x4",
+        "mov x21, x7",
+        "ldaddal x21, x22, [x20]",
+        "mov x7, x22",
+        "eor w20, w22, w21",
+        "mov x27, x20",
+        "adds x20, x22, x21",
+        "mov x26, x20"
       ]
     },
     "lock add byte [rax], 1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP1 0x80 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "ldaddalb w20, w27, [x4]",
-        "lsl w0, w27, #24",
+        "mov x21, x4",
+        "ldaddalb w20, w22, [x21]",
+        "mov x27, x22",
+        "lsl w0, w22, #24",
         "cmn w0, w20, lsl #24",
-        "add w26, w27, #0x1 (1)"
+        "add w20, w22, #0x1 (1)",
+        "mov x26, x20"
       ]
     },
     "lock add byte [rax], 0xFF": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP1 0x80 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
-        "ldaddalb w20, w21, [x4]",
-        "mvn w27, w21",
-        "lsl w0, w21, #24",
+        "mov x21, x4",
+        "ldaddalb w20, w22, [x21]",
+        "mvn w21, w22",
+        "mov x27, x21",
+        "lsl w0, w22, #24",
         "cmn w0, w20, lsl #24",
-        "add w26, w21, #0xff (255)"
+        "add w20, w22, #0xff (255)",
+        "mov x26, x20"
       ]
     },
     "lock add word [rax], 0x100": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP1 0x81 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "ldaddalh w20, w27, [x4]",
-        "lsl w0, w27, #16",
+        "mov x21, x4",
+        "ldaddalh w20, w22, [x21]",
+        "mov x27, x22",
+        "lsl w0, w22, #16",
         "cmn w0, w20, lsl #16",
-        "add w26, w27, #0x100 (256)"
+        "add w20, w22, #0x100 (256)",
+        "mov x26, x20"
       ]
     },
     "lock add word [rax], 0xFFFF": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP1 0x81 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0xffff",
-        "ldaddalh w20, w21, [x4]",
-        "mvn w27, w21",
-        "lsl w0, w21, #16",
+        "mov x21, x4",
+        "ldaddalh w20, w22, [x21]",
+        "mvn w21, w22",
+        "mov x27, x21",
+        "lsl w0, w22, #16",
         "cmn w0, w20, lsl #16",
-        "add w26, w21, w20"
+        "add w21, w22, w20",
+        "mov x26, x21"
       ]
     },
     "lock add dword [rax], 0x100": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "ldaddal w20, w27, [x4]",
-        "adds w26, w27, #0x100 (256)"
+        "mov x21, x4",
+        "ldaddal w20, w22, [x21]",
+        "mov x27, x22",
+        "adds w20, w22, #0x100 (256)",
+        "mov x26, x20"
       ]
     },
     "lock add dword [rax], 0xFFFFFFFF": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x81 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "ldaddal w20, w21, [x4]",
-        "mvn w27, w21",
-        "adds w26, w21, w20"
+        "mov x21, x4",
+        "ldaddal w20, w22, [x21]",
+        "mvn w21, w22",
+        "mov x27, x21",
+        "adds w21, w22, w20",
+        "mov x26, x21"
       ]
     },
     "lock add qword [rax], 0x100": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "ldaddal x20, x27, [x4]",
-        "adds x26, x27, #0x100 (256)"
+        "mov x21, x4",
+        "ldaddal x20, x22, [x21]",
+        "mov x27, x22",
+        "adds x20, x22, #0x100 (256)",
+        "mov x26, x20"
       ]
     },
     "lock add qword [rax], -2147483647": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /0",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffff80000001",
-        "ldaddal x20, x27, [x4]",
-        "adds x26, x27, x20"
+        "mov x21, x4",
+        "ldaddal x20, x22, [x21]",
+        "mov x27, x22",
+        "adds x21, x22, x20",
+        "mov x26, x21"
       ]
     },
     "lock add word [rax], 1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP1 0x83 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "ldaddalh w20, w27, [x4]",
-        "lsl w0, w27, #16",
+        "mov x21, x4",
+        "ldaddalh w20, w22, [x21]",
+        "mov x27, x22",
+        "lsl w0, w22, #16",
         "cmn w0, w20, lsl #16",
-        "add w26, w27, #0x1 (1)"
+        "add w20, w22, #0x1 (1)",
+        "mov x26, x20"
       ]
     },
     "lock add dword [rax], 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "ldaddal w20, w27, [x4]",
-        "adds w26, w27, #0x1 (1)"
+        "mov x21, x4",
+        "ldaddal w20, w22, [x21]",
+        "mov x27, x22",
+        "adds w20, w22, #0x1 (1)",
+        "mov x26, x20"
       ]
     },
     "lock add qword [rax], 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "ldaddal x20, x27, [x4]",
-        "adds x26, x27, #0x1 (1)"
+        "mov x21, x4",
+        "ldaddal x20, x22, [x21]",
+        "mov x27, x22",
+        "adds x20, x22, #0x1 (1)",
+        "mov x26, x20"
       ]
     },
     "lock or byte [rax], 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x80 /1",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "ldsetalb w20, w20, [x4]",
-        "orr w26, w20, #0x1",
-        "cmn wzr, w26, lsl #24"
+        "mov x21, x4",
+        "ldsetalb w20, w22, [x21]",
+        "orr w20, w22, #0x1",
+        "mov x26, x20",
+        "cmn wzr, w20, lsl #24"
       ]
     },
     "lock or byte [rax], 0xFF": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x80 /1",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
-        "ldsetalb w20, w20, [x4]",
-        "orr w26, w20, #0xff",
-        "cmn wzr, w26, lsl #24"
+        "mov x21, x4",
+        "ldsetalb w20, w22, [x21]",
+        "orr w20, w22, #0xff",
+        "mov x26, x20",
+        "cmn wzr, w20, lsl #24"
       ]
     },
     "lock or word [rax], 0x100": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /1",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "ldsetalh w20, w20, [x4]",
-        "orr w26, w20, #0x100",
-        "cmn wzr, w26, lsl #16"
+        "mov x21, x4",
+        "ldsetalh w20, w22, [x21]",
+        "orr w20, w22, #0x100",
+        "mov x26, x20",
+        "cmn wzr, w20, lsl #16"
       ]
     },
     "lock or word [rax], 0xFFFF": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /1",
       "ExpectedArm64ASM": [
         "mov w20, #0xffff",
-        "ldsetalh w20, w20, [x4]",
-        "orr w26, w20, #0xffff",
-        "cmn wzr, w26, lsl #16"
+        "mov x21, x4",
+        "ldsetalh w20, w22, [x21]",
+        "orr w20, w22, #0xffff",
+        "mov x26, x20",
+        "cmn wzr, w20, lsl #16"
       ]
     },
     "lock or dword [rax], 0x100": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /1",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "ldsetal w20, w20, [x4]",
-        "orr w26, w20, #0x100",
-        "tst w26, w26"
+        "mov x21, x4",
+        "ldsetal w20, w22, [x21]",
+        "orr w20, w22, #0x100",
+        "mov x26, x20",
+        "tst w20, w20"
       ]
     },
     "lock or dword [rax], 0xFFFFFFFF": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /1",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "ldsetal w20, w21, [x4]",
-        "orr w26, w21, w20",
-        "tst w26, w26"
+        "mov x21, x4",
+        "ldsetal w20, w22, [x21]",
+        "orr w21, w22, w20",
+        "mov x26, x21",
+        "tst w21, w21"
       ]
     },
     "lock or qword [rax], 0x100": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /1",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "ldsetal x20, x20, [x4]",
-        "orr x26, x20, #0x100",
-        "tst x26, x26"
+        "mov x21, x4",
+        "ldsetal x20, x22, [x21]",
+        "orr x20, x22, #0x100",
+        "mov x26, x20",
+        "tst x20, x20"
       ]
     },
     "lock or qword [rax], -2147483647": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /1",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffff80000001",
-        "ldsetal x20, x20, [x4]",
-        "orr x26, x20, #0xffffffff80000001",
-        "tst x26, x26"
+        "mov x21, x4",
+        "ldsetal x20, x22, [x21]",
+        "orr x20, x22, #0xffffffff80000001",
+        "mov x26, x20",
+        "tst x20, x20"
       ]
     },
     "lock or word [rax], 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /1",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "ldsetalh w20, w20, [x4]",
-        "orr w26, w20, #0x1",
-        "cmn wzr, w26, lsl #16"
+        "mov x21, x4",
+        "ldsetalh w20, w22, [x21]",
+        "orr w20, w22, #0x1",
+        "mov x26, x20",
+        "cmn wzr, w20, lsl #16"
       ]
     },
     "lock or dword [rax], 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /1",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "ldsetal w20, w20, [x4]",
-        "orr w26, w20, #0x1",
-        "tst w26, w26"
+        "mov x21, x4",
+        "ldsetal w20, w22, [x21]",
+        "orr w20, w22, #0x1",
+        "mov x26, x20",
+        "tst w20, w20"
       ]
     },
     "lock or qword [rax], 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /1",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "ldsetal x20, x20, [x4]",
-        "orr x26, x20, #0x1",
-        "tst x26, x26"
+        "mov x21, x4",
+        "ldsetal x20, x22, [x21]",
+        "orr x20, x22, #0x1",
+        "mov x26, x20",
+        "tst x20, x20"
       ]
     },
     "lock adc byte [rax], 1": {
-      "ExpectedInstructionCount": 16,
+      "ExpectedInstructionCount": 19,
       "Comment": "GROUP1 0x80 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
         "adc w21, wzr, w20",
-        "ldaddalb w21, w27, [x4]",
+        "mov x22, x4",
+        "ldaddalb w21, w23, [x22]",
+        "mov x27, x23",
         "cset w21, hs",
-        "adc w20, w27, w20",
-        "uxtb w26, w20",
-        "cmp w26, #0x1 (1)",
-        "cset x20, lo",
-        "cmp w26, #0x1 (1)",
-        "cset x22, ls",
+        "adc w22, w23, w20",
+        "uxtb w20, w22",
+        "cmp w20, #0x1 (1)",
+        "cset x22, lo",
+        "cmp w20, #0x1 (1)",
+        "cset x24, ls",
         "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #24",
-        "rmif x20, #63, #nzCv",
-        "bic w20, w26, w27",
-        "rmif x20, #7, #nzcV"
+        "csel x25, x24, x22, eq",
+        "cmn wzr, w20, lsl #24",
+        "rmif x25, #63, #nzCv",
+        "bic w21, w20, w23",
+        "rmif x21, #7, #nzcV",
+        "mov x26, x20"
       ]
     },
     "lock adc byte [rax], 0xFF": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 20,
       "Comment": "GROUP1 0x80 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
         "adc w21, wzr, w20",
-        "ldaddalb w21, w21, [x4]",
-        "mvn w27, w21",
-        "cset w22, hs",
-        "adc w20, w21, w20",
-        "uxtb w26, w20",
-        "cmp w26, #0xff (255)",
-        "cset x20, lo",
-        "cmp w26, #0xff (255)",
-        "cset x23, ls",
-        "cmp x22, #0x1 (1)",
-        "csel x20, x23, x20, eq",
-        "cmn wzr, w26, lsl #24",
-        "rmif x20, #63, #nzCv",
-        "bic w20, w21, w26",
-        "rmif x20, #7, #nzcV"
+        "mov x22, x4",
+        "ldaddalb w21, w23, [x22]",
+        "mvn w21, w23",
+        "mov x27, x21",
+        "cset w21, hs",
+        "adc w22, w23, w20",
+        "uxtb w20, w22",
+        "cmp w20, #0xff (255)",
+        "cset x22, lo",
+        "cmp w20, #0xff (255)",
+        "cset x24, ls",
+        "cmp x21, #0x1 (1)",
+        "csel x25, x24, x22, eq",
+        "cmn wzr, w20, lsl #24",
+        "rmif x25, #63, #nzCv",
+        "bic w21, w23, w20",
+        "rmif x21, #7, #nzcV",
+        "mov x26, x20"
       ]
     },
     "lock adc word [rax], 0x100": {
-      "ExpectedInstructionCount": 16,
+      "ExpectedInstructionCount": 19,
       "Comment": "GROUP1 0x81 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
         "adc w21, wzr, w20",
-        "ldaddalh w21, w27, [x4]",
+        "mov x22, x4",
+        "ldaddalh w21, w23, [x22]",
+        "mov x27, x23",
         "cset w21, hs",
-        "adc w20, w27, w20",
-        "uxth w26, w20",
-        "cmp w26, #0x100 (256)",
-        "cset x20, lo",
-        "cmp w26, #0x100 (256)",
-        "cset x22, ls",
+        "adc w22, w23, w20",
+        "uxth w20, w22",
+        "cmp w20, #0x100 (256)",
+        "cset x22, lo",
+        "cmp w20, #0x100 (256)",
+        "cset x24, ls",
         "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #16",
-        "rmif x20, #63, #nzCv",
-        "bic w20, w26, w27",
-        "rmif x20, #15, #nzcV"
+        "csel x25, x24, x22, eq",
+        "cmn wzr, w20, lsl #16",
+        "rmif x25, #63, #nzCv",
+        "bic w21, w20, w23",
+        "rmif x21, #15, #nzcV",
+        "mov x26, x20"
       ]
     },
     "lock adc word [rax], 0xFFFF": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 20,
       "Comment": "GROUP1 0x81 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0xffff",
         "adc w21, wzr, w20",
-        "ldaddalh w21, w21, [x4]",
-        "mvn w27, w21",
-        "cset w22, hs",
-        "adc w23, w21, w20",
-        "uxth w26, w23",
-        "cmp w26, w20",
-        "cset x23, lo",
-        "cmp w26, w20",
-        "cset x20, ls",
-        "cmp x22, #0x1 (1)",
-        "csel x20, x20, x23, eq",
-        "cmn wzr, w26, lsl #16",
+        "mov x22, x4",
+        "ldaddalh w21, w23, [x22]",
+        "mvn w21, w23",
+        "mov x27, x21",
+        "cset w21, hs",
+        "adc w22, w23, w20",
+        "uxth w24, w22",
+        "cmp w24, w20",
+        "cset x22, lo",
+        "cmp w24, w20",
+        "cset x25, ls",
+        "cmp x21, #0x1 (1)",
+        "csel x20, x25, x22, eq",
+        "cmn wzr, w24, lsl #16",
         "rmif x20, #63, #nzCv",
-        "bic w20, w21, w26",
-        "rmif x20, #15, #nzcV"
+        "bic w20, w23, w24",
+        "rmif x20, #15, #nzcV",
+        "mov x26, x24"
       ]
     },
     "lock adc dword [rax], 0x100": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x81 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
         "adc w21, wzr, w20",
-        "ldaddal w21, w27, [x4]",
-        "adcs w26, w27, w20"
+        "mov x22, x4",
+        "ldaddal w21, w23, [x22]",
+        "mov x27, x23",
+        "adcs w21, w23, w20",
+        "mov x26, x21"
       ]
     },
     "lock adc dword [rax], 0xFFFFFFFF": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP1 0x81 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
         "adc w21, wzr, w20",
-        "ldaddal w21, w21, [x4]",
-        "mvn w27, w21",
-        "adcs w26, w21, w20"
+        "mov x22, x4",
+        "ldaddal w21, w23, [x22]",
+        "mvn w21, w23",
+        "mov x27, x21",
+        "adcs w21, w23, w20",
+        "mov x26, x21"
       ]
     },
     "lock adc qword [rax], 0x100": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x81 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
         "adc x21, xzr, x20",
-        "ldaddal x21, x27, [x4]",
-        "adcs x26, x27, x20"
+        "mov x22, x4",
+        "ldaddal x21, x23, [x22]",
+        "mov x27, x23",
+        "adcs x21, x23, x20",
+        "mov x26, x21"
       ]
     },
     "lock adc qword [rax], -2147483647": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x81 /2",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffff80000001",
         "adc x21, xzr, x20",
-        "ldaddal x21, x27, [x4]",
-        "adcs x26, x27, x20"
+        "mov x22, x4",
+        "ldaddal x21, x23, [x22]",
+        "mov x27, x23",
+        "adcs x21, x23, x20",
+        "mov x26, x21"
       ]
     },
     "lock adc word [rax], 1": {
-      "ExpectedInstructionCount": 16,
+      "ExpectedInstructionCount": 19,
       "Comment": "GROUP1 0x83 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
         "adc w21, wzr, w20",
-        "ldaddalh w21, w27, [x4]",
+        "mov x22, x4",
+        "ldaddalh w21, w23, [x22]",
+        "mov x27, x23",
         "cset w21, hs",
-        "adc w20, w27, w20",
-        "uxth w26, w20",
-        "cmp w26, #0x1 (1)",
-        "cset x20, lo",
-        "cmp w26, #0x1 (1)",
-        "cset x22, ls",
+        "adc w22, w23, w20",
+        "uxth w20, w22",
+        "cmp w20, #0x1 (1)",
+        "cset x22, lo",
+        "cmp w20, #0x1 (1)",
+        "cset x24, ls",
         "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #16",
-        "rmif x20, #63, #nzCv",
-        "bic w20, w26, w27",
-        "rmif x20, #15, #nzcV"
+        "csel x25, x24, x22, eq",
+        "cmn wzr, w20, lsl #16",
+        "rmif x25, #63, #nzCv",
+        "bic w21, w20, w23",
+        "rmif x21, #15, #nzcV",
+        "mov x26, x20"
       ]
     },
     "lock adc dword [rax], 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x83 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
         "adc w21, wzr, w20",
-        "ldaddal w21, w27, [x4]",
-        "adcs w26, w27, w20"
+        "mov x22, x4",
+        "ldaddal w21, w23, [x22]",
+        "mov x27, x23",
+        "adcs w21, w23, w20",
+        "mov x26, x21"
       ]
     },
     "lock adc qword [rax], 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x83 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
         "adc x21, xzr, x20",
-        "ldaddal x21, x27, [x4]",
-        "adcs x26, x27, x20"
+        "mov x22, x4",
+        "ldaddal x21, x23, [x22]",
+        "mov x27, x23",
+        "adcs x21, x23, x20",
+        "mov x26, x21"
       ]
     },
     "lock sbb byte [rax], 1": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 21,
       "Comment": "GROUP1 0x80 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "adc w21, wzr, w20",
-        "neg w1, w21",
-        "ldaddalb w1, w27, [x4]",
+        "mov x21, x4",
+        "adc w22, wzr, w20",
+        "neg w1, w22",
+        "ldaddalb w1, w23, [x21]",
+        "mov x27, x23",
         "cset w21, hs",
-        "add w20, w20, w21",
-        "sub w20, w27, w20",
-        "uxtb w26, w20",
-        "cmp w26, w27",
+        "add w22, w20, w21",
+        "sub w20, w23, w22",
+        "uxtb w22, w20",
+        "cmp w22, w23",
         "cset x20, hi",
-        "cmp w26, w27",
-        "cset x22, hs",
+        "cmp w22, w23",
+        "cset x24, hs",
         "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #24",
-        "rmif x20, #63, #nzCv",
-        "bic w20, w27, w26",
-        "rmif x20, #7, #nzcV"
+        "csel x25, x24, x20, eq",
+        "cmn wzr, w22, lsl #24",
+        "rmif x25, #63, #nzCv",
+        "bic w20, w23, w22",
+        "rmif x20, #7, #nzcV",
+        "mov x26, x22"
       ]
     },
     "lock sbb byte [rax], 0xFF": {
-      "ExpectedInstructionCount": 19,
+      "ExpectedInstructionCount": 22,
       "Comment": "GROUP1 0x80 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
-        "adc w21, wzr, w20",
-        "neg w1, w21",
-        "ldaddalb w1, w21, [x4]",
-        "mvn w27, w21",
-        "cset w22, hs",
-        "add w20, w20, w22",
-        "sub w20, w21, w20",
-        "uxtb w26, w20",
-        "cmp w26, w21",
+        "mov x21, x4",
+        "adc w22, wzr, w20",
+        "neg w1, w22",
+        "ldaddalb w1, w23, [x21]",
+        "mvn w21, w23",
+        "mov x27, x21",
+        "cset w21, hs",
+        "add w22, w20, w21",
+        "sub w20, w23, w22",
+        "uxtb w22, w20",
+        "cmp w22, w23",
         "cset x20, hi",
-        "cmp w26, w21",
-        "cset x23, hs",
-        "cmp x22, #0x1 (1)",
-        "csel x20, x23, x20, eq",
-        "cmn wzr, w26, lsl #24",
-        "rmif x20, #63, #nzCv",
-        "bic w20, w26, w21",
-        "rmif x20, #7, #nzcV"
+        "cmp w22, w23",
+        "cset x24, hs",
+        "cmp x21, #0x1 (1)",
+        "csel x25, x24, x20, eq",
+        "cmn wzr, w22, lsl #24",
+        "rmif x25, #63, #nzCv",
+        "bic w20, w22, w23",
+        "rmif x20, #7, #nzcV",
+        "mov x26, x22"
       ]
     },
     "lock sbb word [rax], 0x100": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 21,
       "Comment": "GROUP1 0x81 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "adc w21, wzr, w20",
-        "neg w1, w21",
-        "ldaddalh w1, w27, [x4]",
+        "mov x21, x4",
+        "adc w22, wzr, w20",
+        "neg w1, w22",
+        "ldaddalh w1, w23, [x21]",
+        "mov x27, x23",
         "cset w21, hs",
-        "add w20, w20, w21",
-        "sub w20, w27, w20",
-        "uxth w26, w20",
-        "cmp w26, w27",
+        "add w22, w20, w21",
+        "sub w20, w23, w22",
+        "uxth w22, w20",
+        "cmp w22, w23",
         "cset x20, hi",
-        "cmp w26, w27",
-        "cset x22, hs",
+        "cmp w22, w23",
+        "cset x24, hs",
         "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #16",
-        "rmif x20, #63, #nzCv",
-        "bic w20, w27, w26",
-        "rmif x20, #15, #nzcV"
+        "csel x25, x24, x20, eq",
+        "cmn wzr, w22, lsl #16",
+        "rmif x25, #63, #nzCv",
+        "bic w20, w23, w22",
+        "rmif x20, #15, #nzcV",
+        "mov x26, x22"
       ]
     },
     "lock sbb word [rax], 0xFFFF": {
-      "ExpectedInstructionCount": 19,
+      "ExpectedInstructionCount": 22,
       "Comment": "GROUP1 0x81 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0xffff",
-        "adc w21, wzr, w20",
-        "neg w1, w21",
-        "ldaddalh w1, w21, [x4]",
-        "mvn w27, w21",
-        "cset w22, hs",
-        "add w20, w20, w22",
-        "sub w20, w21, w20",
-        "uxth w26, w20",
-        "cmp w26, w21",
+        "mov x21, x4",
+        "adc w22, wzr, w20",
+        "neg w1, w22",
+        "ldaddalh w1, w23, [x21]",
+        "mvn w21, w23",
+        "mov x27, x21",
+        "cset w21, hs",
+        "add w22, w20, w21",
+        "sub w20, w23, w22",
+        "uxth w22, w20",
+        "cmp w22, w23",
         "cset x20, hi",
-        "cmp w26, w21",
-        "cset x23, hs",
-        "cmp x22, #0x1 (1)",
-        "csel x20, x23, x20, eq",
-        "cmn wzr, w26, lsl #16",
-        "rmif x20, #63, #nzCv",
-        "bic w20, w26, w21",
-        "rmif x20, #15, #nzcV"
+        "cmp w22, w23",
+        "cset x24, hs",
+        "cmp x21, #0x1 (1)",
+        "csel x25, x24, x20, eq",
+        "cmn wzr, w22, lsl #16",
+        "rmif x25, #63, #nzCv",
+        "bic w20, w22, w23",
+        "rmif x20, #15, #nzcV",
+        "mov x26, x22"
       ]
     },
     "lock sbb dword [rax], 0x100": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP1 0x81 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "adc w21, wzr, w20",
-        "neg w1, w21",
-        "ldaddal w1, w27, [x4]",
+        "mov x21, x4",
+        "adc w22, wzr, w20",
+        "neg w1, w22",
+        "ldaddal w1, w23, [x21]",
+        "mov x27, x23",
         "cfinv",
-        "sbcs w26, w27, w20",
-        "cfinv"
+        "sbcs w21, w23, w20",
+        "cfinv",
+        "mov x26, x21"
       ]
     },
     "lock sbb dword [rax], 0xFFFFFFFF": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP1 0x81 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "adc w21, wzr, w20",
-        "neg w1, w21",
-        "ldaddal w1, w21, [x4]",
-        "mvn w27, w21",
+        "mov x21, x4",
+        "adc w22, wzr, w20",
+        "neg w1, w22",
+        "ldaddal w1, w23, [x21]",
+        "mvn w21, w23",
+        "mov x27, x21",
         "cfinv",
-        "sbcs w26, w21, w20",
-        "cfinv"
+        "sbcs w21, w23, w20",
+        "cfinv",
+        "mov x26, x21"
       ]
     },
     "lock sbb qword [rax], 0x100": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP1 0x81 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "adc x21, xzr, x20",
-        "neg x1, x21",
-        "ldaddal x1, x27, [x4]",
+        "mov x21, x4",
+        "adc x22, xzr, x20",
+        "neg x1, x22",
+        "ldaddal x1, x23, [x21]",
+        "mov x27, x23",
         "cfinv",
-        "sbcs x26, x27, x20",
-        "cfinv"
+        "sbcs x21, x23, x20",
+        "cfinv",
+        "mov x26, x21"
       ]
     },
     "lock sbb qword [rax], -2147483647": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP1 0x81 /3",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffff80000001",
-        "adc x21, xzr, x20",
-        "neg x1, x21",
-        "ldaddal x1, x27, [x4]",
+        "mov x21, x4",
+        "adc x22, xzr, x20",
+        "neg x1, x22",
+        "ldaddal x1, x23, [x21]",
+        "mov x27, x23",
         "cfinv",
-        "sbcs x26, x27, x20",
-        "cfinv"
+        "sbcs x21, x23, x20",
+        "cfinv",
+        "mov x26, x21"
       ]
     },
     "lock sbb word [rax], 1": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 21,
       "Comment": "GROUP1 0x83 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "adc w21, wzr, w20",
-        "neg w1, w21",
-        "ldaddalh w1, w27, [x4]",
+        "mov x21, x4",
+        "adc w22, wzr, w20",
+        "neg w1, w22",
+        "ldaddalh w1, w23, [x21]",
+        "mov x27, x23",
         "cset w21, hs",
-        "add w20, w20, w21",
-        "sub w20, w27, w20",
-        "uxth w26, w20",
-        "cmp w26, w27",
+        "add w22, w20, w21",
+        "sub w20, w23, w22",
+        "uxth w22, w20",
+        "cmp w22, w23",
         "cset x20, hi",
-        "cmp w26, w27",
-        "cset x22, hs",
+        "cmp w22, w23",
+        "cset x24, hs",
         "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #16",
-        "rmif x20, #63, #nzCv",
-        "bic w20, w27, w26",
-        "rmif x20, #15, #nzcV"
+        "csel x25, x24, x20, eq",
+        "cmn wzr, w22, lsl #16",
+        "rmif x25, #63, #nzCv",
+        "bic w20, w23, w22",
+        "rmif x20, #15, #nzcV",
+        "mov x26, x22"
       ]
     },
     "lock sbb dword [rax], 1": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP1 0x83 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "adc w21, wzr, w20",
-        "neg w1, w21",
-        "ldaddal w1, w27, [x4]",
+        "mov x21, x4",
+        "adc w22, wzr, w20",
+        "neg w1, w22",
+        "ldaddal w1, w23, [x21]",
+        "mov x27, x23",
         "cfinv",
-        "sbcs w26, w27, w20",
-        "cfinv"
+        "sbcs w21, w23, w20",
+        "cfinv",
+        "mov x26, x21"
       ]
     },
     "lock sbb qword [rax], 1": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP1 0x83 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "adc x21, xzr, x20",
-        "neg x1, x21",
-        "ldaddal x1, x27, [x4]",
+        "mov x21, x4",
+        "adc x22, xzr, x20",
+        "neg x1, x22",
+        "ldaddal x1, x23, [x21]",
+        "mov x27, x23",
         "cfinv",
-        "sbcs x26, x27, x20",
-        "cfinv"
+        "sbcs x21, x23, x20",
+        "cfinv",
+        "mov x26, x21"
       ]
     },
     "lock and byte [rax], 1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x80 /4",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
+        "mov x21, x4",
         "mvn w1, w20",
-        "ldclralb w1, w20, [x4]",
-        "and w26, w20, #0x1",
-        "cmn wzr, w26, lsl #24"
+        "ldclralb w1, w22, [x21]",
+        "and w20, w22, #0x1",
+        "cmn wzr, w20, lsl #24",
+        "mov x26, x20"
       ]
     },
     "lock and byte [rax], 0xFF": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x80 /4",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
+        "mov x21, x4",
         "mvn w1, w20",
-        "ldclralb w1, w20, [x4]",
-        "and w26, w20, #0xff",
-        "cmn wzr, w26, lsl #24"
+        "ldclralb w1, w22, [x21]",
+        "and w20, w22, #0xff",
+        "cmn wzr, w20, lsl #24",
+        "mov x26, x20"
       ]
     },
     "lock and word [rax], 0x100": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x81 /4",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
+        "mov x21, x4",
         "mvn w1, w20",
-        "ldclralh w1, w20, [x4]",
-        "and w26, w20, #0x100",
-        "cmn wzr, w26, lsl #16"
+        "ldclralh w1, w22, [x21]",
+        "and w20, w22, #0x100",
+        "cmn wzr, w20, lsl #16",
+        "mov x26, x20"
       ]
     },
     "lock and word [rax], 0xFFFF": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x81 /4",
       "ExpectedArm64ASM": [
         "mov w20, #0xffff",
+        "mov x21, x4",
         "mvn w1, w20",
-        "ldclralh w1, w20, [x4]",
-        "and w26, w20, #0xffff",
-        "cmn wzr, w26, lsl #16"
+        "ldclralh w1, w22, [x21]",
+        "and w20, w22, #0xffff",
+        "cmn wzr, w20, lsl #16",
+        "mov x26, x20"
       ]
     },
     "lock and dword [rax], 0x100": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /4",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
+        "mov x21, x4",
         "mvn w1, w20",
-        "ldclral w1, w20, [x4]",
-        "ands w26, w20, #0x100"
+        "ldclral w1, w22, [x21]",
+        "ands w20, w22, #0x100",
+        "mov x26, x20"
       ]
     },
     "lock and dword [rax], 0xFFFFFFFF": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /4",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
+        "mov x21, x4",
         "mvn w1, w20",
-        "ldclral w1, w21, [x4]",
-        "ands w26, w21, w20"
+        "ldclral w1, w22, [x21]",
+        "ands w21, w22, w20",
+        "mov x26, x21"
       ]
     },
     "lock and qword [rax], 0x100": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /4",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
+        "mov x21, x4",
         "mvn x1, x20",
-        "ldclral x1, x20, [x4]",
-        "ands x26, x20, #0x100"
+        "ldclral x1, x22, [x21]",
+        "ands x20, x22, #0x100",
+        "mov x26, x20"
       ]
     },
     "lock and qword [rax], -2147483647": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /4",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffff80000001",
+        "mov x21, x4",
         "mvn x1, x20",
-        "ldclral x1, x20, [x4]",
-        "ands x26, x20, #0xffffffff80000001"
+        "ldclral x1, x22, [x21]",
+        "ands x20, x22, #0xffffffff80000001",
+        "mov x26, x20"
       ]
     },
     "lock and word [rax], 1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x83 /4",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
+        "mov x21, x4",
         "mvn w1, w20",
-        "ldclralh w1, w20, [x4]",
-        "and w26, w20, #0x1",
-        "cmn wzr, w26, lsl #16"
+        "ldclralh w1, w22, [x21]",
+        "and w20, w22, #0x1",
+        "cmn wzr, w20, lsl #16",
+        "mov x26, x20"
       ]
     },
     "lock and dword [rax], 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /4",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
+        "mov x21, x4",
         "mvn w1, w20",
-        "ldclral w1, w20, [x4]",
-        "ands w26, w20, #0x1"
+        "ldclral w1, w22, [x21]",
+        "ands w20, w22, #0x1",
+        "mov x26, x20"
       ]
     },
     "lock and qword [rax], 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /4",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
+        "mov x21, x4",
         "mvn x1, x20",
-        "ldclral x1, x20, [x4]",
-        "ands x26, x20, #0x1"
+        "ldclral x1, x22, [x21]",
+        "ands x20, x22, #0x1",
+        "mov x26, x20"
       ]
     },
     "lock sub byte [rax], 1": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP1 0x80 /5",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
+        "mov x21, x4",
         "neg w1, w20",
-        "ldaddalb w1, w27, [x4]",
-        "lsl w0, w27, #24",
+        "ldaddalb w1, w22, [x21]",
+        "mov x27, x22",
+        "lsl w0, w22, #24",
         "cmp w0, w20, lsl #24",
-        "sub w26, w27, #0x1 (1)",
+        "sub w20, w22, #0x1 (1)",
+        "mov x26, x20",
         "cfinv"
       ]
     },
     "lock sub byte [rax], 0xFF": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP1 0x80 /5",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
+        "mov x21, x4",
         "neg w1, w20",
-        "ldaddalb w1, w21, [x4]",
-        "mvn w27, w21",
-        "lsl w0, w21, #24",
+        "ldaddalb w1, w22, [x21]",
+        "mvn w21, w22",
+        "mov x27, x21",
+        "lsl w0, w22, #24",
         "cmp w0, w20, lsl #24",
-        "sub w26, w21, #0xff (255)",
+        "sub w20, w22, #0xff (255)",
+        "mov x26, x20",
         "cfinv"
       ]
     },
     "lock sub word [rax], 0x100": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP1 0x81 /5",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
+        "mov x21, x4",
         "neg w1, w20",
-        "ldaddalh w1, w27, [x4]",
-        "lsl w0, w27, #16",
+        "ldaddalh w1, w22, [x21]",
+        "mov x27, x22",
+        "lsl w0, w22, #16",
         "cmp w0, w20, lsl #16",
-        "sub w26, w27, #0x100 (256)",
+        "sub w20, w22, #0x100 (256)",
+        "mov x26, x20",
         "cfinv"
       ]
     },
     "lock sub word [rax], 0xFFFF": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP1 0x81 /5",
       "ExpectedArm64ASM": [
         "mov w20, #0xffff",
+        "mov x21, x4",
         "neg w1, w20",
-        "ldaddalh w1, w21, [x4]",
-        "mvn w27, w21",
-        "lsl w0, w21, #16",
+        "ldaddalh w1, w22, [x21]",
+        "mvn w21, w22",
+        "mov x27, x21",
+        "lsl w0, w22, #16",
         "cmp w0, w20, lsl #16",
-        "sub w26, w21, w20",
+        "sub w21, w22, w20",
+        "mov x26, x21",
         "cfinv"
       ]
     },
     "lock sub dword [rax], 0x100": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP1 0x81 /5",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
+        "mov x21, x4",
         "neg w1, w20",
-        "ldaddal w1, w27, [x4]",
-        "subs w26, w27, #0x100 (256)",
+        "ldaddal w1, w22, [x21]",
+        "mov x27, x22",
+        "subs w20, w22, #0x100 (256)",
+        "mov x26, x20",
         "cfinv"
       ]
     },
     "lock sub dword [rax], 0xFFFFFFFF": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP1 0x81 /5",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
+        "mov x21, x4",
         "neg w1, w20",
-        "ldaddal w1, w21, [x4]",
-        "mvn w27, w21",
-        "subs w26, w21, w20",
+        "ldaddal w1, w22, [x21]",
+        "mvn w21, w22",
+        "mov x27, x21",
+        "subs w21, w22, w20",
+        "mov x26, x21",
         "cfinv"
       ]
     },
     "lock sub qword [rax], 0x100": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP1 0x81 /5",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
+        "mov x21, x4",
         "neg x1, x20",
-        "ldaddal x1, x27, [x4]",
-        "subs x26, x27, #0x100 (256)",
+        "ldaddal x1, x22, [x21]",
+        "mov x27, x22",
+        "subs x20, x22, #0x100 (256)",
+        "mov x26, x20",
         "cfinv"
       ]
     },
     "lock sub qword [rax], -2147483647": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP1 0x81 /5",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffff80000001",
+        "mov x21, x4",
         "neg x1, x20",
-        "ldaddal x1, x27, [x4]",
-        "subs x26, x27, x20",
+        "ldaddal x1, x22, [x21]",
+        "mov x27, x22",
+        "subs x21, x22, x20",
+        "mov x26, x21",
         "cfinv"
       ]
     },
     "lock sub word [rax], 1": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP1 0x83 /5",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
+        "mov x21, x4",
         "neg w1, w20",
-        "ldaddalh w1, w27, [x4]",
-        "lsl w0, w27, #16",
+        "ldaddalh w1, w22, [x21]",
+        "mov x27, x22",
+        "lsl w0, w22, #16",
         "cmp w0, w20, lsl #16",
-        "sub w26, w27, #0x1 (1)",
+        "sub w20, w22, #0x1 (1)",
+        "mov x26, x20",
         "cfinv"
       ]
     },
     "lock sub dword [rax], 1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP1 0x83 /5",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
+        "mov x21, x4",
         "neg w1, w20",
-        "ldaddal w1, w27, [x4]",
-        "subs w26, w27, #0x1 (1)",
+        "ldaddal w1, w22, [x21]",
+        "mov x27, x22",
+        "subs w20, w22, #0x1 (1)",
+        "mov x26, x20",
         "cfinv"
       ]
     },
     "lock sub qword [rax], 1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP1 0x83 /5",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
+        "mov x21, x4",
         "neg x1, x20",
-        "ldaddal x1, x27, [x4]",
-        "subs x26, x27, #0x1 (1)",
+        "ldaddal x1, x22, [x21]",
+        "mov x27, x22",
+        "subs x20, x22, #0x1 (1)",
+        "mov x26, x20",
         "cfinv"
       ]
     },
     "lock xor byte [rax], 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x80 /6",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "ldeoralb w20, w20, [x4]",
-        "eor w26, w20, #0x1",
-        "cmn wzr, w26, lsl #24"
+        "mov x21, x4",
+        "ldeoralb w20, w22, [x21]",
+        "eor w20, w22, #0x1",
+        "mov x26, x20",
+        "cmn wzr, w20, lsl #24"
       ]
     },
     "lock xor byte [rax], 0xFF": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x80 /6",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
-        "ldeoralb w20, w20, [x4]",
-        "eor w26, w20, #0xff",
-        "cmn wzr, w26, lsl #24"
+        "mov x21, x4",
+        "ldeoralb w20, w22, [x21]",
+        "eor w20, w22, #0xff",
+        "mov x26, x20",
+        "cmn wzr, w20, lsl #24"
       ]
     },
     "lock xor word [rax], 0x100": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /6",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "ldeoralh w20, w20, [x4]",
-        "eor w26, w20, #0x100",
-        "cmn wzr, w26, lsl #16"
+        "mov x21, x4",
+        "ldeoralh w20, w22, [x21]",
+        "eor w20, w22, #0x100",
+        "mov x26, x20",
+        "cmn wzr, w20, lsl #16"
       ]
     },
     "lock xor word [rax], 0xFFFF": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /6",
       "ExpectedArm64ASM": [
         "mov w20, #0xffff",
-        "ldeoralh w20, w20, [x4]",
-        "eor w26, w20, #0xffff",
-        "cmn wzr, w26, lsl #16"
+        "mov x21, x4",
+        "ldeoralh w20, w22, [x21]",
+        "eor w20, w22, #0xffff",
+        "mov x26, x20",
+        "cmn wzr, w20, lsl #16"
       ]
     },
     "lock xor dword [rax], 0x100": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /6",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "ldeoral w20, w20, [x4]",
-        "eor w26, w20, #0x100",
-        "tst w26, w26"
+        "mov x21, x4",
+        "ldeoral w20, w22, [x21]",
+        "eor w20, w22, #0x100",
+        "mov x26, x20",
+        "tst w20, w20"
       ]
     },
     "lock xor dword [rax], 0xFFFFFFFF": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /6",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "ldeoral w20, w21, [x4]",
-        "eor w26, w21, w20",
-        "tst w26, w26"
+        "mov x21, x4",
+        "ldeoral w20, w22, [x21]",
+        "eor w21, w22, w20",
+        "mov x26, x21",
+        "tst w21, w21"
       ]
     },
     "lock xor qword [rax], 0x100": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /6",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "ldeoral x20, x20, [x4]",
-        "eor x26, x20, #0x100",
-        "tst x26, x26"
+        "mov x21, x4",
+        "ldeoral x20, x22, [x21]",
+        "eor x20, x22, #0x100",
+        "mov x26, x20",
+        "tst x20, x20"
       ]
     },
     "lock xor qword [rax], -2147483647": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /6",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffff80000001",
-        "ldeoral x20, x20, [x4]",
-        "eor x26, x20, #0xffffffff80000001",
-        "tst x26, x26"
+        "mov x21, x4",
+        "ldeoral x20, x22, [x21]",
+        "eor x20, x22, #0xffffffff80000001",
+        "mov x26, x20",
+        "tst x20, x20"
       ]
     },
     "lock xor word [rax], 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /6",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "ldeoralh w20, w20, [x4]",
-        "eor w26, w20, #0x1",
-        "cmn wzr, w26, lsl #16"
+        "mov x21, x4",
+        "ldeoralh w20, w22, [x21]",
+        "eor w20, w22, #0x1",
+        "mov x26, x20",
+        "cmn wzr, w20, lsl #16"
       ]
     },
     "lock xor dword [rax], 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /6",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "ldeoral w20, w20, [x4]",
-        "eor w26, w20, #0x1",
-        "tst w26, w26"
+        "mov x21, x4",
+        "ldeoral w20, w22, [x21]",
+        "eor w20, w22, #0x1",
+        "mov x26, x20",
+        "tst w20, w20"
       ]
     },
     "lock xor qword [rax], 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /6",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "ldeoral x20, x20, [x4]",
-        "eor x26, x20, #0x1",
-        "tst x26, x26"
+        "mov x21, x4",
+        "ldeoral x20, x22, [x21]",
+        "eor x20, x22, #0x1",
+        "mov x26, x20",
+        "tst x20, x20"
       ]
     },
     "lock dec byte [rax]": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP3 0xfe /1",
       "ExpectedArm64ASM": [
-        "mov w20, #0xff",
-        "ldaddalb w20, w27, [x4]",
-        "sub w26, w27, #0x1 (1)",
-        "setf8 w26",
-        "bic w20, w27, w26",
-        "rmif x20, #7, #nzcV"
+        "mov x20, x4",
+        "mov w21, #0xff",
+        "ldaddalb w21, w22, [x20]",
+        "sub w20, w22, #0x1 (1)",
+        "mov x26, x20",
+        "mov x27, x22",
+        "setf8 w20",
+        "bic w21, w22, w20",
+        "rmif x21, #7, #nzcV"
       ]
     },
     "lock not byte [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP2 0xf6 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
-        "steorlb w20, [x4]"
+        "mov x21, x4",
+        "steorlb w20, [x21]"
       ]
     },
     "lock not word [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP2 0xf7 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0xffff",
-        "steorlh w20, [x4]"
+        "mov x21, x4",
+        "steorlh w20, [x21]"
       ]
     },
     "lock not dword [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP2 0xf7 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "steorl w20, [x4]"
+        "mov x21, x4",
+        "steorl w20, [x21]"
       ]
     },
     "lock not qword [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP2 0xf7 /2",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
-        "steorl x20, [x4]"
+        "mov x21, x4",
+        "steorl x20, [x21]"
       ]
     },
     "lock neg byte [rax]": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP2 0xf6 /3",
       "ExpectedArm64ASM": [
-        "ldaxrb w1, [x4]",
+        "mov x20, x4",
+        "ldaxrb w1, [x20]",
         "neg w2, w1",
-        "stlxrb w3, w2, [x4]",
+        "stlxrb w3, w2, [x20]",
         "cbnz w3, #-0xc",
-        "mov w27, w1",
-        "cmp wzr, w27, lsl #24",
-        "neg w26, w27",
+        "mov w21, w1",
+        "mov x27, x21",
+        "cmp wzr, w21, lsl #24",
+        "neg w20, w21",
+        "mov x26, x20",
         "cfinv"
       ]
     },
     "lock neg word [rax]": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP2 0xf7 /3",
       "ExpectedArm64ASM": [
-        "ldaxrh w1, [x4]",
+        "mov x20, x4",
+        "ldaxrh w1, [x20]",
         "neg w2, w1",
-        "stlxrh w3, w2, [x4]",
+        "stlxrh w3, w2, [x20]",
         "cbnz w3, #-0xc",
-        "mov w27, w1",
-        "cmp wzr, w27, lsl #16",
-        "neg w26, w27",
+        "mov w21, w1",
+        "mov x27, x21",
+        "cmp wzr, w21, lsl #16",
+        "neg w20, w21",
+        "mov x26, x20",
         "cfinv"
       ]
     },
     "lock neg dword [rax]": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP2 0xf7 /3",
       "ExpectedArm64ASM": [
-        "ldaxr w1, [x4]",
+        "mov x20, x4",
+        "ldaxr w1, [x20]",
         "neg w2, w1",
-        "stlxr w3, w2, [x4]",
+        "stlxr w3, w2, [x20]",
         "cbnz w3, #-0xc",
-        "mov w27, w1",
-        "negs w26, w27",
+        "mov w21, w1",
+        "mov x27, x21",
+        "negs w20, w21",
+        "mov x26, x20",
         "cfinv"
       ]
     },
     "lock neg qword [rax]": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP2 0xf7 /3",
       "ExpectedArm64ASM": [
-        "ldaxr x1, [x4]",
+        "mov x20, x4",
+        "ldaxr x1, [x20]",
         "neg x2, x1",
-        "stlxr w3, x2, [x4]",
+        "stlxr w3, x2, [x20]",
         "cbnz x3, #-0xc",
-        "mov x27, x1",
-        "negs x26, x27",
+        "mov x21, x1",
+        "mov x27, x21",
+        "negs x20, x21",
+        "mov x26, x20",
         "cfinv"
       ]
     },
     "lock dec word [rax]": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP4 0xfe /1",
       "ExpectedArm64ASM": [
-        "mov w20, #0xffff",
-        "ldaddalh w20, w27, [x4]",
-        "sub w26, w27, #0x1 (1)",
-        "setf16 w26",
-        "bic w20, w27, w26",
-        "rmif x20, #15, #nzcV"
+        "mov x20, x4",
+        "mov w21, #0xffff",
+        "ldaddalh w21, w22, [x20]",
+        "sub w20, w22, #0x1 (1)",
+        "mov x26, x20",
+        "mov x27, x22",
+        "setf16 w20",
+        "bic w21, w22, w20",
+        "rmif x21, #15, #nzcV"
       ]
     },
     "lock dec dword [rax]": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP4 0xfe /1",
       "ExpectedArm64ASM": [
-        "mov w20, #0xffffffff",
-        "ldaddal w20, w27, [x4]",
+        "mov x20, x4",
+        "mov w21, #0xffffffff",
+        "ldaddal w21, w22, [x20]",
         "cset w20, hs",
-        "subs w26, w27, #0x1 (1)",
+        "mov x27, x22",
+        "subs w21, w22, #0x1 (1)",
+        "mov x26, x21",
         "rmif x20, #63, #nzCv"
       ]
     },
     "lock dec qword [rax]": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP4 0xfe /1",
       "ExpectedArm64ASM": [
-        "mov x20, #0xffffffffffffffff",
-        "ldaddal x20, x27, [x4]",
+        "mov x20, x4",
+        "mov x21, #0xffffffffffffffff",
+        "ldaddal x21, x22, [x20]",
         "cset w20, hs",
-        "subs x26, x27, #0x1 (1)",
+        "mov x27, x22",
+        "subs x21, x22, #0x1 (1)",
+        "mov x26, x21",
         "rmif x20, #63, #nzCv"
       ]
     },
     "lock inc byte [rax]": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP4 0xfe /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "ldaddalb w20, w27, [x4]",
-        "add w26, w27, #0x1 (1)",
-        "setf8 w26",
-        "bic w20, w26, w27",
-        "rmif x20, #7, #nzcV"
+        "mov x21, x4",
+        "ldaddalb w20, w22, [x21]",
+        "add w20, w22, #0x1 (1)",
+        "mov x26, x20",
+        "mov x27, x22",
+        "setf8 w20",
+        "bic w21, w20, w22",
+        "rmif x21, #7, #nzcV"
       ]
     },
     "lock inc word [rax]": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP4 0xfe /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "ldaddalh w20, w27, [x4]",
-        "add w26, w27, #0x1 (1)",
-        "setf16 w26",
-        "bic w20, w26, w27",
-        "rmif x20, #15, #nzcV"
+        "mov x21, x4",
+        "ldaddalh w20, w22, [x21]",
+        "add w20, w22, #0x1 (1)",
+        "mov x26, x20",
+        "mov x27, x22",
+        "setf16 w20",
+        "bic w21, w20, w22",
+        "rmif x21, #15, #nzcV"
       ]
     },
     "lock inc dword [rax]": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP4 0xfe /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "ldaddal w20, w27, [x4]",
+        "mov x21, x4",
+        "ldaddal w20, w22, [x21]",
         "cset w20, hs",
-        "adds w26, w27, #0x1 (1)",
+        "mov x27, x22",
+        "adds w21, w22, #0x1 (1)",
+        "mov x26, x21",
         "rmif x20, #63, #nzCv"
       ]
     },
     "lock inc qword [rax]": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP4 0xfe /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "ldaddal x20, x27, [x4]",
+        "mov x21, x4",
+        "ldaddal x20, x22, [x21]",
         "cset w20, hs",
-        "adds x26, x27, #0x1 (1)",
+        "mov x27, x22",
+        "adds x21, x22, #0x1 (1)",
+        "mov x26, x21",
         "rmif x20, #63, #nzCv"
       ]
     }
diff --git a/unittests/InstructionCountCI/FlagM/FlagOpts.json b/unittests/InstructionCountCI/FlagM/FlagOpts.json
index c4c12e9adc..21dac1d70f 100644
--- a/unittests/InstructionCountCI/FlagM/FlagOpts.json
+++ b/unittests/InstructionCountCI/FlagM/FlagOpts.json
@@ -11,304 +11,424 @@
   },
   "Instructions": {
     "Chained add": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 10,
       "x86Insts": [
         "add rax, rbx",
         "adc rcx, rcx"
       ],
       "ExpectedArm64ASM": [
-        "adds x4, x4, x7",
-        "mov w27, #0x0",
-        "adcs x26, x5, x5",
-        "mov x5, x26"
+        "mov x20, x7",
+        "mov x21, x4",
+        "adds x22, x21, x20",
+        "mov x4, x22",
+        "mov x20, x5",
+        "mov w21, #0x0",
+        "mov x27, x21",
+        "adcs x21, x20, x20",
+        "mov x26, x21",
+        "mov x5, x21"
       ]
     },
     "Chained sub": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 14,
       "x86Insts": [
         "sub rax, rbx",
         "sbb rcx, rdx"
       ],
       "ExpectedArm64ASM": [
-        "subs x4, x4, x7",
+        "mov x20, x7",
+        "mov x21, x4",
+        "subs x22, x21, x20",
         "cfinv",
-        "eor w27, w5, w6",
+        "mov x4, x22",
+        "mov x20, x6",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
         "cfinv",
-        "sbcs x26, x5, x6",
+        "sbcs x22, x21, x20",
         "cfinv",
-        "mov x5, x26"
+        "mov x26, x22",
+        "mov x5, x22"
       ]
     },
     "Inverted add": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 12,
       "x86Insts": [
         "add rax, rbx",
         "adc rcx, rdx",
         "cmc"
       ],
       "ExpectedArm64ASM": [
-        "adds x4, x4, x7",
-        "eor w27, w5, w6",
-        "adcs x26, x5, x6",
-        "mov x5, x26",
+        "mov x20, x7",
+        "mov x21, x4",
+        "adds x22, x21, x20",
+        "mov x4, x22",
+        "mov x20, x6",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "adcs x22, x21, x20",
+        "mov x26, x22",
+        "mov x5, x22",
         "cfinv"
       ]
     },
     "Inverted sub": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 14,
       "x86Insts": [
         "sub rax, rbx",
         "sbb rcx, rcx",
         "cmc"
       ],
       "ExpectedArm64ASM": [
-        "subs x4, x4, x7",
+        "mov x20, x7",
+        "mov x21, x4",
+        "subs x22, x21, x20",
         "cfinv",
-        "mov w27, #0x0",
+        "mov x4, x22",
+        "mov x20, x5",
+        "mov w21, #0x0",
+        "mov x27, x21",
         "cfinv",
-        "sbcs x26, x5, x5",
+        "sbcs x21, x20, x20",
         "cfinv",
-        "mov x5, x26",
+        "mov x26, x21",
+        "mov x5, x21",
         "cfinv"
       ]
     },
     "ADC dead": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 9,
       "x86Insts": [
         "add rax, rbx",
         "adc rcx, rcx",
         "test rcx, rcx"
       ],
       "ExpectedArm64ASM": [
-        "adds x4, x4, x7",
-        "adc x5, x5, x5",
-        "ands x26, x5, x5"
+        "mov x20, x7",
+        "mov x21, x4",
+        "adds x22, x21, x20",
+        "mov x4, x22",
+        "mov x20, x5",
+        "adc x21, x20, x20",
+        "mov x5, x21",
+        "ands x20, x21, x21",
+        "mov x26, x20"
       ]
     },
     "INC consumed": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "x86Insts": [
         "add rax, rbx",
         "inc rax"
       ],
       "ExpectedArm64ASM": [
-        "adds x4, x4, x7",
+        "mov x20, x7",
+        "mov x21, x4",
+        "adds x22, x21, x20",
+        "mov x4, x22",
         "cset w20, hs",
-        "mov x27, x4",
-        "adds x26, x4, #0x1 (1)",
+        "mov x27, x22",
+        "adds x21, x22, #0x1 (1)",
+        "mov x26, x21",
         "rmif x20, #63, #nzCv",
-        "mov x4, x26"
+        "mov x4, x21"
       ]
     },
     "INC dead": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 9,
       "x86Insts": [
         "add rax, rbx",
         "inc rax",
         "test rax, rdx"
       ],
       "ExpectedArm64ASM": [
-        "add x4, x4, x7",
-        "add x4, x4, #0x1 (1)",
-        "ands x26, x4, x6"
+        "mov x20, x7",
+        "mov x21, x4",
+        "add x22, x21, x20",
+        "mov x4, x22",
+        "add x20, x22, #0x1 (1)",
+        "mov x4, x20",
+        "mov x21, x6",
+        "ands x22, x20, x21",
+        "mov x26, x22"
       ]
     },
     "DEC consumed": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "x86Insts": [
         "sub rax, rbx",
         "dec rax"
       ],
       "ExpectedArm64ASM": [
-        "subs x4, x4, x7",
+        "mov x20, x7",
+        "mov x21, x4",
+        "subs x22, x21, x20",
         "cfinv",
+        "mov x4, x22",
         "cset w20, hs",
-        "mov x27, x4",
-        "subs x26, x4, #0x1 (1)",
+        "mov x27, x22",
+        "subs x21, x22, #0x1 (1)",
+        "mov x26, x21",
         "rmif x20, #63, #nzCv",
-        "mov x4, x26"
+        "mov x4, x21"
       ]
     },
     "DEC dead": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 9,
       "x86Insts": [
         "sub rax, rbx",
         "dec rax",
         "test rax, rcx"
       ],
       "ExpectedArm64ASM": [
-        "sub x4, x4, x7",
-        "sub x4, x4, #0x1 (1)",
-        "ands x26, x4, x5"
+        "mov x20, x7",
+        "mov x21, x4",
+        "sub x22, x21, x20",
+        "mov x4, x22",
+        "sub x20, x22, #0x1 (1)",
+        "mov x4, x20",
+        "mov x21, x5",
+        "ands x22, x20, x21",
+        "mov x26, x22"
       ]
     },
     "8-bit DEC consumed": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 19,
       "x86Insts": [
         "sub al, ah",
         "dec al"
       ],
       "ExpectedArm64ASM": [
-        "lsr w20, w4, #8",
-        "lsl w0, w4, #24",
-        "cmp w0, w20, lsl #24",
-        "sub w20, w4, w20",
+        "mov x20, x4",
+        "lsr w21, w20, #8",
+        "lsl w0, w20, #24",
+        "cmp w0, w21, lsl #24",
+        "sub w22, w20, w21",
         "cfinv",
-        "bfxil x4, x20, #0, #8",
-        "uxtb w27, w4",
-        "sub w26, w27, #0x1 (1)",
-        "setf8 w26",
-        "bic w20, w27, w26",
-        "rmif x20, #7, #nzcV",
-        "bfxil x4, x26, #0, #8"
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #8",
+        "mov x4, x21",
+        "uxtb w20, w21",
+        "sub w22, w20, #0x1 (1)",
+        "mov x26, x22",
+        "mov x27, x20",
+        "setf8 w22",
+        "bic w23, w20, w22",
+        "rmif x23, #7, #nzcV",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #8",
+        "mov x4, x20"
       ]
     },
     "8-bit DEC dead": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 14,
       "x86Insts": [
         "sub al, ah",
         "dec al",
         "test al, al"
       ],
       "ExpectedArm64ASM": [
-        "lsr w20, w4, #8",
-        "sub w20, w4, w20",
-        "bfxil x4, x20, #0, #8",
-        "uxtb w20, w4",
-        "sub w20, w20, #0x1 (1)",
-        "bfxil x4, x20, #0, #8",
-        "mov x26, x4",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x4",
+        "lsr w21, w20, #8",
+        "sub w22, w20, w21",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #8",
+        "mov x4, x21",
+        "uxtb w20, w21",
+        "sub w22, w20, #0x1 (1)",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #8",
+        "mov x4, x20",
+        "mov x21, x20",
+        "cmn wzr, w21, lsl #24",
+        "mov x26, x21"
       ]
     },
     "Variable shift dead": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 7,
       "x86Insts": [
         "sar rax, cl",
         "test rax, rdx"
       ],
       "ExpectedArm64ASM": [
-        "asr x4, x4, x5",
-        "ands x26, x4, x6"
+        "mov x20, x4",
+        "mov x21, x5",
+        "asr x22, x20, x21",
+        "mov x4, x22",
+        "mov x20, x6",
+        "ands x21, x22, x20",
+        "mov x26, x21"
       ]
     },
     "Variable rotate-through-carry dead": {
-      "ExpectedInstructionCount": 15,
+      "ExpectedInstructionCount": 22,
       "x86Insts": [
         "rcr rax, cl",
         "test rax, rdx"
       ],
       "ExpectedArm64ASM": [
-        "and x20, x5, #0x3f",
-        "cbz x20, #+0x34",
-        "lsr x20, x4, x5",
-        "cset w21, hs",
-        "neg x22, x5",
-        "lsl x23, x4, x22",
-        "orr x20, x20, x23, lsl #1",
-        "sub x23, x5, #0x1 (1)",
-        "lsr x23, x4, x23",
-        "rmif x23, #63, #nzCv",
-        "lsl x21, x21, x22",
-        "orr x4, x20, x21",
-        "eor x20, x4, x4, lsr #1",
+        "mov x20, x5",
+        "and x21, x20, #0x3f",
+        "cbz x21, #+0x40",
+        "mov x20, x5",
+        "mov x21, x4",
+        "lsr x22, x21, x20",
+        "cset w23, hs",
+        "neg x24, x20",
+        "lsl x25, x21, x24",
+        "orr x30, x22, x25, lsl #1",
+        "sub x22, x20, #0x1 (1)",
+        "lsr x20, x21, x22",
+        "rmif x20, #63, #nzCv",
+        "lsl x20, x23, x24",
+        "orr x21, x30, x20",
+        "eor x20, x21, x21, lsr #1",
         "rmif x20, #62, #nzcV",
-        "ands x26, x4, x6"
+        "mov x4, x21",
+        "mov x20, x6",
+        "mov x21, x4",
+        "ands x22, x21, x20",
+        "mov x26, x22"
       ]
     },
     "Partial NZCV select (cmp)": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 11,
       "x86Insts": [
         "cmp rax, rbx",
         "setz cl",
         "test cl, cl"
       ],
       "ExpectedArm64ASM": [
-        "subs x20, x4, x7",
+        "mov x20, x7",
+        "mov x21, x4",
+        "subs x22, x21, x20",
         "cset x20, eq",
-        "bfxil x5, x20, #0, #8",
-        "mov x26, x5",
-        "cmn wzr, w26, lsl #24"
+        "mov x21, x5",
+        "mov x23, x21",
+        "bfxil x23, x20, #0, #8",
+        "mov x5, x23",
+        "mov x20, x23",
+        "cmn wzr, w20, lsl #24",
+        "mov x26, x20"
       ]
     },
     "Partial NZCV select (add)": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 12,
       "x86Insts": [
         "add rax, rbx",
         "setz cl",
         "test cl, cl"
       ],
       "ExpectedArm64ASM": [
-        "adds x4, x4, x7",
+        "mov x20, x7",
+        "mov x21, x4",
+        "adds x22, x21, x20",
+        "mov x4, x22",
         "cset x20, eq",
-        "bfxil x5, x20, #0, #8",
-        "mov x26, x5",
-        "cmn wzr, w26, lsl #24"
+        "mov x21, x5",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x5, x22",
+        "mov x20, x22",
+        "cmn wzr, w20, lsl #24",
+        "mov x26, x20"
       ]
     },
     "AND use only ZF": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 12,
       "x86Insts": [
         "and eax, ebx",
         "setz cl",
         "test cl, cl"
       ],
       "ExpectedArm64ASM": [
-        "ands w4, w4, w7",
+        "mov x20, x7",
+        "mov x21, x4",
+        "ands w22, w21, w20",
+        "mov x4, x22",
         "cset x20, eq",
-        "bfxil x5, x20, #0, #8",
-        "mov x26, x5",
-        "cmn wzr, w26, lsl #24"
+        "mov x21, x5",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x5, x22",
+        "mov x20, x22",
+        "cmn wzr, w20, lsl #24",
+        "mov x26, x20"
       ]
     },
     "AND use only PF": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 15,
       "x86Insts": [
         "and eax, ebx",
         "setp cl",
         "test cl, cl"
       ],
       "ExpectedArm64ASM": [
-        "and w4, w4, w7",
-        "eor w20, w4, w4, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "and x20, x20, #0x1",
-        "bfxil x5, x20, #0, #8",
-        "mov x26, x5",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x7",
+        "mov x21, x4",
+        "and w22, w21, w20",
+        "mov x4, x22",
+        "eor w20, w22, w22, lsr #4",
+        "eor w21, w20, w20, lsr #2",
+        "eon w20, w21, w21, lsr #1",
+        "and x21, x20, #0x1",
+        "mov x20, x5",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x5, x22",
+        "mov x20, x22",
+        "cmn wzr, w20, lsl #24",
+        "mov x26, x20"
       ]
     },
     "Dead cmpxchg flags": {
-      "ExpectedInstructionCount": 23,
+      "ExpectedInstructionCount": 39,
       "x86Insts": [
         "cmpxchg8b [rbp]",
         "test rax, rax"
       ],
       "ExpectedArm64ASM": [
-        "add x20, x9, #0x0 (0)",
-        "mov w21, w4",
-        "mov w22, w6",
-        "mov w23, w22",
-        "mov w22, w21",
-        "mov w21, w7",
-        "mov w24, w5",
-        "mov w25, w24",
-        "mov w24, w21",
-        "mov w2, w22",
-        "mov w3, w23",
-        "caspal w2, w3, w24, w25, [x20]",
+        "sub sp, sp, #0x40 (64)",
+        "mov x20, x9",
+        "add x21, x20, #0x0 (0)",
+        "mov x20, x4",
+        "mov w22, w20",
+        "mov x23, x6",
+        "mov w24, w23",
+        "mov x30, x24",
+        "mov w24, w22",
+        "mov w25, w30",
+        "mov x22, x7",
+        "mov w30, w22",
+        "mov x22, x5",
+        "mov w18, w22",
+        "str x23, [sp]",
+        "mov w22, w30",
+        "mov w23, w18",
+        "str x20, [sp, #32]",
+        "mov x30, x21",
+        "mov w2, w24",
+        "mov w3, w25",
+        "caspal w2, w3, w22, w23, [x30]",
         "mov w20, w2",
         "mov w21, w3",
-        "mov w24, w20",
-        "mov w25, w21",
+        "mov w22, w20",
+        "mov w23, w21",
         "mrs x0, nzcv",
-        "cmp w20, w22",
-        "ccmp w21, w23, #nzcv, eq",
+        "cmp w20, w24",
+        "ccmp w21, w25, #nzcv, eq",
         "rmif x0, #0, #NzCV",
-        "csel x4, x24, x4, ne",
-        "csel x6, x25, x6, ne",
-        "ands x26, x4, x4"
+        "ldr x20, [sp, #32]",
+        "csel x21, x22, x20, ne",
+        "mov x4, x21",
+        "ldr x20, [sp]",
+        "csel x22, x23, x20, ne",
+        "mov x6, x22",
+        "ands x20, x21, x21",
+        "mov x26, x20",
+        "add sp, sp, #0x40 (64)"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/FlagM/H0F38.json b/unittests/InstructionCountCI/FlagM/H0F38.json
index fe2d76bb42..984e61e98b 100644
--- a/unittests/InstructionCountCI/FlagM/H0F38.json
+++ b/unittests/InstructionCountCI/FlagM/H0F38.json
@@ -12,107 +12,123 @@
   },
   "Instructions": {
     "ptest xmm0, xmm1": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 16,
       "Comment": [
         "0x66 0x0f 0x38 0x17"
       ],
       "ExpectedArm64ASM": [
-        "and v2.16b, v16.16b, v17.16b",
-        "bic v3.16b, v17.16b, v16.16b",
-        "umaxv h2, v2.8h",
-        "umaxv h3, v3.8h",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "and v4.16b, v2.16b, v3.16b",
+        "bic v5.16b, v3.16b, v2.16b",
+        "umaxv h2, v4.8h",
+        "umaxv h3, v5.8h",
         "umov w20, v2.h[0]",
         "umov w21, v3.h[0]",
-        "mov w27, #0x0",
-        "mov w26, #0x1",
+        "mov w22, #0x0",
+        "mov w23, #0x1",
         "cmp x21, #0x0 (0)",
-        "cset x21, eq",
+        "cset x24, eq",
         "tst w20, w20",
-        "rmif x21, #63, #nzCv"
+        "rmif x24, #63, #nzCv",
+        "mov x26, x23",
+        "mov x27, x22"
       ]
     },
     "adcx eax, ebx": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 17,
       "Comment": [
         "0x66 0x0f 0x38 0xf6"
       ],
       "ExpectedArm64ASM": [
         "cset w20, hs",
-        "mov w21, w7",
-        "mov w22, w4",
-        "add w23, w21, w20",
-        "add w4, w22, w23",
-        "mrs x22, nzcv",
-        "cmp w4, w21",
+        "mov x21, x7",
+        "mov w22, w21",
+        "mov x21, x4",
+        "mov w23, w21",
+        "add w21, w22, w20",
+        "add w24, w23, w21",
+        "mov x4, x24",
+        "mrs x21, nzcv",
+        "cmp w24, w22",
         "cset x23, lo",
-        "cmp w4, w21",
-        "cset x21, ls",
+        "cmp w24, w22",
+        "cset x25, ls",
         "cmp x20, #0x1 (1)",
-        "csel x20, x21, x23, eq",
-        "msr nzcv, x22",
-        "rmif x20, #63, #nzCv"
+        "csel x22, x25, x23, eq",
+        "msr nzcv, x21",
+        "rmif x22, #63, #nzCv"
       ]
     },
     "adcx rax, rbx": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0x66 REX.W 0x0f 0x38 0xf6"
       ],
       "ExpectedArm64ASM": [
         "cset w20, hs",
-        "add x21, x7, x20",
-        "add x4, x4, x21",
-        "mrs x21, nzcv",
-        "cmp x4, x7",
-        "cset x22, lo",
-        "cmp x4, x7",
-        "cset x23, ls",
+        "mov x21, x7",
+        "mov x22, x4",
+        "add x23, x21, x20",
+        "add x24, x22, x23",
+        "mov x4, x24",
+        "mrs x22, nzcv",
+        "cmp x24, x21",
+        "cset x23, lo",
+        "cmp x24, x21",
+        "cset x25, ls",
         "cmp x20, #0x1 (1)",
-        "csel x20, x23, x22, eq",
-        "msr nzcv, x21",
-        "rmif x20, #63, #nzCv"
+        "csel x21, x25, x23, eq",
+        "msr nzcv, x22",
+        "rmif x21, #63, #nzCv"
       ]
     },
     "adox eax, ebx": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 17,
       "Comment": [
         "0xf3 0x0f 0x38 0xf6"
       ],
       "ExpectedArm64ASM": [
         "cset w20, vs",
-        "mov w21, w7",
-        "mov w22, w4",
-        "add w23, w21, w20",
-        "add w4, w22, w23",
-        "mrs x22, nzcv",
-        "cmp w4, w21",
+        "mov x21, x7",
+        "mov w22, w21",
+        "mov x21, x4",
+        "mov w23, w21",
+        "add w21, w22, w20",
+        "add w24, w23, w21",
+        "mov x4, x24",
+        "mrs x21, nzcv",
+        "cmp w24, w22",
         "cset x23, lo",
-        "cmp w4, w21",
-        "cset x21, ls",
+        "cmp w24, w22",
+        "cset x25, ls",
         "cmp x20, #0x1 (1)",
-        "csel x20, x21, x23, eq",
-        "msr nzcv, x22",
-        "rmif x20, #0, #nzcV"
+        "csel x22, x25, x23, eq",
+        "msr nzcv, x21",
+        "rmif x22, #0, #nzcV"
       ]
     },
     "adox rax, rbx": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xf3 REX.W 0x0f 0x38 0xf6"
       ],
       "ExpectedArm64ASM": [
         "cset w20, vs",
-        "add x21, x7, x20",
-        "add x4, x4, x21",
-        "mrs x21, nzcv",
-        "cmp x4, x7",
-        "cset x22, lo",
-        "cmp x4, x7",
-        "cset x23, ls",
+        "mov x21, x7",
+        "mov x22, x4",
+        "add x23, x21, x20",
+        "add x24, x22, x23",
+        "mov x4, x24",
+        "mrs x22, nzcv",
+        "cmp x24, x21",
+        "cset x23, lo",
+        "cmp x24, x21",
+        "cset x25, ls",
         "cmp x20, #0x1 (1)",
-        "csel x20, x23, x22, eq",
-        "msr nzcv, x21",
-        "rmif x20, #0, #nzcV"
+        "csel x21, x25, x23, eq",
+        "msr nzcv, x22",
+        "rmif x21, #0, #nzcV"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/FlagM/HotBlocks.json b/unittests/InstructionCountCI/FlagM/HotBlocks.json
index 9838b0a9d9..434ee259e2 100644
--- a/unittests/InstructionCountCI/FlagM/HotBlocks.json
+++ b/unittests/InstructionCountCI/FlagM/HotBlocks.json
@@ -13,7 +13,7 @@
   },
   "Instructions": {
     "The Witcher 3": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 17,
       "x86Insts": [
         "mov eax, 0x1",
         "lock xadd qword [rcx], rax",
@@ -24,19 +24,27 @@
         "add rdx, rcx"
       ],
       "ExpectedArm64ASM": [
-        "mov w4, #0x1",
-        "ldaddal x4, x4, [x5]",
-        "mov x6, x4",
-        "and w6, w4, #0x1f",
-        "add x6, x6, #0x1 (1)",
-        "lsl x6, x6, #6",
-        "eor w27, w6, w5",
-        "adds x26, x6, x5",
-        "mov x6, x26"
+        "mov w20, #0x1",
+        "mov x4, x20",
+        "mov x21, x5",
+        "ldaddal x20, x22, [x21]",
+        "mov x4, x22",
+        "mov x6, x22",
+        "and w20, w22, #0x1f",
+        "mov x6, x20",
+        "add x22, x20, #0x1 (1)",
+        "mov x6, x22",
+        "lsl x20, x22, #6",
+        "mov x6, x20",
+        "eor w22, w20, w21",
+        "mov x27, x22",
+        "adds x22, x20, x21",
+        "mov x26, x22",
+        "mov x6, x22"
       ]
     },
     "FMOD scalar loop": {
-      "ExpectedInstructionCount": 88,
+      "ExpectedInstructionCount": 137,
       "x86Insts": [
         "mov     esi, ecx",
         "mov     rdx, rbp",
@@ -78,98 +86,147 @@
         "sub     esi, 0x1"
       ],
       "ExpectedArm64ASM": [
-        "mov w10, w5",
-        "mov x6, x9",
-        "mov x4, x7",
-        "ldr s18, [x6]",
-        "add x4, x4, #0x20 (32)",
-        "fmul s0, s18, s16",
-        "mov v18.s[0], v0.s[0]",
-        "add x6, x6, #0x20 (32)",
-        "sub x20, x4, #0x20 (32)",
-        "ldr s2, [x20]",
-        "fadd s0, s18, s2",
-        "mov v18.s[0], v0.s[0]",
-        "sub x20, x4, #0x20 (32)",
-        "str s18, [x20]",
-        "sub x20, x6, #0x1c (28)",
-        "ldr s18, [x20]",
-        "fmul s0, s18, s17",
-        "mov v18.s[0], v0.s[0]",
-        "sub x20, x4, #0x1c (28)",
-        "ldr s2, [x20]",
-        "fadd s0, s18, s2",
-        "mov v18.s[0], v0.s[0]",
-        "sub x20, x4, #0x1c (28)",
-        "str s18, [x20]",
-        "sub x20, x6, #0x18 (24)",
-        "ldr s18, [x20]",
-        "fmul s0, s18, s16",
-        "mov v18.s[0], v0.s[0]",
-        "sub x20, x4, #0x18 (24)",
-        "ldr s2, [x20]",
-        "fadd s0, s18, s2",
-        "mov v18.s[0], v0.s[0]",
-        "sub x20, x4, #0x18 (24)",
-        "str s18, [x20]",
-        "sub x20, x6, #0x14 (20)",
-        "ldr s18, [x20]",
-        "fmul s0, s18, s17",
-        "mov v18.s[0], v0.s[0]",
-        "sub x20, x4, #0x14 (20)",
-        "ldr s2, [x20]",
-        "fadd s0, s18, s2",
-        "mov v18.s[0], v0.s[0]",
-        "sub x20, x4, #0x14 (20)",
-        "str s18, [x20]",
-        "sub x20, x6, #0x10 (16)",
-        "ldr s18, [x20]",
-        "fmul s0, s18, s16",
-        "mov v18.s[0], v0.s[0]",
-        "sub x20, x4, #0x10 (16)",
-        "ldr s2, [x20]",
-        "fadd s0, s18, s2",
-        "mov v18.s[0], v0.s[0]",
-        "sub x20, x4, #0x10 (16)",
-        "str s18, [x20]",
-        "sub x20, x6, #0xc (12)",
-        "ldr s18, [x20]",
-        "fmul s0, s18, s17",
-        "mov v18.s[0], v0.s[0]",
-        "sub x20, x4, #0xc (12)",
-        "ldr s2, [x20]",
-        "fadd s0, s18, s2",
-        "mov v18.s[0], v0.s[0]",
-        "sub x20, x4, #0xc (12)",
-        "str s18, [x20]",
-        "sub x20, x6, #0x8 (8)",
-        "ldr s18, [x20]",
-        "fmul s0, s18, s16",
-        "mov v18.s[0], v0.s[0]",
-        "sub x20, x4, #0x8 (8)",
-        "ldr s2, [x20]",
-        "fadd s0, s18, s2",
-        "mov v18.s[0], v0.s[0]",
-        "sub x20, x4, #0x8 (8)",
-        "str s18, [x20]",
-        "sub x20, x6, #0x4 (4)",
-        "ldr s18, [x20]",
-        "fmul s0, s18, s17",
-        "mov v18.s[0], v0.s[0]",
-        "sub x20, x4, #0x4 (4)",
-        "ldr s2, [x20]",
-        "fadd s0, s18, s2",
-        "mov v18.s[0], v0.s[0]",
-        "sub x20, x4, #0x4 (4)",
-        "str s18, [x20]",
-        "mov x27, x10",
-        "subs w26, w10, #0x1 (1)",
+        "mov x20, x5",
+        "mov w21, w20",
+        "mov x10, x21",
+        "mov x20, x9",
+        "mov x6, x20",
+        "mov x22, x7",
+        "mov x4, x22",
+        "ldr s2, [x20]",
+        "mov v18.16b, v2.16b",
+        "add x23, x22, #0x20 (32)",
+        "mov x4, x23",
+        "mov v3.16b, v16.16b",
+        "mov v4.16b, v2.16b",
+        "fmul s0, s2, s3",
+        "mov v4.s[0], v0.s[0]",
+        "mov v18.16b, v4.16b",
+        "add x22, x20, #0x20 (32)",
+        "mov x6, x22",
+        "sub x20, x23, #0x20 (32)",
+        "ldr s2, [x20]",
+        "mov v5.16b, v4.16b",
+        "fadd s0, s4, s2",
+        "mov v5.s[0], v0.s[0]",
+        "mov v18.16b, v5.16b",
+        "sub x20, x23, #0x20 (32)",
+        "str s5, [x20]",
+        "sub x20, x22, #0x1c (28)",
+        "ldr s2, [x20]",
+        "mov v18.16b, v2.16b",
+        "mov v4.16b, v17.16b",
+        "mov v5.16b, v2.16b",
+        "fmul s0, s2, s4",
+        "mov v5.s[0], v0.s[0]",
+        "mov v18.16b, v5.16b",
+        "sub x20, x23, #0x1c (28)",
+        "ldr s2, [x20]",
+        "mov v6.16b, v5.16b",
+        "fadd s0, s5, s2",
+        "mov v6.s[0], v0.s[0]",
+        "mov v18.16b, v6.16b",
+        "sub x20, x23, #0x1c (28)",
+        "str s6, [x20]",
+        "sub x20, x22, #0x18 (24)",
+        "ldr s2, [x20]",
+        "mov v18.16b, v2.16b",
+        "mov v5.16b, v2.16b",
+        "fmul s0, s2, s3",
+        "mov v5.s[0], v0.s[0]",
+        "mov v18.16b, v5.16b",
+        "sub x20, x23, #0x18 (24)",
+        "ldr s2, [x20]",
+        "mov v6.16b, v5.16b",
+        "fadd s0, s5, s2",
+        "mov v6.s[0], v0.s[0]",
+        "mov v18.16b, v6.16b",
+        "sub x20, x23, #0x18 (24)",
+        "str s6, [x20]",
+        "sub x20, x22, #0x14 (20)",
+        "ldr s2, [x20]",
+        "mov v18.16b, v2.16b",
+        "mov v5.16b, v2.16b",
+        "fmul s0, s2, s4",
+        "mov v5.s[0], v0.s[0]",
+        "mov v18.16b, v5.16b",
+        "sub x20, x23, #0x14 (20)",
+        "ldr s2, [x20]",
+        "mov v6.16b, v5.16b",
+        "fadd s0, s5, s2",
+        "mov v6.s[0], v0.s[0]",
+        "mov v18.16b, v6.16b",
+        "sub x20, x23, #0x14 (20)",
+        "str s6, [x20]",
+        "sub x20, x22, #0x10 (16)",
+        "ldr s2, [x20]",
+        "mov v18.16b, v2.16b",
+        "mov v5.16b, v2.16b",
+        "fmul s0, s2, s3",
+        "mov v5.s[0], v0.s[0]",
+        "mov v18.16b, v5.16b",
+        "sub x20, x23, #0x10 (16)",
+        "ldr s2, [x20]",
+        "mov v6.16b, v5.16b",
+        "fadd s0, s5, s2",
+        "mov v6.s[0], v0.s[0]",
+        "mov v18.16b, v6.16b",
+        "sub x20, x23, #0x10 (16)",
+        "str s6, [x20]",
+        "sub x20, x22, #0xc (12)",
+        "ldr s2, [x20]",
+        "mov v18.16b, v2.16b",
+        "mov v5.16b, v2.16b",
+        "fmul s0, s2, s4",
+        "mov v5.s[0], v0.s[0]",
+        "mov v18.16b, v5.16b",
+        "sub x20, x23, #0xc (12)",
+        "ldr s2, [x20]",
+        "mov v6.16b, v5.16b",
+        "fadd s0, s5, s2",
+        "mov v6.s[0], v0.s[0]",
+        "mov v18.16b, v6.16b",
+        "sub x20, x23, #0xc (12)",
+        "str s6, [x20]",
+        "sub x20, x22, #0x8 (8)",
+        "ldr s2, [x20]",
+        "mov v18.16b, v2.16b",
+        "mov v5.16b, v2.16b",
+        "fmul s0, s2, s3",
+        "mov v5.s[0], v0.s[0]",
+        "mov v18.16b, v5.16b",
+        "sub x20, x23, #0x8 (8)",
+        "ldr s2, [x20]",
+        "mov v3.16b, v5.16b",
+        "fadd s0, s5, s2",
+        "mov v3.s[0], v0.s[0]",
+        "mov v18.16b, v3.16b",
+        "sub x20, x23, #0x8 (8)",
+        "str s3, [x20]",
+        "sub x20, x22, #0x4 (4)",
+        "ldr s2, [x20]",
+        "mov v18.16b, v2.16b",
+        "mov v3.16b, v2.16b",
+        "fmul s0, s2, s4",
+        "mov v3.s[0], v0.s[0]",
+        "mov v18.16b, v3.16b",
+        "sub x20, x23, #0x4 (4)",
+        "ldr s2, [x20]",
+        "mov v4.16b, v3.16b",
+        "fadd s0, s3, s2",
+        "mov v4.s[0], v0.s[0]",
+        "mov v18.16b, v4.16b",
+        "sub x20, x23, #0x4 (4)",
+        "str s4, [x20]",
+        "mov x27, x21",
+        "subs w20, w21, #0x1 (1)",
+        "mov x26, x20",
         "cfinv",
-        "mov x10, x26"
+        "mov x10, x20"
       ]
     },
     "Scalar vector add loop": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 16,
       "Comment": [
         "Saw this in bytemark"
       ],
@@ -181,17 +238,26 @@
         "cmp     rsi, rax"
       ],
       "ExpectedArm64ASM": [
-        "ldr q16, [x16, x4, sxtx]",
-        "add v16.2d, v16.2d, v17.2d",
-        "str q16, [x16, x4, sxtx]",
-        "add x4, x4, #0x10 (16)",
-        "eor w27, w10, w4",
-        "subs x26, x10, x4",
+        "mov x20, x16",
+        "mov x21, x4",
+        "ldr q2, [x20, x21, sxtx]",
+        "mov v16.16b, v2.16b",
+        "mov v3.16b, v17.16b",
+        "add v4.2d, v2.2d, v3.2d",
+        "mov v16.16b, v4.16b",
+        "str q4, [x20, x21, sxtx]",
+        "add x20, x21, #0x10 (16)",
+        "mov x4, x20",
+        "mov x21, x10",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "subs x22, x21, x20",
+        "mov x26, x22",
         "cfinv"
       ]
     },
     "bytemark data xor loop": {
-      "ExpectedInstructionCount": 16,
+      "ExpectedInstructionCount": 25,
       "Comment": [
         "Saw this in bytemark"
       ],
@@ -210,18 +276,27 @@
         "mov x20, x4",
         "mov x6, x20",
         "mov x5, x20",
-        "mov x19, x10",
-        "add x4, x20, #0x1 (1)",
-        "lsr x6, x20, #6",
-        "and w5, w20, #0x3f",
-        "lsl x19, x19, x5",
-        "add x20, x7, x6, lsl #3",
-        "ldr x20, [x20]",
-        "eor x20, x20, x19",
-        "add x21, x7, x6, lsl #3",
-        "str x20, [x21]",
-        "eor w27, w11, w4",
-        "subs x26, x11, x4",
+        "mov x21, x10",
+        "mov x19, x21",
+        "add x22, x20, #0x1 (1)",
+        "mov x4, x22",
+        "lsr x23, x20, #6",
+        "mov x6, x23",
+        "and w24, w20, #0x3f",
+        "mov x5, x24",
+        "lsl x20, x21, x24",
+        "mov x19, x20",
+        "mov x21, x7",
+        "add x24, x21, x23, lsl #3",
+        "ldr x25, [x24]",
+        "eor x24, x25, x20",
+        "add x20, x21, x23, lsl #3",
+        "str x24, [x20]",
+        "mov x20, x11",
+        "eor w21, w20, w22",
+        "mov x27, x21",
+        "subs x21, x20, x22",
+        "mov x26, x21",
         "cfinv"
       ]
     }
diff --git a/unittests/InstructionCountCI/FlagM/HotBlocks_32Bit.json b/unittests/InstructionCountCI/FlagM/HotBlocks_32Bit.json
index 8d5d9fb854..beaac703fc 100644
--- a/unittests/InstructionCountCI/FlagM/HotBlocks_32Bit.json
+++ b/unittests/InstructionCountCI/FlagM/HotBlocks_32Bit.json
@@ -12,7 +12,7 @@
   },
   "Instructions": {
     "Sonic Mania movie player": {
-      "ExpectedInstructionCount": 16,
+      "ExpectedInstructionCount": 32,
       "Comment": "Used to be hottest block in Sonic Mania",
       "x86Insts": [
         "movzx   edx, byte [esi+ecx]",
@@ -27,26 +27,42 @@
         "cmp     esi, ebx"
       ],
       "ExpectedArm64ASM": [
-        "add w20, w10, w5",
-        "ldrb w6, [x20]",
-        "add w20, w10, w11",
-        "ldrb w5, [x20]",
-        "orr w6, w6, #0xffff0000",
-        "lsl w6, w6, #8",
-        "add w10, w10, #0x1 (1)",
-        "orr w6, w6, w5",
-        "ldr w5, [x9, #12]",
-        "ldr w20, [x4]",
-        "orr w20, w20, w6",
-        "str w20, [x4]",
-        "add w4, w4, #0x4 (4)",
-        "eor w27, w10, w7",
-        "subs w26, w10, w7",
+        "mov w20, w10",
+        "mov w21, w5",
+        "add w22, w20, w21",
+        "ldrb w21, [x22]",
+        "mov w6, w21",
+        "mov w22, w11",
+        "add w23, w20, w22",
+        "ldrb w22, [x23]",
+        "mov w5, w22",
+        "orr w23, w21, #0xffff0000",
+        "mov w6, w23",
+        "lsl w21, w23, #8",
+        "mov w6, w21",
+        "add w23, w20, #0x1 (1)",
+        "mov w10, w23",
+        "orr w20, w21, w22",
+        "mov w6, w20",
+        "mov w21, w9",
+        "ldr w22, [x21, #12]",
+        "mov w5, w22",
+        "mov w21, w4",
+        "ldr w22, [x21]",
+        "orr w24, w22, w20",
+        "str w24, [x21]",
+        "add w20, w21, #0x4 (4)",
+        "mov w4, w20",
+        "mov w20, w7",
+        "eor w21, w23, w20",
+        "mov w27, w21",
+        "subs w21, w23, w20",
+        "mov w26, w21",
         "cfinv"
       ]
     },
     "wine mscrt.dll memmove": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 25,
       "Comment": "Hot in Sonic Mania",
       "x86Insts": [
         "movdqu  xmm0, [esi]",
@@ -63,24 +79,35 @@
         "cmp     ecx, 0x40"
       ],
       "ExpectedArm64ASM": [
-        "ldr q16, [x10]",
-        "ldr q17, [x10, #16]",
-        "ldr q18, [x10, #32]",
-        "ldr q19, [x10, #48]",
-        "str q16, [x11]",
-        "str q17, [x11, #16]",
-        "str q18, [x11, #32]",
-        "str q19, [x11, #48]",
-        "add w10, w10, #0x40 (64)",
-        "add w11, w11, #0x40 (64)",
-        "sub w5, w5, #0x40 (64)",
-        "mov w27, w5",
-        "subs w26, w5, #0x40 (64)",
+        "mov w20, w10",
+        "ldr q2, [x20]",
+        "mov v16.16b, v2.16b",
+        "ldr q3, [x20, #16]",
+        "mov v17.16b, v3.16b",
+        "ldr q4, [x20, #32]",
+        "mov v18.16b, v4.16b",
+        "ldr q5, [x20, #48]",
+        "mov v19.16b, v5.16b",
+        "mov w21, w11",
+        "str q2, [x21]",
+        "str q3, [x21, #16]",
+        "str q4, [x21, #32]",
+        "str q5, [x21, #48]",
+        "add w22, w20, #0x40 (64)",
+        "mov w10, w22",
+        "add w20, w21, #0x40 (64)",
+        "mov w11, w20",
+        "mov w20, w5",
+        "sub w21, w20, #0x40 (64)",
+        "mov w5, w21",
+        "mov w27, w21",
+        "subs w20, w21, #0x40 (64)",
+        "mov w26, w20",
         "cfinv"
       ]
     },
     "dxvk hotblock from MGRR": {
-      "ExpectedInstructionCount": 40,
+      "ExpectedInstructionCount": 58,
       "Comment": [
         "Hottest block in Metal Gear Rising: Revengeance render thread"
       ],
@@ -101,50 +128,68 @@
         "lock cmpxchg8b qword [esi+0x8]"
       ],
       "ExpectedArm64ASM": [
-        "ldr w6, [x4, #12]",
-        "ldr w4, [x4, #8]",
-        "mov w20, #0xffffffcc",
-        "str w10, [x9, w20, sxtw]",
-        "mov w5, w4",
-        "mov w7, w6",
+        "sub sp, sp, #0x20 (32)",
+        "mov w20, w4",
+        "ldr w21, [x20, #12]",
+        "mov w6, w21",
+        "ldr w22, [x20, #8]",
+        "mov w4, w22",
+        "mov w20, w10",
+        "mov w23, w9",
+        "mov w24, #0xffffffcc",
+        "str w20, [x23, w24, sxtw]",
+        "mov w5, w22",
+        "mov w7, w21",
         "mov w20, #0xffffffdc",
-        "ldr w10, [x9, w20, sxtw]",
+        "ldr w24, [x23, w20, sxtw]",
+        "mov w10, w24",
         "mov w20, #0xffffffff",
-        "adds w21, w4, w20",
-        "mov w5, w21",
-        "mvn w27, w6",
-        "adcs w26, w6, w20",
-        "mov w7, w26",
+        "adds w25, w22, w20",
+        "mov w5, w25",
+        "mvn w12, w21",
+        "mov w27, w12",
+        "adcs w12, w21, w20",
+        "mov w26, w12",
+        "mov w7, w12",
         "mov w20, #0xffffffd8",
-        "str w21, [x9, w20, sxtw]",
+        "str w25, [x23, w20, sxtw]",
         "mov w20, #0xffffffd4",
-        "str w26, [x9, w20, sxtw]",
-        "mov w7, w21",
-        "mov w22, #0xffffffd0",
-        "str w21, [x9, w22, sxtw]",
-        "ldr w5, [x9, w20, sxtw]",
-        "add w20, w10, #0x8 (8)",
-        "mov w22, w4",
-        "mov w23, w6",
-        "mov w24, w21",
-        "mov w25, w5",
+        "str w12, [x23, w20, sxtw]",
+        "mov w7, w25",
+        "mov w12, #0xffffffd0",
+        "str w25, [x23, w12, sxtw]",
+        "ldr w12, [x23, w20, sxtw]",
+        "mov w5, w12",
+        "add w20, w24, #0x8 (8)",
+        "mov x24, x22",
+        "mov w22, w24",
+        "mov w23, w21",
+        "str w21, [sp]",
+        "mov x13, x20",
+        "mov w20, w25",
+        "mov w21, w12",
+        "mov x12, x24",
         "mov w2, w22",
         "mov w3, w23",
-        "caspal w2, w3, w24, w25, [x20]",
-        "mov w20, w2",
-        "mov w21, w3",
-        "mov w24, w20",
-        "mov w25, w21",
-        "mrs x0, nzcv",
-        "cmp w20, w22",
-        "ccmp w21, w23, #nzcv, eq",
+        "caspal w2, w3, w20, w21, [x13]",
+        "mov w24, w2",
+        "mov w25, w3",
+        "mov w20, w24",
+        "mov w21, w25",
+        "mrs x0, nzcv",
+        "cmp w24, w22",
+        "ccmp w25, w23, #nzcv, eq",
         "rmif x0, #0, #NzCV",
-        "csel x4, x24, x4, ne",
-        "csel x6, x25, x6, ne"
+        "csel x22, x20, x12, ne",
+        "mov w4, w22",
+        "ldr w20, [sp]",
+        "csel x22, x21, x20, ne",
+        "mov w6, w22",
+        "add sp, sp, #0x20 (32)"
       ]
     },
     "Psychonauts matrix swizzle": {
-      "ExpectedInstructionCount": 2426,
+      "ExpectedInstructionCount": 2513,
       "Comment": [
         "Hottest block in Windows Psychonauts",
         "Doing a 4x4 32-bit float matrix swizzle",
@@ -256,19 +301,24 @@
         "pop     ebp"
       ],
       "ExpectedArm64ASM": [
-        "mov w20, w8",
-        "str w9, [x20, #-4]!",
-        "mov w8, w20",
-        "mov w9, w20",
-        "mov w27, w20",
-        "subs w26, w20, #0x44 (68)",
+        "mov w20, w9",
+        "mov w21, w8",
+        "mov w22, w21",
+        "str w20, [x22, #-4]!",
+        "mov w8, w22",
+        "mov w9, w22",
+        "mov w27, w22",
+        "subs w20, w22, #0x44 (68)",
+        "mov w26, w20",
         "cfinv",
-        "mov w8, w26",
+        "mov w8, w20",
+        "mov w20, w5",
         "mov w21, #0xffffffbc",
-        "str w5, [x20, w21, sxtw]",
-        "ldr w4, [x20, w21, sxtw]",
-        "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
+        "str w20, [x22, w21, sxtw]",
+        "ldr w20, [x22, w21, sxtw]",
+        "mov w4, w20",
+        "ldrb w22, [x28, #747]",
+        "ldr s2, [x20]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -293,21 +343,21 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "mov w22, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "mov w20, #0x1",
+        "sub w23, w22, #0x1 (1)",
+        "and w22, w23, #0x7",
         "ldrb w23, [x28, #1026]",
-        "lsl w24, w22, w20",
-        "orr w23, w23, w24",
-        "strb w23, [x28, #1026]",
-        "strb w20, [x28, #747]",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "ldrb w20, [x28, #747]",
-        "add x0, x28, x20, lsl #4",
+        "lsl w24, w20, w22",
+        "orr w25, w23, w24",
+        "strb w25, [x28, #1026]",
+        "strb w22, [x28, #747]",
+        "add x0, x28, x22, lsl #4",
+        "str q3, [x0, #768]",
+        "ldrb w22, [x28, #747]",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
@@ -334,18 +384,20 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "fmov s2, s0",
-        "mov w23, #0xffffffc0",
-        "str s2, [x9, w23, sxtw]",
-        "ldrb w23, [x28, #1026]",
-        "lsl w24, w22, w20",
-        "bic w23, w23, w24",
-        "strb w23, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "strb w20, [x28, #747]",
-        "ldr w5, [x9, w21, sxtw]",
-        "ldr s2, [x5, #16]",
+        "fmov s3, s0",
+        "mov w23, w9",
+        "mov w24, #0xffffffc0",
+        "str s3, [x23, w24, sxtw]",
+        "ldrb w24, [x28, #1026]",
+        "lsl w25, w20, w22",
+        "bic w12, w24, w25",
+        "strb w12, [x28, #1026]",
+        "add w24, w22, #0x1 (1)",
+        "and w22, w24, #0x7",
+        "strb w22, [x28, #747]",
+        "ldr w24, [x23, w21, sxtw]",
+        "mov w5, w24",
+        "ldr s2, [x24, #16]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -370,19 +422,19 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w24, w22, w20",
-        "orr w23, w23, w24",
-        "strb w23, [x28, #1026]",
-        "strb w20, [x28, #747]",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "ldrb w20, [x28, #747]",
-        "add x0, x28, x20, lsl #4",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "sub w23, w22, #0x1 (1)",
+        "and w22, w23, #0x7",
+        "lsl w23, w20, w22",
+        "orr w24, w12, w23",
+        "strb w24, [x28, #1026]",
+        "strb w22, [x28, #747]",
+        "add x0, x28, x22, lsl #4",
+        "str q3, [x0, #768]",
+        "ldrb w22, [x28, #747]",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
@@ -409,18 +461,20 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "fmov s2, s0",
-        "mov w23, #0xffffffc4",
-        "str s2, [x9, w23, sxtw]",
-        "ldrb w23, [x28, #1026]",
-        "lsl w24, w22, w20",
-        "bic w23, w23, w24",
-        "strb w23, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "strb w20, [x28, #747]",
-        "ldr w6, [x9, w21, sxtw]",
-        "ldr s2, [x6, #32]",
+        "fmov s3, s0",
+        "mov w23, w9",
+        "mov w24, #0xffffffc4",
+        "str s3, [x23, w24, sxtw]",
+        "ldrb w24, [x28, #1026]",
+        "lsl w25, w20, w22",
+        "bic w12, w24, w25",
+        "strb w12, [x28, #1026]",
+        "add w24, w22, #0x1 (1)",
+        "and w22, w24, #0x7",
+        "strb w22, [x28, #747]",
+        "ldr w24, [x23, w21, sxtw]",
+        "mov w6, w24",
+        "ldr s2, [x24, #32]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -445,19 +499,19 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w24, w22, w20",
-        "orr w23, w23, w24",
-        "strb w23, [x28, #1026]",
-        "strb w20, [x28, #747]",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "ldrb w20, [x28, #747]",
-        "add x0, x28, x20, lsl #4",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "sub w23, w22, #0x1 (1)",
+        "and w22, w23, #0x7",
+        "lsl w23, w20, w22",
+        "orr w24, w12, w23",
+        "strb w24, [x28, #1026]",
+        "strb w22, [x28, #747]",
+        "add x0, x28, x22, lsl #4",
+        "str q3, [x0, #768]",
+        "ldrb w22, [x28, #747]",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
@@ -484,18 +538,20 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "fmov s2, s0",
-        "mov w23, #0xffffffc8",
-        "str s2, [x9, w23, sxtw]",
-        "ldrb w23, [x28, #1026]",
-        "lsl w24, w22, w20",
-        "bic w23, w23, w24",
-        "strb w23, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "strb w20, [x28, #747]",
-        "ldr w4, [x9, w21, sxtw]",
-        "ldr s2, [x4, #48]",
+        "fmov s3, s0",
+        "mov w23, w9",
+        "mov w24, #0xffffffc8",
+        "str s3, [x23, w24, sxtw]",
+        "ldrb w24, [x28, #1026]",
+        "lsl w25, w20, w22",
+        "bic w12, w24, w25",
+        "strb w12, [x28, #1026]",
+        "add w24, w22, #0x1 (1)",
+        "and w22, w24, #0x7",
+        "strb w22, [x28, #747]",
+        "ldr w24, [x23, w21, sxtw]",
+        "mov w4, w24",
+        "ldr s2, [x24, #48]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -520,19 +576,19 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w24, w22, w20",
-        "orr w23, w23, w24",
-        "strb w23, [x28, #1026]",
-        "strb w20, [x28, #747]",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "ldrb w20, [x28, #747]",
-        "add x0, x28, x20, lsl #4",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "sub w23, w22, #0x1 (1)",
+        "and w22, w23, #0x7",
+        "lsl w23, w20, w22",
+        "orr w24, w12, w23",
+        "strb w24, [x28, #1026]",
+        "strb w22, [x28, #747]",
+        "add x0, x28, x22, lsl #4",
+        "str q3, [x0, #768]",
+        "ldrb w22, [x28, #747]",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
@@ -559,18 +615,20 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "fmov s2, s0",
-        "mov w23, #0xffffffcc",
-        "str s2, [x9, w23, sxtw]",
-        "ldrb w23, [x28, #1026]",
-        "lsl w24, w22, w20",
-        "bic w23, w23, w24",
-        "strb w23, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "strb w20, [x28, #747]",
-        "ldr w5, [x9, w21, sxtw]",
-        "ldr s2, [x5, #4]",
+        "fmov s3, s0",
+        "mov w23, w9",
+        "mov w24, #0xffffffcc",
+        "str s3, [x23, w24, sxtw]",
+        "ldrb w24, [x28, #1026]",
+        "lsl w25, w20, w22",
+        "bic w12, w24, w25",
+        "strb w12, [x28, #1026]",
+        "add w24, w22, #0x1 (1)",
+        "and w22, w24, #0x7",
+        "strb w22, [x28, #747]",
+        "ldr w24, [x23, w21, sxtw]",
+        "mov w5, w24",
+        "ldr s2, [x24, #4]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -595,19 +653,19 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w21, w22, w20",
-        "orr w21, w23, w21",
-        "strb w21, [x28, #1026]",
-        "strb w20, [x28, #747]",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "ldrb w20, [x28, #747]",
-        "add x0, x28, x20, lsl #4",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "sub w21, w22, #0x1 (1)",
+        "and w22, w21, #0x7",
+        "lsl w21, w20, w22",
+        "orr w23, w12, w21",
+        "strb w23, [x28, #1026]",
+        "strb w22, [x28, #747]",
+        "add x0, x28, x22, lsl #4",
+        "str q3, [x0, #768]",
+        "ldrb w21, [x28, #747]",
+        "add x0, x28, x21, lsl #4",
         "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
@@ -634,19 +692,21 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "fmov s2, s0",
-        "mov w21, #0xffffffd0",
-        "str s2, [x9, w21, sxtw]",
-        "ldrb w21, [x28, #1026]",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "strb w20, [x28, #747]",
-        "mov w22, #0xffffffbc",
-        "ldr w6, [x9, w22, sxtw]",
-        "ldr s2, [x6, #20]",
+        "fmov s3, s0",
+        "mov w22, w9",
+        "mov w23, #0xffffffd0",
+        "str s3, [x22, w23, sxtw]",
+        "ldrb w23, [x28, #1026]",
+        "lsl w24, w20, w21",
+        "bic w20, w23, w24",
+        "strb w20, [x28, #1026]",
+        "add w23, w21, #0x1 (1)",
+        "and w21, w23, #0x7",
+        "strb w21, [x28, #747]",
+        "mov w23, #0xffffffbc",
+        "ldr w24, [x22, w23, sxtw]",
+        "mov w6, w24",
+        "ldr s2, [x24, #20]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -671,18 +731,18 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "mov w23, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w24, w23, w20",
-        "orr w21, w21, w24",
-        "strb w21, [x28, #1026]",
-        "strb w20, [x28, #747]",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "mov w22, #0x1",
+        "sub w24, w21, #0x1 (1)",
+        "and w21, w24, #0x7",
+        "lsl w24, w22, w21",
+        "orr w25, w20, w24",
+        "strb w25, [x28, #1026]",
+        "strb w21, [x28, #747]",
+        "add x0, x28, x21, lsl #4",
+        "str q3, [x0, #768]",
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
@@ -711,18 +771,20 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "fmov s2, s0",
-        "mov w21, #0xffffffd4",
-        "str s2, [x9, w21, sxtw]",
-        "ldrb w21, [x28, #1026]",
-        "lsl w24, w23, w20",
-        "bic w21, w21, w24",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "fmov s3, s0",
+        "mov w21, w9",
+        "mov w24, #0xffffffd4",
+        "str s3, [x21, w24, sxtw]",
+        "ldrb w24, [x28, #1026]",
+        "lsl w25, w22, w20",
+        "bic w12, w24, w25",
+        "strb w12, [x28, #1026]",
+        "add w24, w20, #0x1 (1)",
+        "and w20, w24, #0x7",
         "strb w20, [x28, #747]",
-        "ldr w4, [x9, w22, sxtw]",
-        "ldr s2, [x4, #36]",
+        "ldr w24, [x21, w23, sxtw]",
+        "mov w4, w24",
+        "ldr s2, [x24, #36]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -747,17 +809,17 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w24, w23, w20",
-        "orr w21, w21, w24",
-        "strb w21, [x28, #1026]",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "sub w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
+        "lsl w21, w22, w20",
+        "orr w24, w12, w21",
+        "strb w24, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
+        "str q3, [x0, #768]",
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
@@ -786,18 +848,20 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "fmov s2, s0",
-        "mov w21, #0xffffffd8",
-        "str s2, [x9, w21, sxtw]",
-        "ldrb w21, [x28, #1026]",
-        "lsl w24, w23, w20",
-        "bic w21, w21, w24",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "fmov s3, s0",
+        "mov w21, w9",
+        "mov w24, #0xffffffd8",
+        "str s3, [x21, w24, sxtw]",
+        "ldrb w24, [x28, #1026]",
+        "lsl w25, w22, w20",
+        "bic w12, w24, w25",
+        "strb w12, [x28, #1026]",
+        "add w24, w20, #0x1 (1)",
+        "and w20, w24, #0x7",
         "strb w20, [x28, #747]",
-        "ldr w5, [x9, w22, sxtw]",
-        "ldr s2, [x5, #52]",
+        "ldr w24, [x21, w23, sxtw]",
+        "mov w5, w24",
+        "ldr s2, [x24, #52]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -822,17 +886,17 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w24, w23, w20",
-        "orr w21, w21, w24",
-        "strb w21, [x28, #1026]",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "sub w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
+        "lsl w21, w22, w20",
+        "orr w24, w12, w21",
+        "strb w24, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
+        "str q3, [x0, #768]",
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
@@ -861,18 +925,20 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "fmov s2, s0",
-        "mov w21, #0xffffffdc",
-        "str s2, [x9, w21, sxtw]",
-        "ldrb w21, [x28, #1026]",
-        "lsl w24, w23, w20",
-        "bic w21, w21, w24",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "fmov s3, s0",
+        "mov w21, w9",
+        "mov w24, #0xffffffdc",
+        "str s3, [x21, w24, sxtw]",
+        "ldrb w24, [x28, #1026]",
+        "lsl w25, w22, w20",
+        "bic w12, w24, w25",
+        "strb w12, [x28, #1026]",
+        "add w24, w20, #0x1 (1)",
+        "and w20, w24, #0x7",
         "strb w20, [x28, #747]",
-        "ldr w6, [x9, w22, sxtw]",
-        "ldr s2, [x6, #8]",
+        "ldr w24, [x21, w23, sxtw]",
+        "mov w6, w24",
+        "ldr s2, [x24, #8]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -897,17 +963,17 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w24, w23, w20",
-        "orr w21, w21, w24",
-        "strb w21, [x28, #1026]",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "sub w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
+        "lsl w21, w22, w20",
+        "orr w24, w12, w21",
+        "strb w24, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
+        "str q3, [x0, #768]",
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
@@ -936,18 +1002,20 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "fmov s2, s0",
-        "mov w21, #0xffffffe0",
-        "str s2, [x9, w21, sxtw]",
-        "ldrb w21, [x28, #1026]",
-        "lsl w24, w23, w20",
-        "bic w21, w21, w24",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "fmov s3, s0",
+        "mov w21, w9",
+        "mov w24, #0xffffffe0",
+        "str s3, [x21, w24, sxtw]",
+        "ldrb w24, [x28, #1026]",
+        "lsl w25, w22, w20",
+        "bic w12, w24, w25",
+        "strb w12, [x28, #1026]",
+        "add w24, w20, #0x1 (1)",
+        "and w20, w24, #0x7",
         "strb w20, [x28, #747]",
-        "ldr w4, [x9, w22, sxtw]",
-        "ldr s2, [x4, #24]",
+        "ldr w24, [x21, w23, sxtw]",
+        "mov w4, w24",
+        "ldr s2, [x24, #24]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -972,17 +1040,17 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w22, w23, w20",
-        "orr w21, w21, w22",
-        "strb w21, [x28, #1026]",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "sub w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
+        "lsl w21, w22, w20",
+        "orr w23, w12, w21",
+        "strb w23, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
+        "str q3, [x0, #768]",
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
@@ -1011,19 +1079,21 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "fmov s2, s0",
-        "mov w21, #0xffffffe4",
-        "str s2, [x9, w21, sxtw]",
-        "ldrb w21, [x28, #1026]",
-        "lsl w22, w23, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "fmov s3, s0",
+        "mov w21, w9",
+        "mov w23, #0xffffffe4",
+        "str s3, [x21, w23, sxtw]",
+        "ldrb w23, [x28, #1026]",
+        "lsl w24, w22, w20",
+        "bic w22, w23, w24",
+        "strb w22, [x28, #1026]",
+        "add w23, w20, #0x1 (1)",
+        "and w20, w23, #0x7",
         "strb w20, [x28, #747]",
-        "mov w22, #0xffffffbc",
-        "ldr w5, [x9, w22, sxtw]",
-        "ldr s2, [x5, #40]",
+        "mov w23, #0xffffffbc",
+        "ldr w24, [x21, w23, sxtw]",
+        "mov w5, w24",
+        "ldr s2, [x24, #40]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -1048,18 +1118,18 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "mov w23, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w24, w23, w20",
-        "orr w21, w21, w24",
-        "strb w21, [x28, #1026]",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "mov w21, #0x1",
+        "sub w24, w20, #0x1 (1)",
+        "and w20, w24, #0x7",
+        "lsl w24, w21, w20",
+        "orr w25, w22, w24",
+        "strb w25, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
+        "str q3, [x0, #768]",
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
@@ -1088,18 +1158,20 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "fmov s2, s0",
-        "mov w21, #0xffffffe8",
-        "str s2, [x9, w21, sxtw]",
-        "ldrb w21, [x28, #1026]",
-        "lsl w24, w23, w20",
-        "bic w21, w21, w24",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "fmov s3, s0",
+        "mov w22, w9",
+        "mov w24, #0xffffffe8",
+        "str s3, [x22, w24, sxtw]",
+        "ldrb w24, [x28, #1026]",
+        "lsl w25, w21, w20",
+        "bic w12, w24, w25",
+        "strb w12, [x28, #1026]",
+        "add w24, w20, #0x1 (1)",
+        "and w20, w24, #0x7",
         "strb w20, [x28, #747]",
-        "ldr w6, [x9, w22, sxtw]",
-        "ldr s2, [x6, #56]",
+        "ldr w24, [x22, w23, sxtw]",
+        "mov w6, w24",
+        "ldr s2, [x24, #56]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -1124,17 +1196,17 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w24, w23, w20",
-        "orr w21, w21, w24",
-        "strb w21, [x28, #1026]",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
+        "lsl w22, w21, w20",
+        "orr w24, w12, w22",
+        "strb w24, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
+        "str q3, [x0, #768]",
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
@@ -1163,18 +1235,20 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "fmov s2, s0",
-        "mov w21, #0xffffffec",
-        "str s2, [x9, w21, sxtw]",
-        "ldrb w21, [x28, #1026]",
-        "lsl w24, w23, w20",
-        "bic w21, w21, w24",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "fmov s3, s0",
+        "mov w22, w9",
+        "mov w24, #0xffffffec",
+        "str s3, [x22, w24, sxtw]",
+        "ldrb w24, [x28, #1026]",
+        "lsl w25, w21, w20",
+        "bic w12, w24, w25",
+        "strb w12, [x28, #1026]",
+        "add w24, w20, #0x1 (1)",
+        "and w20, w24, #0x7",
         "strb w20, [x28, #747]",
-        "ldr w4, [x9, w22, sxtw]",
-        "ldr s2, [x4, #12]",
+        "ldr w24, [x22, w23, sxtw]",
+        "mov w4, w24",
+        "ldr s2, [x24, #12]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -1199,17 +1273,17 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w24, w23, w20",
-        "orr w21, w21, w24",
-        "strb w21, [x28, #1026]",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
+        "lsl w22, w21, w20",
+        "orr w24, w12, w22",
+        "strb w24, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
+        "str q3, [x0, #768]",
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
@@ -1238,18 +1312,20 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "fmov s2, s0",
-        "mov w21, #0xfffffff0",
-        "str s2, [x9, w21, sxtw]",
-        "ldrb w21, [x28, #1026]",
-        "lsl w24, w23, w20",
-        "bic w21, w21, w24",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "fmov s3, s0",
+        "mov w22, w9",
+        "mov w24, #0xfffffff0",
+        "str s3, [x22, w24, sxtw]",
+        "ldrb w24, [x28, #1026]",
+        "lsl w25, w21, w20",
+        "bic w12, w24, w25",
+        "strb w12, [x28, #1026]",
+        "add w24, w20, #0x1 (1)",
+        "and w20, w24, #0x7",
         "strb w20, [x28, #747]",
-        "ldr w5, [x9, w22, sxtw]",
-        "ldr s2, [x5, #28]",
+        "ldr w24, [x22, w23, sxtw]",
+        "mov w5, w24",
+        "ldr s2, [x24, #28]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -1274,17 +1350,17 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w24, w23, w20",
-        "orr w21, w21, w24",
-        "strb w21, [x28, #1026]",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
+        "lsl w22, w21, w20",
+        "orr w24, w12, w22",
+        "strb w24, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
+        "str q3, [x0, #768]",
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
@@ -1313,18 +1389,20 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "fmov s2, s0",
-        "mov w21, #0xfffffff4",
-        "str s2, [x9, w21, sxtw]",
-        "ldrb w21, [x28, #1026]",
-        "lsl w24, w23, w20",
-        "bic w21, w21, w24",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "fmov s3, s0",
+        "mov w22, w9",
+        "mov w24, #0xfffffff4",
+        "str s3, [x22, w24, sxtw]",
+        "ldrb w24, [x28, #1026]",
+        "lsl w25, w21, w20",
+        "bic w12, w24, w25",
+        "strb w12, [x28, #1026]",
+        "add w24, w20, #0x1 (1)",
+        "and w20, w24, #0x7",
         "strb w20, [x28, #747]",
-        "ldr w6, [x9, w22, sxtw]",
-        "ldr s2, [x6, #44]",
+        "ldr w24, [x22, w23, sxtw]",
+        "mov w6, w24",
+        "ldr s2, [x24, #44]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -1349,17 +1427,17 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w22, w23, w20",
-        "orr w21, w21, w22",
-        "strb w21, [x28, #1026]",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
+        "lsl w22, w21, w20",
+        "orr w23, w12, w22",
+        "strb w23, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
+        "str q3, [x0, #768]",
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
@@ -1388,19 +1466,21 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "fmov s2, s0",
-        "mov w21, #0xfffffff8",
-        "str s2, [x9, w21, sxtw]",
-        "ldrb w21, [x28, #1026]",
-        "lsl w22, w23, w20",
-        "bic w21, w21, w22",
+        "fmov s3, s0",
+        "mov w22, w9",
+        "mov w23, #0xfffffff8",
+        "str s3, [x22, w23, sxtw]",
+        "ldrb w23, [x28, #1026]",
+        "lsl w24, w21, w20",
+        "bic w21, w23, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w23, w20, #0x1 (1)",
+        "and w20, w23, #0x7",
         "strb w20, [x28, #747]",
-        "mov w22, #0xffffffbc",
-        "ldr w4, [x9, w22, sxtw]",
-        "ldr s2, [x4, #60]",
+        "mov w23, #0xffffffbc",
+        "ldr w24, [x22, w23, sxtw]",
+        "mov w4, w24",
+        "ldr s2, [x24, #60]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -1425,18 +1505,18 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "mov w22, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w23, w20, #0x1 (1)",
+        "and w20, w23, #0x7",
         "lsl w23, w22, w20",
-        "orr w21, w21, w23",
-        "strb w21, [x28, #1026]",
+        "orr w24, w21, w23",
+        "strb w24, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
+        "str q3, [x0, #768]",
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
@@ -1465,19 +1545,21 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "fmov s2, s0",
-        "mov w21, #0xfffffffc",
-        "str s2, [x9, w21, sxtw]",
-        "ldrb w21, [x28, #1026]",
-        "lsl w23, w22, w20",
-        "bic w21, w21, w23",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "fmov s3, s0",
+        "mov w21, w9",
+        "mov w23, #0xfffffffc",
+        "str s3, [x21, w23, sxtw]",
+        "ldrb w23, [x28, #1026]",
+        "lsl w24, w22, w20",
+        "bic w25, w23, w24",
+        "strb w25, [x28, #1026]",
+        "add w23, w20, #0x1 (1)",
+        "and w20, w23, #0x7",
         "strb w20, [x28, #747]",
-        "ldr w5, [x9, #8]",
+        "ldr w23, [x21, #8]",
+        "mov w5, w23",
         "mov w23, #0xffffffc0",
-        "ldr s2, [x9, w23, sxtw]",
+        "ldr s2, [x21, w23, sxtw]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -1502,17 +1584,17 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w23, w22, w20",
-        "orr w21, w21, w23",
-        "strb w21, [x28, #1026]",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "sub w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
+        "lsl w21, w22, w20",
+        "orr w23, w25, w21",
+        "strb w23, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
+        "str q3, [x0, #768]",
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
@@ -1541,18 +1623,21 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "fmov s2, s0",
-        "str s2, [x5]",
+        "fmov s3, s0",
+        "mov w21, w5",
+        "str s3, [x21]",
         "ldrb w21, [x28, #1026]",
         "lsl w23, w22, w20",
-        "bic w21, w21, w23",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "strb w20, [x28, #747]",
-        "ldr w6, [x9, #8]",
+        "bic w24, w21, w23",
+        "strb w24, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
+        "strb w20, [x28, #747]",
+        "mov w21, w9",
+        "ldr w23, [x21, #8]",
+        "mov w6, w23",
         "mov w23, #0xffffffc4",
-        "ldr s2, [x9, w23, sxtw]",
+        "ldr s2, [x21, w23, sxtw]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -1577,17 +1662,17 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w23, w22, w20",
-        "orr w21, w21, w23",
-        "strb w21, [x28, #1026]",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "sub w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
+        "lsl w21, w22, w20",
+        "orr w23, w24, w21",
+        "strb w23, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
+        "str q3, [x0, #768]",
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
@@ -1616,18 +1701,21 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "fmov s2, s0",
-        "str s2, [x6, #4]",
+        "fmov s3, s0",
+        "mov w21, w6",
+        "str s3, [x21, #4]",
         "ldrb w21, [x28, #1026]",
         "lsl w23, w22, w20",
-        "bic w21, w21, w23",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "strb w20, [x28, #747]",
-        "ldr w4, [x9, #8]",
+        "bic w24, w21, w23",
+        "strb w24, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
+        "strb w20, [x28, #747]",
+        "mov w21, w9",
+        "ldr w23, [x21, #8]",
+        "mov w4, w23",
         "mov w23, #0xffffffc8",
-        "ldr s2, [x9, w23, sxtw]",
+        "ldr s2, [x21, w23, sxtw]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -1652,17 +1740,17 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w23, w22, w20",
-        "orr w21, w21, w23",
-        "strb w21, [x28, #1026]",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "sub w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
+        "lsl w21, w22, w20",
+        "orr w23, w24, w21",
+        "strb w23, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
+        "str q3, [x0, #768]",
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
@@ -1691,18 +1779,21 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "fmov s2, s0",
-        "str s2, [x4, #8]",
+        "fmov s3, s0",
+        "mov w21, w4",
+        "str s3, [x21, #8]",
         "ldrb w21, [x28, #1026]",
         "lsl w23, w22, w20",
-        "bic w21, w21, w23",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "strb w20, [x28, #747]",
-        "ldr w5, [x9, #8]",
+        "bic w24, w21, w23",
+        "strb w24, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
+        "strb w20, [x28, #747]",
+        "mov w21, w9",
+        "ldr w23, [x21, #8]",
+        "mov w5, w23",
         "mov w23, #0xffffffcc",
-        "ldr s2, [x9, w23, sxtw]",
+        "ldr s2, [x21, w23, sxtw]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -1727,17 +1818,17 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w23, w22, w20",
-        "orr w21, w21, w23",
-        "strb w21, [x28, #1026]",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "sub w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
+        "lsl w21, w22, w20",
+        "orr w23, w24, w21",
+        "strb w23, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
+        "str q3, [x0, #768]",
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
@@ -1766,18 +1857,21 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "fmov s2, s0",
-        "str s2, [x5, #12]",
+        "fmov s3, s0",
+        "mov w21, w5",
+        "str s3, [x21, #12]",
         "ldrb w21, [x28, #1026]",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "ldr w6, [x9, #8]",
-        "mov w22, #0xffffffd0",
-        "ldr s2, [x9, w22, sxtw]",
+        "mov w21, w9",
+        "ldr w23, [x21, #8]",
+        "mov w6, w23",
+        "mov w23, #0xffffffd0",
+        "ldr s2, [x21, w23, sxtw]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -1802,18 +1896,18 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "mov w22, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w23, w22, w20",
-        "orr w21, w21, w23",
-        "strb w21, [x28, #1026]",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "mov w21, #0x1",
+        "sub w23, w20, #0x1 (1)",
+        "and w20, w23, #0x7",
+        "lsl w23, w21, w20",
+        "orr w24, w22, w23",
+        "strb w24, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
+        "str q3, [x0, #768]",
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
@@ -1842,18 +1936,21 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "fmov s2, s0",
-        "str s2, [x6, #16]",
-        "ldrb w21, [x28, #1026]",
-        "lsl w23, w22, w20",
-        "bic w21, w21, w23",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "fmov s3, s0",
+        "mov w22, w6",
+        "str s3, [x22, #16]",
+        "ldrb w22, [x28, #1026]",
+        "lsl w23, w21, w20",
+        "bic w24, w22, w23",
+        "strb w24, [x28, #1026]",
+        "add w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "strb w20, [x28, #747]",
-        "ldr w4, [x9, #8]",
+        "mov w22, w9",
+        "ldr w23, [x22, #8]",
+        "mov w4, w23",
         "mov w23, #0xffffffd4",
-        "ldr s2, [x9, w23, sxtw]",
+        "ldr s2, [x22, w23, sxtw]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -1878,17 +1975,17 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w23, w22, w20",
-        "orr w21, w21, w23",
-        "strb w21, [x28, #1026]",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
+        "lsl w22, w21, w20",
+        "orr w23, w24, w22",
+        "strb w23, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
+        "str q3, [x0, #768]",
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
@@ -1917,18 +2014,21 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "fmov s2, s0",
-        "str s2, [x4, #20]",
-        "ldrb w21, [x28, #1026]",
-        "lsl w23, w22, w20",
-        "bic w21, w21, w23",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "strb w20, [x28, #747]",
-        "ldr w5, [x9, #8]",
+        "fmov s3, s0",
+        "mov w22, w4",
+        "str s3, [x22, #20]",
+        "ldrb w22, [x28, #1026]",
+        "lsl w23, w21, w20",
+        "bic w24, w22, w23",
+        "strb w24, [x28, #1026]",
+        "add w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
+        "strb w20, [x28, #747]",
+        "mov w22, w9",
+        "ldr w23, [x22, #8]",
+        "mov w5, w23",
         "mov w23, #0xffffffd8",
-        "ldr s2, [x9, w23, sxtw]",
+        "ldr s2, [x22, w23, sxtw]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -1953,17 +2053,17 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w23, w22, w20",
-        "orr w21, w21, w23",
-        "strb w21, [x28, #1026]",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
+        "lsl w22, w21, w20",
+        "orr w23, w24, w22",
+        "strb w23, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
+        "str q3, [x0, #768]",
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
@@ -1992,18 +2092,21 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "fmov s2, s0",
-        "str s2, [x5, #24]",
-        "ldrb w21, [x28, #1026]",
-        "lsl w23, w22, w20",
-        "bic w21, w21, w23",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "fmov s3, s0",
+        "mov w22, w5",
+        "str s3, [x22, #24]",
+        "ldrb w22, [x28, #1026]",
+        "lsl w23, w21, w20",
+        "bic w24, w22, w23",
+        "strb w24, [x28, #1026]",
+        "add w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "strb w20, [x28, #747]",
-        "ldr w6, [x9, #8]",
+        "mov w22, w9",
+        "ldr w23, [x22, #8]",
+        "mov w6, w23",
         "mov w23, #0xffffffdc",
-        "ldr s2, [x9, w23, sxtw]",
+        "ldr s2, [x22, w23, sxtw]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -2028,17 +2131,17 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w23, w22, w20",
-        "orr w21, w21, w23",
-        "strb w21, [x28, #1026]",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
+        "lsl w22, w21, w20",
+        "orr w23, w24, w22",
+        "strb w23, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
+        "str q3, [x0, #768]",
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
@@ -2067,18 +2170,21 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "fmov s2, s0",
-        "str s2, [x6, #28]",
-        "ldrb w21, [x28, #1026]",
-        "lsl w23, w22, w20",
-        "bic w21, w21, w23",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "fmov s3, s0",
+        "mov w22, w6",
+        "str s3, [x22, #28]",
+        "ldrb w22, [x28, #1026]",
+        "lsl w23, w21, w20",
+        "bic w24, w22, w23",
+        "strb w24, [x28, #1026]",
+        "add w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "strb w20, [x28, #747]",
-        "ldr w4, [x9, #8]",
+        "mov w22, w9",
+        "ldr w23, [x22, #8]",
+        "mov w4, w23",
         "mov w23, #0xffffffe0",
-        "ldr s2, [x9, w23, sxtw]",
+        "ldr s2, [x22, w23, sxtw]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -2103,17 +2209,17 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w23, w22, w20",
-        "orr w21, w21, w23",
-        "strb w21, [x28, #1026]",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
+        "lsl w22, w21, w20",
+        "orr w23, w24, w22",
+        "strb w23, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
+        "str q3, [x0, #768]",
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
@@ -2142,18 +2248,21 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "fmov s2, s0",
-        "str s2, [x4, #32]",
-        "ldrb w21, [x28, #1026]",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
+        "fmov s3, s0",
+        "mov w22, w4",
+        "str s3, [x22, #32]",
+        "ldrb w22, [x28, #1026]",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "strb w20, [x28, #747]",
-        "ldr w5, [x9, #8]",
-        "mov w22, #0xffffffe4",
-        "ldr s2, [x9, w22, sxtw]",
+        "mov w22, w9",
+        "ldr w23, [x22, #8]",
+        "mov w5, w23",
+        "mov w23, #0xffffffe4",
+        "ldr s2, [x22, w23, sxtw]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -2178,18 +2287,18 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "mov w22, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w23, w20, #0x1 (1)",
+        "and w20, w23, #0x7",
         "lsl w23, w22, w20",
-        "orr w21, w21, w23",
-        "strb w21, [x28, #1026]",
+        "orr w24, w21, w23",
+        "strb w24, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
+        "str q3, [x0, #768]",
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
@@ -2218,18 +2327,21 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "fmov s2, s0",
-        "str s2, [x5, #36]",
+        "fmov s3, s0",
+        "mov w21, w5",
+        "str s3, [x21, #36]",
         "ldrb w21, [x28, #1026]",
         "lsl w23, w22, w20",
-        "bic w21, w21, w23",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "strb w20, [x28, #747]",
-        "ldr w6, [x9, #8]",
+        "bic w24, w21, w23",
+        "strb w24, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
+        "strb w20, [x28, #747]",
+        "mov w21, w9",
+        "ldr w23, [x21, #8]",
+        "mov w6, w23",
         "mov w23, #0xffffffe8",
-        "ldr s2, [x9, w23, sxtw]",
+        "ldr s2, [x21, w23, sxtw]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -2254,17 +2366,17 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w23, w22, w20",
-        "orr w21, w21, w23",
-        "strb w21, [x28, #1026]",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "sub w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
+        "lsl w21, w22, w20",
+        "orr w23, w24, w21",
+        "strb w23, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
+        "str q3, [x0, #768]",
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
@@ -2293,18 +2405,21 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "fmov s2, s0",
-        "str s2, [x6, #40]",
+        "fmov s3, s0",
+        "mov w21, w6",
+        "str s3, [x21, #40]",
         "ldrb w21, [x28, #1026]",
         "lsl w23, w22, w20",
-        "bic w21, w21, w23",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "strb w20, [x28, #747]",
-        "ldr w4, [x9, #8]",
+        "bic w24, w21, w23",
+        "strb w24, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
+        "strb w20, [x28, #747]",
+        "mov w21, w9",
+        "ldr w23, [x21, #8]",
+        "mov w4, w23",
         "mov w23, #0xffffffec",
-        "ldr s2, [x9, w23, sxtw]",
+        "ldr s2, [x21, w23, sxtw]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -2329,17 +2444,17 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w23, w22, w20",
-        "orr w21, w21, w23",
-        "strb w21, [x28, #1026]",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "sub w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
+        "lsl w21, w22, w20",
+        "orr w23, w24, w21",
+        "strb w23, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
+        "str q3, [x0, #768]",
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
@@ -2368,18 +2483,21 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "fmov s2, s0",
-        "str s2, [x4, #44]",
+        "fmov s3, s0",
+        "mov w21, w4",
+        "str s3, [x21, #44]",
         "ldrb w21, [x28, #1026]",
         "lsl w23, w22, w20",
-        "bic w21, w21, w23",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "strb w20, [x28, #747]",
-        "ldr w5, [x9, #8]",
+        "bic w24, w21, w23",
+        "strb w24, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
+        "strb w20, [x28, #747]",
+        "mov w21, w9",
+        "ldr w23, [x21, #8]",
+        "mov w5, w23",
         "mov w23, #0xfffffff0",
-        "ldr s2, [x9, w23, sxtw]",
+        "ldr s2, [x21, w23, sxtw]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -2404,17 +2522,17 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w23, w22, w20",
-        "orr w21, w21, w23",
-        "strb w21, [x28, #1026]",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "sub w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
+        "lsl w21, w22, w20",
+        "orr w23, w24, w21",
+        "strb w23, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
+        "str q3, [x0, #768]",
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
@@ -2443,18 +2561,21 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "fmov s2, s0",
-        "str s2, [x5, #48]",
+        "fmov s3, s0",
+        "mov w21, w5",
+        "str s3, [x21, #48]",
         "ldrb w21, [x28, #1026]",
         "lsl w23, w22, w20",
-        "bic w21, w21, w23",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "strb w20, [x28, #747]",
-        "ldr w6, [x9, #8]",
+        "bic w24, w21, w23",
+        "strb w24, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
+        "strb w20, [x28, #747]",
+        "mov w21, w9",
+        "ldr w23, [x21, #8]",
+        "mov w6, w23",
         "mov w23, #0xfffffff4",
-        "ldr s2, [x9, w23, sxtw]",
+        "ldr s2, [x21, w23, sxtw]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -2479,17 +2600,17 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w23, w22, w20",
-        "orr w21, w21, w23",
-        "strb w21, [x28, #1026]",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "sub w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
+        "lsl w21, w22, w20",
+        "orr w23, w24, w21",
+        "strb w23, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
+        "str q3, [x0, #768]",
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
@@ -2518,18 +2639,21 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "fmov s2, s0",
-        "str s2, [x6, #52]",
+        "fmov s3, s0",
+        "mov w21, w6",
+        "str s3, [x21, #52]",
         "ldrb w21, [x28, #1026]",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "ldr w4, [x9, #8]",
-        "mov w22, #0xfffffff8",
-        "ldr s2, [x9, w22, sxtw]",
+        "mov w21, w9",
+        "ldr w23, [x21, #8]",
+        "mov w4, w23",
+        "mov w23, #0xfffffff8",
+        "ldr s2, [x21, w23, sxtw]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -2554,18 +2678,18 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "mov w22, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w23, w22, w20",
-        "orr w21, w21, w23",
-        "strb w21, [x28, #1026]",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "mov w21, #0x1",
+        "sub w23, w20, #0x1 (1)",
+        "and w20, w23, #0x7",
+        "lsl w23, w21, w20",
+        "orr w24, w22, w23",
+        "strb w24, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
+        "str q3, [x0, #768]",
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
@@ -2594,18 +2718,21 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "fmov s2, s0",
-        "str s2, [x4, #56]",
-        "ldrb w21, [x28, #1026]",
-        "lsl w23, w22, w20",
-        "bic w21, w21, w23",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "strb w20, [x28, #747]",
-        "ldr w5, [x9, #8]",
+        "fmov s3, s0",
+        "mov w22, w4",
+        "str s3, [x22, #56]",
+        "ldrb w22, [x28, #1026]",
+        "lsl w23, w21, w20",
+        "bic w24, w22, w23",
+        "strb w24, [x28, #1026]",
+        "add w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
+        "strb w20, [x28, #747]",
+        "mov w22, w9",
+        "ldr w23, [x22, #8]",
+        "mov w5, w23",
         "mov w23, #0xfffffffc",
-        "ldr s2, [x9, w23, sxtw]",
+        "ldr s2, [x22, w23, sxtw]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -2630,17 +2757,17 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w23, w22, w20",
-        "orr w21, w21, w23",
-        "strb w21, [x28, #1026]",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
+        "lsl w22, w21, w20",
+        "orr w23, w24, w22",
+        "strb w23, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
+        "str q3, [x0, #768]",
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
@@ -2669,19 +2796,24 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x16, [sp], #16",
         "ldp x17, x30, [sp], #16",
-        "fmov s2, s0",
-        "str s2, [x5, #60]",
-        "ldrb w21, [x28, #1026]",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
+        "fmov s3, s0",
+        "mov w22, w5",
+        "str s3, [x22, #60]",
+        "ldrb w22, [x28, #1026]",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "mov w8, w9",
-        "ldr w4, [x8, #8]",
-        "ldr w9, [x8]",
-        "add x8, x8, #0x4 (4)"
+        "mov w20, w9",
+        "ldr w21, [x20, #8]",
+        "mov w4, w21",
+        "mov w8, w20",
+        "ldr w21, [x20]",
+        "add x22, x20, #0x4 (4)",
+        "mov w8, w22",
+        "mov w9, w21"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/FlagM/HotBlocks_AFP.json b/unittests/InstructionCountCI/FlagM/HotBlocks_AFP.json
index f1871c91a3..3a18990c2e 100644
--- a/unittests/InstructionCountCI/FlagM/HotBlocks_AFP.json
+++ b/unittests/InstructionCountCI/FlagM/HotBlocks_AFP.json
@@ -13,7 +13,7 @@
   },
   "Instructions": {
     "FMOD scalar loop": {
-      "ExpectedInstructionCount": 72,
+      "ExpectedInstructionCount": 121,
       "x86Insts": [
         "mov     esi, ecx",
         "mov     rdx, rbp",
@@ -55,78 +55,127 @@
         "sub     esi, 0x1"
       ],
       "ExpectedArm64ASM": [
-        "mov w10, w5",
-        "mov x6, x9",
-        "mov x4, x7",
-        "ldr s18, [x6]",
-        "add x4, x4, #0x20 (32)",
-        "fmul s18, s18, s16",
-        "add x6, x6, #0x20 (32)",
-        "sub x20, x4, #0x20 (32)",
-        "ldr s2, [x20]",
-        "fadd s18, s18, s2",
-        "sub x20, x4, #0x20 (32)",
-        "str s18, [x20]",
-        "sub x20, x6, #0x1c (28)",
-        "ldr s18, [x20]",
-        "fmul s18, s18, s17",
-        "sub x20, x4, #0x1c (28)",
-        "ldr s2, [x20]",
-        "fadd s18, s18, s2",
-        "sub x20, x4, #0x1c (28)",
-        "str s18, [x20]",
-        "sub x20, x6, #0x18 (24)",
-        "ldr s18, [x20]",
-        "fmul s18, s18, s16",
-        "sub x20, x4, #0x18 (24)",
-        "ldr s2, [x20]",
-        "fadd s18, s18, s2",
-        "sub x20, x4, #0x18 (24)",
-        "str s18, [x20]",
-        "sub x20, x6, #0x14 (20)",
-        "ldr s18, [x20]",
-        "fmul s18, s18, s17",
-        "sub x20, x4, #0x14 (20)",
-        "ldr s2, [x20]",
-        "fadd s18, s18, s2",
-        "sub x20, x4, #0x14 (20)",
-        "str s18, [x20]",
-        "sub x20, x6, #0x10 (16)",
-        "ldr s18, [x20]",
-        "fmul s18, s18, s16",
-        "sub x20, x4, #0x10 (16)",
-        "ldr s2, [x20]",
-        "fadd s18, s18, s2",
-        "sub x20, x4, #0x10 (16)",
-        "str s18, [x20]",
-        "sub x20, x6, #0xc (12)",
-        "ldr s18, [x20]",
-        "fmul s18, s18, s17",
-        "sub x20, x4, #0xc (12)",
-        "ldr s2, [x20]",
-        "fadd s18, s18, s2",
-        "sub x20, x4, #0xc (12)",
-        "str s18, [x20]",
-        "sub x20, x6, #0x8 (8)",
-        "ldr s18, [x20]",
-        "fmul s18, s18, s16",
-        "sub x20, x4, #0x8 (8)",
-        "ldr s2, [x20]",
-        "fadd s18, s18, s2",
-        "sub x20, x4, #0x8 (8)",
-        "str s18, [x20]",
-        "sub x20, x6, #0x4 (4)",
-        "ldr s18, [x20]",
-        "fmul s18, s18, s17",
-        "sub x20, x4, #0x4 (4)",
-        "ldr s2, [x20]",
-        "fadd s18, s18, s2",
-        "sub x20, x4, #0x4 (4)",
-        "str s18, [x20]",
-        "mov x27, x10",
-        "subs w26, w10, #0x1 (1)",
+        "mov x20, x5",
+        "mov w21, w20",
+        "mov x10, x21",
+        "mov x20, x9",
+        "mov x6, x20",
+        "mov x22, x7",
+        "mov x4, x22",
+        "ldr s2, [x20]",
+        "mov v18.16b, v2.16b",
+        "add x23, x22, #0x20 (32)",
+        "mov x4, x23",
+        "mov v3.16b, v16.16b",
+        "mov v4.16b, v2.16b",
+        "fmul s4, s2, s3",
+        "mov v18.16b, v4.16b",
+        "add x22, x20, #0x20 (32)",
+        "mov x6, x22",
+        "sub x20, x23, #0x20 (32)",
+        "ldr s2, [x20]",
+        "mov v5.16b, v4.16b",
+        "fadd s5, s4, s2",
+        "mov v18.16b, v5.16b",
+        "sub x20, x23, #0x20 (32)",
+        "str s5, [x20]",
+        "sub x20, x22, #0x1c (28)",
+        "ldr s2, [x20]",
+        "mov v18.16b, v2.16b",
+        "mov v4.16b, v17.16b",
+        "mov v5.16b, v2.16b",
+        "fmul s5, s2, s4",
+        "mov v18.16b, v5.16b",
+        "sub x20, x23, #0x1c (28)",
+        "ldr s2, [x20]",
+        "mov v6.16b, v5.16b",
+        "fadd s6, s5, s2",
+        "mov v18.16b, v6.16b",
+        "sub x20, x23, #0x1c (28)",
+        "str s6, [x20]",
+        "sub x20, x22, #0x18 (24)",
+        "ldr s2, [x20]",
+        "mov v18.16b, v2.16b",
+        "mov v5.16b, v2.16b",
+        "fmul s5, s2, s3",
+        "mov v18.16b, v5.16b",
+        "sub x20, x23, #0x18 (24)",
+        "ldr s2, [x20]",
+        "mov v6.16b, v5.16b",
+        "fadd s6, s5, s2",
+        "mov v18.16b, v6.16b",
+        "sub x20, x23, #0x18 (24)",
+        "str s6, [x20]",
+        "sub x20, x22, #0x14 (20)",
+        "ldr s2, [x20]",
+        "mov v18.16b, v2.16b",
+        "mov v5.16b, v2.16b",
+        "fmul s5, s2, s4",
+        "mov v18.16b, v5.16b",
+        "sub x20, x23, #0x14 (20)",
+        "ldr s2, [x20]",
+        "mov v6.16b, v5.16b",
+        "fadd s6, s5, s2",
+        "mov v18.16b, v6.16b",
+        "sub x20, x23, #0x14 (20)",
+        "str s6, [x20]",
+        "sub x20, x22, #0x10 (16)",
+        "ldr s2, [x20]",
+        "mov v18.16b, v2.16b",
+        "mov v5.16b, v2.16b",
+        "fmul s5, s2, s3",
+        "mov v18.16b, v5.16b",
+        "sub x20, x23, #0x10 (16)",
+        "ldr s2, [x20]",
+        "mov v6.16b, v5.16b",
+        "fadd s6, s5, s2",
+        "mov v18.16b, v6.16b",
+        "sub x20, x23, #0x10 (16)",
+        "str s6, [x20]",
+        "sub x20, x22, #0xc (12)",
+        "ldr s2, [x20]",
+        "mov v18.16b, v2.16b",
+        "mov v5.16b, v2.16b",
+        "fmul s5, s2, s4",
+        "mov v18.16b, v5.16b",
+        "sub x20, x23, #0xc (12)",
+        "ldr s2, [x20]",
+        "mov v6.16b, v5.16b",
+        "fadd s6, s5, s2",
+        "mov v18.16b, v6.16b",
+        "sub x20, x23, #0xc (12)",
+        "str s6, [x20]",
+        "sub x20, x22, #0x8 (8)",
+        "ldr s2, [x20]",
+        "mov v18.16b, v2.16b",
+        "mov v5.16b, v2.16b",
+        "fmul s5, s2, s3",
+        "mov v18.16b, v5.16b",
+        "sub x20, x23, #0x8 (8)",
+        "ldr s2, [x20]",
+        "mov v3.16b, v5.16b",
+        "fadd s3, s5, s2",
+        "mov v18.16b, v3.16b",
+        "sub x20, x23, #0x8 (8)",
+        "str s3, [x20]",
+        "sub x20, x22, #0x4 (4)",
+        "ldr s2, [x20]",
+        "mov v18.16b, v2.16b",
+        "mov v3.16b, v2.16b",
+        "fmul s3, s2, s4",
+        "mov v18.16b, v3.16b",
+        "sub x20, x23, #0x4 (4)",
+        "ldr s2, [x20]",
+        "mov v4.16b, v3.16b",
+        "fadd s4, s3, s2",
+        "mov v18.16b, v4.16b",
+        "sub x20, x23, #0x4 (4)",
+        "str s4, [x20]",
+        "mov x27, x21",
+        "subs w20, w21, #0x1 (1)",
+        "mov x26, x20",
         "cfinv",
-        "mov x10, x26"
+        "mov x10, x20"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/FlagM/Primary.json b/unittests/InstructionCountCI/FlagM/Primary.json
index b34877fcb8..95996178b4 100644
--- a/unittests/InstructionCountCI/FlagM/Primary.json
+++ b/unittests/InstructionCountCI/FlagM/Primary.json
@@ -12,2530 +12,3416 @@
   },
   "Instructions": {
     "add bl, cl": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x00",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "lsl w0, w7, #24",
-        "cmn w0, w5, lsl #24",
-        "add w26, w7, w5",
-        "bfxil x7, x26, #0, #8"
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "lsl w0, w21, #24",
+        "cmn w0, w20, lsl #24",
+        "add w22, w21, w20",
+        "mov x26, x22",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #8",
+        "mov x7, x20"
       ]
     },
     "add bx, cx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x01",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "lsl w0, w7, #16",
-        "cmn w0, w5, lsl #16",
-        "add w26, w7, w5",
-        "bfxil x7, x26, #0, #16"
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "lsl w0, w21, #16",
+        "cmn w0, w20, lsl #16",
+        "add w22, w21, w20",
+        "mov x26, x22",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #16",
+        "mov x7, x20"
       ]
     },
     "add ebx, ecx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x01",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "adds w26, w7, w5",
-        "mov x7, x26"
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "adds w22, w21, w20",
+        "mov x26, x22",
+        "mov x7, x22"
       ]
     },
     "add rbx, rcx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x01",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "adds x26, x7, x5",
-        "mov x7, x26"
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "adds x22, x21, x20",
+        "mov x26, x22",
+        "mov x7, x22"
       ]
     },
     "db 0x02, 0xcb": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "0x02",
         "add bl, cl but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "lsl w0, w5, #24",
-        "cmn w0, w7, lsl #24",
-        "add w26, w5, w7",
-        "bfxil x5, x26, #0, #8"
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "lsl w0, w21, #24",
+        "cmn w0, w20, lsl #24",
+        "add w22, w21, w20",
+        "mov x26, x22",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #8",
+        "mov x5, x20"
       ]
     },
     "db 0x66, 0x03, 0xcb": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "0x03",
         "add bx, cx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "lsl w0, w5, #16",
-        "cmn w0, w7, lsl #16",
-        "add w26, w5, w7",
-        "bfxil x5, x26, #0, #16"
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "lsl w0, w21, #16",
+        "cmn w0, w20, lsl #16",
+        "add w22, w21, w20",
+        "mov x26, x22",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #16",
+        "mov x5, x20"
       ]
     },
     "db 0x03, 0xcb": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0x03",
         "add ebx, ecx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "adds w26, w5, w7",
-        "mov x5, x26"
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "adds w22, w21, w20",
+        "mov x26, x22",
+        "mov x5, x22"
       ]
     },
     "db 0x48, 0x03, 0xcb": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0x03",
         "add rbx, rcx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "adds x26, x5, x7",
-        "mov x5, x26"
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "adds x22, x21, x20",
+        "mov x26, x22",
+        "mov x5, x22"
       ]
     },
     "add al, 1": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "0x04",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "lsl w0, w27, #24",
+        "mov x21, x4",
+        "mov x27, x21",
+        "lsl w0, w21, #24",
         "cmn w0, w20, lsl #24",
-        "add w26, w27, #0x1 (1)",
-        "mov x4, x27",
-        "bfxil x4, x26, #0, #8"
+        "add w20, w21, #0x1 (1)",
+        "mov x26, x20",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "add ax, 1": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "0x05",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "lsl w0, w27, #16",
+        "mov x21, x4",
+        "mov x27, x21",
+        "lsl w0, w21, #16",
         "cmn w0, w20, lsl #16",
-        "add w26, w27, #0x1 (1)",
-        "mov x4, x27",
-        "bfxil x4, x26, #0, #16"
+        "add w20, w21, #0x1 (1)",
+        "mov x26, x20",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x4, x22"
       ]
     },
     "add eax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x05",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "adds w26, w27, #0x1 (1)",
-        "mov x4, x26"
+        "mov x20, x4",
+        "mov x27, x20",
+        "adds w21, w20, #0x1 (1)",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "add rax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x05",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "adds x26, x27, #0x1 (1)",
-        "mov x4, x26"
+        "mov x20, x4",
+        "mov x27, x20",
+        "adds x21, x20, #0x1 (1)",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "add al, -1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x04",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
-        "mvn w27, w4",
-        "lsl w0, w4, #24",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "lsl w0, w21, #24",
         "cmn w0, w20, lsl #24",
-        "add w26, w4, #0xff (255)",
-        "bfxil x4, x26, #0, #8"
+        "add w20, w21, #0xff (255)",
+        "mov x26, x20",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "add ax, -1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x05",
       "ExpectedArm64ASM": [
         "mov w20, #0xffff",
-        "mvn w27, w4",
-        "lsl w0, w4, #16",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "lsl w0, w21, #16",
         "cmn w0, w20, lsl #16",
-        "add w26, w4, w20",
-        "bfxil x4, x26, #0, #16"
+        "add w22, w21, w20",
+        "mov x26, x22",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #16",
+        "mov x4, x20"
       ]
     },
     "add eax, -1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x05",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "mvn w27, w4",
-        "adds w26, w4, w20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "adds w22, w21, w20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "add rax, -1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x05",
       "ExpectedArm64ASM": [
-        "mvn w27, w4",
-        "subs x26, x4, #0x1 (1)",
-        "mov x4, x26"
+        "mov x20, x4",
+        "mvn w21, w20",
+        "mov x27, x21",
+        "subs x21, x20, #0x1 (1)",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "or bl, bh": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 8,
       "Comment": "",
       "ExpectedArm64ASM": [
-        "lsr w20, w7, #8",
-        "orr w26, w7, w20",
-        "bfxil x7, x26, #0, #8",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x7",
+        "lsr w21, w20, #8",
+        "orr w22, w20, w21",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #8",
+        "mov x7, x21",
+        "mov x26, x22",
+        "cmn wzr, w22, lsl #24"
       ]
     },
     "or bl, cl": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x08",
       "ExpectedArm64ASM": [
-        "orr w26, w7, w5",
-        "bfxil x7, x26, #0, #8",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x5",
+        "mov x21, x7",
+        "orr w22, w21, w20",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #8",
+        "mov x7, x20",
+        "mov x26, x22",
+        "cmn wzr, w22, lsl #24"
       ]
     },
     "or bx, cx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x09",
       "ExpectedArm64ASM": [
-        "orr w26, w7, w5",
-        "bfxil x7, x26, #0, #16",
-        "cmn wzr, w26, lsl #16"
+        "mov x20, x5",
+        "mov x21, x7",
+        "orr w22, w21, w20",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #16",
+        "mov x7, x20",
+        "mov x26, x22",
+        "cmn wzr, w22, lsl #16"
       ]
     },
     "or ebx, ecx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x09",
       "ExpectedArm64ASM": [
-        "orr w7, w7, w5",
-        "mov x26, x7",
-        "tst w7, w7"
+        "mov x20, x5",
+        "mov x21, x7",
+        "orr w22, w21, w20",
+        "mov x7, x22",
+        "mov x26, x22",
+        "tst w22, w22"
       ]
     },
     "or rbx, rcx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x09",
       "ExpectedArm64ASM": [
-        "orr x7, x7, x5",
-        "mov x26, x7",
-        "tst x7, x7"
+        "mov x20, x5",
+        "mov x21, x7",
+        "orr x22, x21, x20",
+        "mov x7, x22",
+        "mov x26, x22",
+        "tst x22, x22"
       ]
     },
     "db 0x0A, 0xcb": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0x0A",
         "or bl, cl but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "orr w26, w5, w7",
-        "bfxil x5, x26, #0, #8",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x7",
+        "mov x21, x5",
+        "orr w22, w21, w20",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #8",
+        "mov x5, x20",
+        "mov x26, x22",
+        "cmn wzr, w22, lsl #24"
       ]
     },
     "db 0x66, 0x0B, 0xcb": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0x0B",
         "or bx, cx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "orr w26, w5, w7",
-        "bfxil x5, x26, #0, #16",
-        "cmn wzr, w26, lsl #16"
+        "mov x20, x7",
+        "mov x21, x5",
+        "orr w22, w21, w20",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #16",
+        "mov x5, x20",
+        "mov x26, x22",
+        "cmn wzr, w22, lsl #16"
       ]
     },
     "db 0x0B, 0xcb": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0x0B",
         "or ebx, ecx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "orr w5, w5, w7",
-        "mov x26, x5",
-        "tst w5, w5"
+        "mov x20, x7",
+        "mov x21, x5",
+        "orr w22, w21, w20",
+        "mov x5, x22",
+        "mov x26, x22",
+        "tst w22, w22"
       ]
     },
     "db 0x48, 0x0B, 0xcb": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0x0B",
         "or rbx, rcx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "orr x5, x5, x7",
-        "mov x26, x5",
-        "tst x5, x5"
+        "mov x20, x7",
+        "mov x21, x5",
+        "orr x22, x21, x20",
+        "mov x5, x22",
+        "mov x26, x22",
+        "tst x22, x22"
       ]
     },
     "or al, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x0C",
       "ExpectedArm64ASM": [
-        "orr w26, w4, #0x1",
-        "bfxil x4, x26, #0, #8",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x4",
+        "orr w21, w20, #0x1",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22",
+        "mov x26, x21",
+        "cmn wzr, w21, lsl #24"
       ]
     },
     "or ax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x0D",
       "ExpectedArm64ASM": [
-        "orr w26, w4, #0x1",
-        "bfxil x4, x26, #0, #16",
-        "cmn wzr, w26, lsl #16"
+        "mov x20, x4",
+        "orr w21, w20, #0x1",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22",
+        "mov x26, x21",
+        "cmn wzr, w21, lsl #16"
       ]
     },
     "or eax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0D",
       "ExpectedArm64ASM": [
-        "orr w4, w4, #0x1",
-        "mov x26, x4",
-        "tst w4, w4"
+        "mov x20, x4",
+        "orr w21, w20, #0x1",
+        "mov x4, x21",
+        "mov x26, x21",
+        "tst w21, w21"
       ]
     },
     "or rax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0D",
       "ExpectedArm64ASM": [
-        "orr x4, x4, #0x1",
-        "mov x26, x4",
-        "tst x4, x4"
+        "mov x20, x4",
+        "orr x21, x20, #0x1",
+        "mov x4, x21",
+        "mov x26, x21",
+        "tst x21, x21"
       ]
     },
     "or al, -1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x0C",
       "ExpectedArm64ASM": [
-        "orr w26, w4, #0xff",
-        "bfxil x4, x26, #0, #8",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x4",
+        "orr w21, w20, #0xff",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22",
+        "mov x26, x21",
+        "cmn wzr, w21, lsl #24"
       ]
     },
     "or ax, -1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x0D",
       "ExpectedArm64ASM": [
-        "orr w26, w4, #0xffff",
-        "bfxil x4, x26, #0, #16",
-        "cmn wzr, w26, lsl #16"
+        "mov x20, x4",
+        "orr w21, w20, #0xffff",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22",
+        "mov x26, x21",
+        "cmn wzr, w21, lsl #16"
       ]
     },
     "or eax, -1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0D",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "orr w4, w4, w20",
-        "mov x26, x4",
-        "tst w4, w4"
+        "mov x21, x4",
+        "orr w22, w21, w20",
+        "mov x4, x22",
+        "mov x26, x22",
+        "tst w22, w22"
       ]
     },
     "or rax, -1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0D",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
-        "orr x4, x4, x20",
-        "mov x26, x4",
-        "tst x4, x4"
+        "mov x21, x4",
+        "orr x22, x21, x20",
+        "mov x4, x22",
+        "mov x26, x22",
+        "tst x22, x22"
       ]
     },
     "adc bl, cl": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 23,
       "Comment": "0x10",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "cset w20, hs",
-        "adc w21, w7, w5",
-        "uxtb w26, w21",
-        "cmp x26, x5",
-        "cset x21, lo",
-        "cmp x26, x5",
-        "cset x22, ls",
-        "cmp x20, #0x1 (1)",
-        "csel x20, x22, x21, eq",
-        "cmn wzr, w26, lsl #24",
-        "rmif x20, #63, #nzCv",
-        "eor w20, w7, w5",
-        "eor w21, w26, w7",
-        "bic w20, w21, w20",
-        "rmif x20, #7, #nzcV",
-        "bfxil x7, x26, #0, #8"
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "cset w22, hs",
+        "adc w23, w21, w20",
+        "uxtb w24, w23",
+        "cmp x24, x20",
+        "cset x23, lo",
+        "cmp x24, x20",
+        "cset x25, ls",
+        "cmp x22, #0x1 (1)",
+        "csel x30, x25, x23, eq",
+        "cmn wzr, w24, lsl #24",
+        "rmif x30, #63, #nzCv",
+        "eor w22, w21, w20",
+        "eor w20, w24, w21",
+        "bic w23, w20, w22",
+        "rmif x23, #7, #nzcV",
+        "mov x26, x24",
+        "mov x20, x21",
+        "bfxil x20, x24, #0, #8",
+        "mov x7, x20"
       ]
     },
     "adc bx, cx": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 23,
       "Comment": "0x11",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "cset w20, hs",
-        "adc w21, w7, w5",
-        "uxth w26, w21",
-        "cmp x26, x5",
-        "cset x21, lo",
-        "cmp x26, x5",
-        "cset x22, ls",
-        "cmp x20, #0x1 (1)",
-        "csel x20, x22, x21, eq",
-        "cmn wzr, w26, lsl #16",
-        "rmif x20, #63, #nzCv",
-        "eor w20, w7, w5",
-        "eor w21, w26, w7",
-        "bic w20, w21, w20",
-        "rmif x20, #15, #nzcV",
-        "bfxil x7, x26, #0, #16"
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "cset w22, hs",
+        "adc w23, w21, w20",
+        "uxth w24, w23",
+        "cmp x24, x20",
+        "cset x23, lo",
+        "cmp x24, x20",
+        "cset x25, ls",
+        "cmp x22, #0x1 (1)",
+        "csel x30, x25, x23, eq",
+        "cmn wzr, w24, lsl #16",
+        "rmif x30, #63, #nzCv",
+        "eor w22, w21, w20",
+        "eor w20, w24, w21",
+        "bic w23, w20, w22",
+        "rmif x23, #15, #nzcV",
+        "mov x26, x24",
+        "mov x20, x21",
+        "bfxil x20, x24, #0, #16",
+        "mov x7, x20"
       ]
     },
     "adc ebx, ecx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x11",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "adcs w26, w7, w5",
-        "mov x7, x26"
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "adcs w22, w21, w20",
+        "mov x26, x22",
+        "mov x7, x22"
       ]
     },
     "adc rbx, rcx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x11",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "adcs x26, x7, x5",
-        "mov x7, x26"
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "adcs x22, x21, x20",
+        "mov x26, x22",
+        "mov x7, x22"
       ]
     },
     "db 0x12, 0xcb": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 23,
       "Comment": [
         "0x12",
         "adc bl, cl but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "cset w20, hs",
-        "adc w21, w5, w7",
-        "uxtb w26, w21",
-        "cmp x26, x7",
-        "cset x21, lo",
-        "cmp x26, x7",
-        "cset x22, ls",
-        "cmp x20, #0x1 (1)",
-        "csel x20, x22, x21, eq",
-        "cmn wzr, w26, lsl #24",
-        "rmif x20, #63, #nzCv",
-        "eor w20, w5, w7",
-        "eor w21, w26, w5",
-        "bic w20, w21, w20",
-        "rmif x20, #7, #nzcV",
-        "bfxil x5, x26, #0, #8"
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "cset w22, hs",
+        "adc w23, w21, w20",
+        "uxtb w24, w23",
+        "cmp x24, x20",
+        "cset x23, lo",
+        "cmp x24, x20",
+        "cset x25, ls",
+        "cmp x22, #0x1 (1)",
+        "csel x30, x25, x23, eq",
+        "cmn wzr, w24, lsl #24",
+        "rmif x30, #63, #nzCv",
+        "eor w22, w21, w20",
+        "eor w20, w24, w21",
+        "bic w23, w20, w22",
+        "rmif x23, #7, #nzcV",
+        "mov x26, x24",
+        "mov x20, x21",
+        "bfxil x20, x24, #0, #8",
+        "mov x5, x20"
       ]
     },
     "db 0x66, 0x13, 0xcb": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 23,
       "Comment": [
         "0x13",
         "adc bx, cx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "cset w20, hs",
-        "adc w21, w5, w7",
-        "uxth w26, w21",
-        "cmp x26, x7",
-        "cset x21, lo",
-        "cmp x26, x7",
-        "cset x22, ls",
-        "cmp x20, #0x1 (1)",
-        "csel x20, x22, x21, eq",
-        "cmn wzr, w26, lsl #16",
-        "rmif x20, #63, #nzCv",
-        "eor w20, w5, w7",
-        "eor w21, w26, w5",
-        "bic w20, w21, w20",
-        "rmif x20, #15, #nzcV",
-        "bfxil x5, x26, #0, #16"
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "cset w22, hs",
+        "adc w23, w21, w20",
+        "uxth w24, w23",
+        "cmp x24, x20",
+        "cset x23, lo",
+        "cmp x24, x20",
+        "cset x25, ls",
+        "cmp x22, #0x1 (1)",
+        "csel x30, x25, x23, eq",
+        "cmn wzr, w24, lsl #16",
+        "rmif x30, #63, #nzCv",
+        "eor w22, w21, w20",
+        "eor w20, w24, w21",
+        "bic w23, w20, w22",
+        "rmif x23, #15, #nzcV",
+        "mov x26, x24",
+        "mov x20, x21",
+        "bfxil x20, x24, #0, #16",
+        "mov x5, x20"
       ]
     },
     "db 0x13, 0xcb": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0x13",
         "adc ebx, ecx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "adcs w26, w5, w7",
-        "mov x5, x26"
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "adcs w22, w21, w20",
+        "mov x26, x22",
+        "mov x5, x22"
       ]
     },
     "db 0x48, 0x13, 0xcb": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0x13",
         "adc rbx, rcx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "adcs x26, x5, x7",
-        "mov x5, x26"
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "adcs x22, x21, x20",
+        "mov x26, x22",
+        "mov x5, x22"
       ]
     },
     "adc al, 1": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 20,
       "Comment": "0x14",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "cset w21, hs",
-        "adc w20, w27, w20",
-        "uxtb w26, w20",
-        "cmp w26, #0x1 (1)",
-        "cset x20, lo",
-        "cmp w26, #0x1 (1)",
-        "cset x22, ls",
-        "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #24",
-        "rmif x20, #63, #nzCv",
-        "bic w20, w26, w27",
-        "rmif x20, #7, #nzcV",
-        "mov x4, x27",
-        "bfxil x4, x26, #0, #8"
+        "mov x21, x4",
+        "mov x27, x21",
+        "cset w22, hs",
+        "adc w23, w21, w20",
+        "uxtb w20, w23",
+        "cmp w20, #0x1 (1)",
+        "cset x23, lo",
+        "cmp w20, #0x1 (1)",
+        "cset x24, ls",
+        "cmp x22, #0x1 (1)",
+        "csel x25, x24, x23, eq",
+        "cmn wzr, w20, lsl #24",
+        "rmif x25, #63, #nzCv",
+        "bic w22, w20, w21",
+        "rmif x22, #7, #nzcV",
+        "mov x26, x20",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "adc ax, 1": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 20,
       "Comment": "0x15",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "cset w21, hs",
-        "adc w20, w27, w20",
-        "uxth w26, w20",
-        "cmp w26, #0x1 (1)",
-        "cset x20, lo",
-        "cmp w26, #0x1 (1)",
-        "cset x22, ls",
-        "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #16",
-        "rmif x20, #63, #nzCv",
-        "bic w20, w26, w27",
-        "rmif x20, #15, #nzcV",
-        "mov x4, x27",
-        "bfxil x4, x26, #0, #16"
+        "mov x21, x4",
+        "mov x27, x21",
+        "cset w22, hs",
+        "adc w23, w21, w20",
+        "uxth w20, w23",
+        "cmp w20, #0x1 (1)",
+        "cset x23, lo",
+        "cmp w20, #0x1 (1)",
+        "cset x24, ls",
+        "cmp x22, #0x1 (1)",
+        "csel x25, x24, x23, eq",
+        "cmn wzr, w20, lsl #16",
+        "rmif x25, #63, #nzCv",
+        "bic w22, w20, w21",
+        "rmif x22, #15, #nzcV",
+        "mov x26, x20",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x4, x22"
       ]
     },
     "adc eax, 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x15",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "adcs w26, w27, w20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "mov x27, x21",
+        "adcs w22, w21, w20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "adc rax, 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x15",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "adcs x26, x27, x20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "mov x27, x21",
+        "adcs x22, x21, x20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "adc al, -1": {
-      "ExpectedInstructionCount": 16,
+      "ExpectedInstructionCount": 21,
       "Comment": "0x14",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
-        "mvn w27, w4",
-        "cset w21, hs",
-        "adc w20, w4, w20",
-        "uxtb w26, w20",
-        "cmp w26, #0xff (255)",
-        "cset x20, lo",
-        "cmp w26, #0xff (255)",
-        "cset x22, ls",
-        "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #24",
-        "rmif x20, #63, #nzCv",
-        "bic w20, w4, w26",
-        "rmif x20, #7, #nzcV",
-        "bfxil x4, x26, #0, #8"
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "cset w22, hs",
+        "adc w23, w21, w20",
+        "uxtb w20, w23",
+        "cmp w20, #0xff (255)",
+        "cset x23, lo",
+        "cmp w20, #0xff (255)",
+        "cset x24, ls",
+        "cmp x22, #0x1 (1)",
+        "csel x25, x24, x23, eq",
+        "cmn wzr, w20, lsl #24",
+        "rmif x25, #63, #nzCv",
+        "bic w22, w21, w20",
+        "rmif x22, #7, #nzcV",
+        "mov x26, x20",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "adc ax, -1": {
-      "ExpectedInstructionCount": 16,
+      "ExpectedInstructionCount": 21,
       "Comment": "0x15",
       "ExpectedArm64ASM": [
         "mov w20, #0xffff",
-        "mvn w27, w4",
-        "cset w21, hs",
-        "adc w22, w4, w20",
-        "uxth w26, w22",
-        "cmp w26, w20",
-        "cset x22, lo",
-        "cmp w26, w20",
-        "cset x20, ls",
-        "cmp x21, #0x1 (1)",
-        "csel x20, x20, x22, eq",
-        "cmn wzr, w26, lsl #16",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "cset w22, hs",
+        "adc w23, w21, w20",
+        "uxth w24, w23",
+        "cmp w24, w20",
+        "cset x23, lo",
+        "cmp w24, w20",
+        "cset x25, ls",
+        "cmp x22, #0x1 (1)",
+        "csel x20, x25, x23, eq",
+        "cmn wzr, w24, lsl #16",
         "rmif x20, #63, #nzCv",
-        "bic w20, w4, w26",
+        "bic w20, w21, w24",
         "rmif x20, #15, #nzcV",
-        "bfxil x4, x26, #0, #16"
+        "mov x26, x24",
+        "mov x20, x21",
+        "bfxil x20, x24, #0, #16",
+        "mov x4, x20"
       ]
     },
     "adc eax, -1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x15",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "mvn w27, w4",
-        "adcs w26, w4, w20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "adcs w22, w21, w20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "adc rax, -1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x15",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
-        "mvn w27, w4",
-        "adcs x26, x4, x20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "adcs x22, x21, x20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "sbb bl, cl": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 24,
       "Comment": "0x18",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "cset w20, hs",
-        "add w21, w5, w20",
-        "sub w21, w7, w21",
-        "uxtb w26, w21",
-        "cmp x26, x7",
-        "cset x21, hi",
-        "cmp x26, x7",
-        "cset x22, hs",
-        "cmp x20, #0x1 (1)",
-        "csel x20, x22, x21, eq",
-        "cmn wzr, w26, lsl #24",
-        "rmif x20, #63, #nzCv",
-        "eor w20, w7, w5",
-        "eor w21, w26, w7",
-        "and w20, w21, w20",
-        "rmif x20, #7, #nzcV",
-        "bfxil x7, x26, #0, #8"
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "cset w22, hs",
+        "add w23, w20, w22",
+        "sub w24, w21, w23",
+        "uxtb w23, w24",
+        "cmp x23, x21",
+        "cset x24, hi",
+        "cmp x23, x21",
+        "cset x25, hs",
+        "cmp x22, #0x1 (1)",
+        "csel x30, x25, x24, eq",
+        "cmn wzr, w23, lsl #24",
+        "rmif x30, #63, #nzCv",
+        "eor w22, w21, w20",
+        "eor w20, w23, w21",
+        "and w24, w20, w22",
+        "rmif x24, #7, #nzcV",
+        "mov x26, x23",
+        "mov x20, x21",
+        "bfxil x20, x23, #0, #8",
+        "mov x7, x20"
       ]
     },
     "sbb bx, cx": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 24,
       "Comment": "0x19",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "cset w20, hs",
-        "add w21, w5, w20",
-        "sub w21, w7, w21",
-        "uxth w26, w21",
-        "cmp x26, x7",
-        "cset x21, hi",
-        "cmp x26, x7",
-        "cset x22, hs",
-        "cmp x20, #0x1 (1)",
-        "csel x20, x22, x21, eq",
-        "cmn wzr, w26, lsl #16",
-        "rmif x20, #63, #nzCv",
-        "eor w20, w7, w5",
-        "eor w21, w26, w7",
-        "and w20, w21, w20",
-        "rmif x20, #15, #nzcV",
-        "bfxil x7, x26, #0, #16"
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "cset w22, hs",
+        "add w23, w20, w22",
+        "sub w24, w21, w23",
+        "uxth w23, w24",
+        "cmp x23, x21",
+        "cset x24, hi",
+        "cmp x23, x21",
+        "cset x25, hs",
+        "cmp x22, #0x1 (1)",
+        "csel x30, x25, x24, eq",
+        "cmn wzr, w23, lsl #16",
+        "rmif x30, #63, #nzCv",
+        "eor w22, w21, w20",
+        "eor w20, w23, w21",
+        "and w24, w20, w22",
+        "rmif x24, #15, #nzcV",
+        "mov x26, x23",
+        "mov x20, x21",
+        "bfxil x20, x23, #0, #16",
+        "mov x7, x20"
       ]
     },
     "sbb ebx, ecx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x19",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
         "cfinv",
-        "sbcs w26, w7, w5",
+        "sbcs w22, w21, w20",
         "cfinv",
-        "mov x7, x26"
+        "mov x26, x22",
+        "mov x7, x22"
       ]
     },
     "sbb rbx, rcx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x19",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
         "cfinv",
-        "sbcs x26, x7, x5",
+        "sbcs x22, x21, x20",
         "cfinv",
-        "mov x7, x26"
+        "mov x26, x22",
+        "mov x7, x22"
       ]
     },
     "db 0x1A, 0xcb": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 24,
       "Comment": [
         "0x1A",
         "sbb bl, cl but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "cset w20, hs",
-        "add w21, w7, w20",
-        "sub w21, w5, w21",
-        "uxtb w26, w21",
-        "cmp x26, x5",
-        "cset x21, hi",
-        "cmp x26, x5",
-        "cset x22, hs",
-        "cmp x20, #0x1 (1)",
-        "csel x20, x22, x21, eq",
-        "cmn wzr, w26, lsl #24",
-        "rmif x20, #63, #nzCv",
-        "eor w20, w5, w7",
-        "eor w21, w26, w5",
-        "and w20, w21, w20",
-        "rmif x20, #7, #nzcV",
-        "bfxil x5, x26, #0, #8"
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "cset w22, hs",
+        "add w23, w20, w22",
+        "sub w24, w21, w23",
+        "uxtb w23, w24",
+        "cmp x23, x21",
+        "cset x24, hi",
+        "cmp x23, x21",
+        "cset x25, hs",
+        "cmp x22, #0x1 (1)",
+        "csel x30, x25, x24, eq",
+        "cmn wzr, w23, lsl #24",
+        "rmif x30, #63, #nzCv",
+        "eor w22, w21, w20",
+        "eor w20, w23, w21",
+        "and w24, w20, w22",
+        "rmif x24, #7, #nzcV",
+        "mov x26, x23",
+        "mov x20, x21",
+        "bfxil x20, x23, #0, #8",
+        "mov x5, x20"
       ]
     },
     "db 0x66, 0x1B, 0xcb": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 24,
       "Comment": [
         "0x1B",
         "sbb bx, cx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "cset w20, hs",
-        "add w21, w7, w20",
-        "sub w21, w5, w21",
-        "uxth w26, w21",
-        "cmp x26, x5",
-        "cset x21, hi",
-        "cmp x26, x5",
-        "cset x22, hs",
-        "cmp x20, #0x1 (1)",
-        "csel x20, x22, x21, eq",
-        "cmn wzr, w26, lsl #16",
-        "rmif x20, #63, #nzCv",
-        "eor w20, w5, w7",
-        "eor w21, w26, w5",
-        "and w20, w21, w20",
-        "rmif x20, #15, #nzcV",
-        "bfxil x5, x26, #0, #16"
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "cset w22, hs",
+        "add w23, w20, w22",
+        "sub w24, w21, w23",
+        "uxth w23, w24",
+        "cmp x23, x21",
+        "cset x24, hi",
+        "cmp x23, x21",
+        "cset x25, hs",
+        "cmp x22, #0x1 (1)",
+        "csel x30, x25, x24, eq",
+        "cmn wzr, w23, lsl #16",
+        "rmif x30, #63, #nzCv",
+        "eor w22, w21, w20",
+        "eor w20, w23, w21",
+        "and w24, w20, w22",
+        "rmif x24, #15, #nzcV",
+        "mov x26, x23",
+        "mov x20, x21",
+        "bfxil x20, x23, #0, #16",
+        "mov x5, x20"
       ]
     },
     "db 0x1B, 0xcb": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0x1B",
         "sbb ebx, ecx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
         "cfinv",
-        "sbcs w26, w5, w7",
+        "sbcs w22, w21, w20",
         "cfinv",
-        "mov x5, x26"
+        "mov x26, x22",
+        "mov x5, x22"
       ]
     },
     "db 0x48, 0x1B, 0xcb": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0x1B",
         "sbb rbx, rcx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
         "cfinv",
-        "sbcs x26, x5, x7",
+        "sbcs x22, x21, x20",
         "cfinv",
-        "mov x5, x26"
+        "mov x26, x22",
+        "mov x5, x22"
       ]
     },
     "sbb al, 1": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 21,
       "Comment": "0x1C",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "cset w21, hs",
-        "add w20, w20, w21",
-        "sub w20, w27, w20",
-        "uxtb w26, w20",
-        "cmp x26, x27",
+        "mov x21, x4",
+        "mov x27, x21",
+        "cset w22, hs",
+        "add w23, w20, w22",
+        "sub w20, w21, w23",
+        "uxtb w23, w20",
+        "cmp x23, x21",
         "cset x20, hi",
-        "cmp x26, x27",
-        "cset x22, hs",
-        "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #24",
-        "rmif x20, #63, #nzCv",
-        "bic w20, w27, w26",
+        "cmp x23, x21",
+        "cset x24, hs",
+        "cmp x22, #0x1 (1)",
+        "csel x25, x24, x20, eq",
+        "cmn wzr, w23, lsl #24",
+        "rmif x25, #63, #nzCv",
+        "bic w20, w21, w23",
         "rmif x20, #7, #nzcV",
-        "mov x4, x27",
-        "bfxil x4, x26, #0, #8"
+        "mov x26, x23",
+        "mov x20, x21",
+        "bfxil x20, x23, #0, #8",
+        "mov x4, x20"
       ]
     },
     "sbb ax, 1": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 21,
       "Comment": "0x1D",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "cset w21, hs",
-        "add w20, w20, w21",
-        "sub w20, w27, w20",
-        "uxth w26, w20",
-        "cmp x26, x27",
+        "mov x21, x4",
+        "mov x27, x21",
+        "cset w22, hs",
+        "add w23, w20, w22",
+        "sub w20, w21, w23",
+        "uxth w23, w20",
+        "cmp x23, x21",
         "cset x20, hi",
-        "cmp x26, x27",
-        "cset x22, hs",
-        "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #16",
-        "rmif x20, #63, #nzCv",
-        "bic w20, w27, w26",
+        "cmp x23, x21",
+        "cset x24, hs",
+        "cmp x22, #0x1 (1)",
+        "csel x25, x24, x20, eq",
+        "cmn wzr, w23, lsl #16",
+        "rmif x25, #63, #nzCv",
+        "bic w20, w21, w23",
         "rmif x20, #15, #nzcV",
-        "mov x4, x27",
-        "bfxil x4, x26, #0, #16"
+        "mov x26, x23",
+        "mov x20, x21",
+        "bfxil x20, x23, #0, #16",
+        "mov x4, x20"
       ]
     },
     "sbb eax, 1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x1D",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
+        "mov x21, x4",
+        "mov x27, x21",
         "cfinv",
-        "sbcs w26, w27, w20",
+        "sbcs w22, w21, w20",
         "cfinv",
-        "mov x4, x26"
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "sbb rax, 1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x1D",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
+        "mov x21, x4",
+        "mov x27, x21",
         "cfinv",
-        "sbcs x26, x27, x20",
+        "sbcs x22, x21, x20",
         "cfinv",
-        "mov x4, x26"
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "sbb al, -1": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 22,
       "Comment": "0x1C",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
-        "mvn w27, w4",
-        "cset w21, hs",
-        "add w20, w20, w21",
-        "sub w20, w4, w20",
-        "uxtb w26, w20",
-        "cmp x26, x4",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "cset w22, hs",
+        "add w23, w20, w22",
+        "sub w20, w21, w23",
+        "uxtb w23, w20",
+        "cmp x23, x21",
         "cset x20, hi",
-        "cmp x26, x4",
-        "cset x22, hs",
-        "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #24",
-        "rmif x20, #63, #nzCv",
-        "bic w20, w26, w4",
+        "cmp x23, x21",
+        "cset x24, hs",
+        "cmp x22, #0x1 (1)",
+        "csel x25, x24, x20, eq",
+        "cmn wzr, w23, lsl #24",
+        "rmif x25, #63, #nzCv",
+        "bic w20, w23, w21",
         "rmif x20, #7, #nzcV",
-        "bfxil x4, x26, #0, #8"
+        "mov x26, x23",
+        "mov x20, x21",
+        "bfxil x20, x23, #0, #8",
+        "mov x4, x20"
       ]
     },
     "sbb ax, -1": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 22,
       "Comment": "0x1D",
       "ExpectedArm64ASM": [
         "mov w20, #0xffff",
-        "mvn w27, w4",
-        "cset w21, hs",
-        "add w20, w20, w21",
-        "sub w20, w4, w20",
-        "uxth w26, w20",
-        "cmp x26, x4",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "cset w22, hs",
+        "add w23, w20, w22",
+        "sub w20, w21, w23",
+        "uxth w23, w20",
+        "cmp x23, x21",
         "cset x20, hi",
-        "cmp x26, x4",
-        "cset x22, hs",
-        "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #16",
-        "rmif x20, #63, #nzCv",
-        "bic w20, w26, w4",
+        "cmp x23, x21",
+        "cset x24, hs",
+        "cmp x22, #0x1 (1)",
+        "csel x25, x24, x20, eq",
+        "cmn wzr, w23, lsl #16",
+        "rmif x25, #63, #nzCv",
+        "bic w20, w23, w21",
         "rmif x20, #15, #nzcV",
-        "bfxil x4, x26, #0, #16"
+        "mov x26, x23",
+        "mov x20, x21",
+        "bfxil x20, x23, #0, #16",
+        "mov x4, x20"
       ]
     },
     "sbb eax, -1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x1D",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "mvn w27, w4",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
         "cfinv",
-        "sbcs w26, w4, w20",
+        "sbcs w22, w21, w20",
         "cfinv",
-        "mov x4, x26"
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "sbb rax, -1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x1D",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
-        "mvn w27, w4",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
         "cfinv",
-        "sbcs x26, x4, x20",
+        "sbcs x22, x21, x20",
         "cfinv",
-        "mov x4, x26"
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "and bl, cl": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x20",
       "ExpectedArm64ASM": [
-        "and w26, w7, w5",
-        "cmn wzr, w26, lsl #24",
-        "bfxil x7, x26, #0, #8"
+        "mov x20, x5",
+        "mov x21, x7",
+        "and w22, w21, w20",
+        "cmn wzr, w22, lsl #24",
+        "mov x26, x22",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #8",
+        "mov x7, x20"
       ]
     },
     "and bx, cx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x21",
       "ExpectedArm64ASM": [
-        "and w26, w7, w5",
-        "cmn wzr, w26, lsl #16",
-        "bfxil x7, x26, #0, #16"
+        "mov x20, x5",
+        "mov x21, x7",
+        "and w22, w21, w20",
+        "cmn wzr, w22, lsl #16",
+        "mov x26, x22",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #16",
+        "mov x7, x20"
       ]
     },
     "and ebx, ecx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x21",
       "ExpectedArm64ASM": [
-        "ands w26, w7, w5",
-        "mov x7, x26"
+        "mov x20, x5",
+        "mov x21, x7",
+        "ands w22, w21, w20",
+        "mov x26, x22",
+        "mov x7, x22"
       ]
     },
     "and rbx, rcx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x21",
       "ExpectedArm64ASM": [
-        "ands x26, x7, x5",
-        "mov x7, x26"
+        "mov x20, x5",
+        "mov x21, x7",
+        "ands x22, x21, x20",
+        "mov x26, x22",
+        "mov x7, x22"
       ]
     },
     "db 0x22, 0xcb": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0x22",
         "and bl, cl but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "and w26, w5, w7",
-        "cmn wzr, w26, lsl #24",
-        "bfxil x5, x26, #0, #8"
+        "mov x20, x7",
+        "mov x21, x5",
+        "and w22, w21, w20",
+        "cmn wzr, w22, lsl #24",
+        "mov x26, x22",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #8",
+        "mov x5, x20"
       ]
     },
     "db 0x66, 0x23, 0xcb": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0x23",
         "and bx, cx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "and w26, w5, w7",
-        "cmn wzr, w26, lsl #16",
-        "bfxil x5, x26, #0, #16"
+        "mov x20, x7",
+        "mov x21, x5",
+        "and w22, w21, w20",
+        "cmn wzr, w22, lsl #16",
+        "mov x26, x22",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #16",
+        "mov x5, x20"
       ]
     },
     "db 0x23, 0xcb": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x23",
         "and ebx, ecx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "ands w26, w5, w7",
-        "mov x5, x26"
+        "mov x20, x7",
+        "mov x21, x5",
+        "ands w22, w21, w20",
+        "mov x26, x22",
+        "mov x5, x22"
       ]
     },
     "db 0x48, 0x23, 0xcb": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x23",
         "and rbx, rcx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "ands x26, x5, x7",
-        "mov x5, x26"
+        "mov x20, x7",
+        "mov x21, x5",
+        "ands x22, x21, x20",
+        "mov x26, x22",
+        "mov x5, x22"
       ]
     },
     "and al, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x24",
       "ExpectedArm64ASM": [
-        "and w26, w4, #0x1",
-        "cmn wzr, w26, lsl #24",
-        "bfxil x4, x26, #0, #8"
+        "mov x20, x4",
+        "and w21, w20, #0x1",
+        "cmn wzr, w21, lsl #24",
+        "mov x26, x21",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22"
       ]
     },
     "and ax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x25",
       "ExpectedArm64ASM": [
-        "and w26, w4, #0x1",
-        "cmn wzr, w26, lsl #16",
-        "bfxil x4, x26, #0, #16"
+        "mov x20, x4",
+        "and w21, w20, #0x1",
+        "cmn wzr, w21, lsl #16",
+        "mov x26, x21",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22"
       ]
     },
     "and eax, 1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x25",
       "ExpectedArm64ASM": [
-        "ands w26, w4, #0x1",
-        "mov x4, x26"
+        "mov x20, x4",
+        "ands w21, w20, #0x1",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "and rax, 1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x25",
       "ExpectedArm64ASM": [
-        "ands x26, x4, #0x1",
-        "mov x4, x26"
+        "mov x20, x4",
+        "ands x21, x20, #0x1",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "and al, -1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x24",
       "ExpectedArm64ASM": [
-        "and w26, w4, #0xff",
-        "cmn wzr, w26, lsl #24",
-        "bfxil x4, x26, #0, #8"
+        "mov x20, x4",
+        "and w21, w20, #0xff",
+        "cmn wzr, w21, lsl #24",
+        "mov x26, x21",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22"
       ]
     },
     "and ax, -1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x25",
       "ExpectedArm64ASM": [
-        "and w26, w4, #0xffff",
-        "cmn wzr, w26, lsl #16",
-        "bfxil x4, x26, #0, #16"
+        "mov x20, x4",
+        "and w21, w20, #0xffff",
+        "cmn wzr, w21, lsl #16",
+        "mov x26, x21",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22"
       ]
     },
     "and eax, -1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x25",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "ands w26, w4, w20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "ands w22, w21, w20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "and rax, -1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x25",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
-        "ands x26, x4, x20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "ands x22, x21, x20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "sub bl, cl": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 12,
       "Comment": "0x28",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "lsl w0, w7, #24",
-        "cmp w0, w5, lsl #24",
-        "sub w26, w7, w5",
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "lsl w0, w21, #24",
+        "cmp w0, w20, lsl #24",
+        "sub w22, w21, w20",
+        "mov x26, x22",
         "cfinv",
-        "bfxil x7, x26, #0, #8"
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #8",
+        "mov x7, x20"
       ]
     },
     "sub bx, cx": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 12,
       "Comment": "0x29",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "lsl w0, w7, #16",
-        "cmp w0, w5, lsl #16",
-        "sub w26, w7, w5",
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "lsl w0, w21, #16",
+        "cmp w0, w20, lsl #16",
+        "sub w22, w21, w20",
+        "mov x26, x22",
         "cfinv",
-        "bfxil x7, x26, #0, #16"
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #16",
+        "mov x7, x20"
       ]
     },
     "sub ebx, ecx": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x29",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "subs w26, w7, w5",
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "subs w22, w21, w20",
+        "mov x26, x22",
         "cfinv",
-        "mov x7, x26"
+        "mov x7, x22"
       ]
     },
     "sub rbx, rcx": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x29",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "subs x26, x7, x5",
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "subs x22, x21, x20",
+        "mov x26, x22",
         "cfinv",
-        "mov x7, x26"
+        "mov x7, x22"
       ]
     },
     "db 0x2A, 0xcb": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 12,
       "Comment": [
         "0x2A",
         "sub bl, cl but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "lsl w0, w5, #24",
-        "cmp w0, w7, lsl #24",
-        "sub w26, w5, w7",
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "lsl w0, w21, #24",
+        "cmp w0, w20, lsl #24",
+        "sub w22, w21, w20",
+        "mov x26, x22",
         "cfinv",
-        "bfxil x5, x26, #0, #8"
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #8",
+        "mov x5, x20"
       ]
     },
     "db 0x66, 0x2B, 0xcb": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 12,
       "Comment": [
         "0x2B",
         "sub bx, cx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "lsl w0, w5, #16",
-        "cmp w0, w7, lsl #16",
-        "sub w26, w5, w7",
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "lsl w0, w21, #16",
+        "cmp w0, w20, lsl #16",
+        "sub w22, w21, w20",
+        "mov x26, x22",
         "cfinv",
-        "bfxil x5, x26, #0, #16"
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #16",
+        "mov x5, x20"
       ]
     },
     "db 0x2B, 0xcb": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0x2B",
         "sub ebx, ecx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "subs w26, w5, w7",
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "subs w22, w21, w20",
+        "mov x26, x22",
         "cfinv",
-        "mov x5, x26"
+        "mov x5, x22"
       ]
     },
     "db 0x48, 0x2B, 0xcb": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0x2B",
         "sub rbx, rcx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "subs x26, x5, x7",
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "subs x22, x21, x20",
+        "mov x26, x22",
         "cfinv",
-        "mov x5, x26"
+        "mov x5, x22"
       ]
     },
     "sub al, 1": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x2C",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "lsl w0, w27, #24",
+        "mov x21, x4",
+        "mov x27, x21",
+        "lsl w0, w21, #24",
         "cmp w0, w20, lsl #24",
-        "sub w26, w27, #0x1 (1)",
+        "sub w20, w21, #0x1 (1)",
+        "mov x26, x20",
         "cfinv",
-        "mov x4, x27",
-        "bfxil x4, x26, #0, #8"
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "sub ax, 1": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x2D",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "lsl w0, w27, #16",
+        "mov x21, x4",
+        "mov x27, x21",
+        "lsl w0, w21, #16",
         "cmp w0, w20, lsl #16",
-        "sub w26, w27, #0x1 (1)",
+        "sub w20, w21, #0x1 (1)",
+        "mov x26, x20",
         "cfinv",
-        "mov x4, x27",
-        "bfxil x4, x26, #0, #16"
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x4, x22"
       ]
     },
     "sub eax, 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x2D",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "subs w26, w27, #0x1 (1)",
+        "mov x20, x4",
+        "mov x27, x20",
+        "subs w21, w20, #0x1 (1)",
+        "mov x26, x21",
         "cfinv",
-        "mov x4, x26"
+        "mov x4, x21"
       ]
     },
     "sub rax, 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x2D",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "subs x26, x27, #0x1 (1)",
+        "mov x20, x4",
+        "mov x27, x20",
+        "subs x21, x20, #0x1 (1)",
+        "mov x26, x21",
         "cfinv",
-        "mov x4, x26"
+        "mov x4, x21"
       ]
     },
     "sub al, -1": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 12,
       "Comment": "0x2C",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
-        "mvn w27, w4",
-        "lsl w0, w4, #24",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "lsl w0, w21, #24",
         "cmp w0, w20, lsl #24",
-        "sub w26, w4, #0xff (255)",
+        "sub w20, w21, #0xff (255)",
+        "mov x26, x20",
         "cfinv",
-        "bfxil x4, x26, #0, #8"
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "sub ax, -1": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 12,
       "Comment": "0x2D",
       "ExpectedArm64ASM": [
         "mov w20, #0xffff",
-        "mvn w27, w4",
-        "lsl w0, w4, #16",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "lsl w0, w21, #16",
         "cmp w0, w20, lsl #16",
-        "sub w26, w4, w20",
+        "sub w22, w21, w20",
+        "mov x26, x22",
         "cfinv",
-        "bfxil x4, x26, #0, #16"
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #16",
+        "mov x4, x20"
       ]
     },
     "sub eax, -1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x2D",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "mvn w27, w4",
-        "subs w26, w4, w20",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "subs w22, w21, w20",
+        "mov x26, x22",
         "cfinv",
-        "mov x4, x26"
+        "mov x4, x22"
       ]
     },
     "sub rax, -1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x2D",
       "ExpectedArm64ASM": [
-        "mvn w27, w4",
-        "adds x26, x4, #0x1 (1)",
+        "mov x20, x4",
+        "mvn w21, w20",
+        "mov x27, x21",
+        "adds x21, x20, #0x1 (1)",
+        "mov x26, x21",
         "cfinv",
-        "mov x4, x26"
+        "mov x4, x21"
       ]
     },
     "xor bl, cl": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x30",
       "ExpectedArm64ASM": [
-        "eor w26, w7, w5",
-        "bfxil x7, x26, #0, #8",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #8",
+        "mov x7, x20",
+        "mov x26, x22",
+        "cmn wzr, w22, lsl #24"
       ]
     },
     "xor bx, cx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x31",
       "ExpectedArm64ASM": [
-        "eor w26, w7, w5",
-        "bfxil x7, x26, #0, #16",
-        "cmn wzr, w26, lsl #16"
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #16",
+        "mov x7, x20",
+        "mov x26, x22",
+        "cmn wzr, w22, lsl #16"
       ]
     },
     "xor ebx, ecx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x31",
       "ExpectedArm64ASM": [
-        "eor w7, w7, w5",
-        "mov x26, x7",
-        "tst w7, w7"
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x7, x22",
+        "mov x26, x22",
+        "tst w22, w22"
       ]
     },
     "xor rbx, rcx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x31",
       "ExpectedArm64ASM": [
-        "eor x7, x7, x5",
-        "mov x26, x7",
-        "tst x7, x7"
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor x22, x21, x20",
+        "mov x7, x22",
+        "mov x26, x22",
+        "tst x22, x22"
       ]
     },
     "db 0x32, 0xcb": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0x32",
         "xor bl, cl but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w26, w5, w7",
-        "bfxil x5, x26, #0, #8",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #8",
+        "mov x5, x20",
+        "mov x26, x22",
+        "cmn wzr, w22, lsl #24"
       ]
     },
     "db 0x66, 0x33, 0xcb": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0x33",
         "xor bx, cx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w26, w5, w7",
-        "bfxil x5, x26, #0, #16",
-        "cmn wzr, w26, lsl #16"
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #16",
+        "mov x5, x20",
+        "mov x26, x22",
+        "cmn wzr, w22, lsl #16"
       ]
     },
     "db 0x33, 0xcb": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0x33",
         "xor ebx, ecx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w5, w5, w7",
-        "mov x26, x5",
-        "tst w5, w5"
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x5, x22",
+        "mov x26, x22",
+        "tst w22, w22"
       ]
     },
     "db 0x48, 0x33, 0xcb": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0x33",
         "xor rbx, rcx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor x5, x5, x7",
-        "mov x26, x5",
-        "tst x5, x5"
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor x22, x21, x20",
+        "mov x5, x22",
+        "mov x26, x22",
+        "tst x22, x22"
       ]
     },
     "xor al, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x34",
       "ExpectedArm64ASM": [
-        "eor w26, w4, #0x1",
-        "bfxil x4, x26, #0, #8",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x4",
+        "eor w21, w20, #0x1",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22",
+        "mov x26, x21",
+        "cmn wzr, w21, lsl #24"
       ]
     },
     "xor ax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x35",
       "ExpectedArm64ASM": [
-        "eor w26, w4, #0x1",
-        "bfxil x4, x26, #0, #16",
-        "cmn wzr, w26, lsl #16"
+        "mov x20, x4",
+        "eor w21, w20, #0x1",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22",
+        "mov x26, x21",
+        "cmn wzr, w21, lsl #16"
       ]
     },
     "xor eax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x35",
       "ExpectedArm64ASM": [
-        "eor w4, w4, #0x1",
-        "mov x26, x4",
-        "tst w4, w4"
+        "mov x20, x4",
+        "eor w21, w20, #0x1",
+        "mov x4, x21",
+        "mov x26, x21",
+        "tst w21, w21"
       ]
     },
     "xor rax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x35",
       "ExpectedArm64ASM": [
-        "eor x4, x4, #0x1",
-        "mov x26, x4",
-        "tst x4, x4"
+        "mov x20, x4",
+        "eor x21, x20, #0x1",
+        "mov x4, x21",
+        "mov x26, x21",
+        "tst x21, x21"
       ]
     },
     "cmp bl, cl": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x38",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "lsl w0, w7, #24",
-        "cmp w0, w5, lsl #24",
-        "sub w26, w7, w5",
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "lsl w0, w21, #24",
+        "cmp w0, w20, lsl #24",
+        "sub w22, w21, w20",
+        "mov x26, x22",
         "cfinv"
       ]
     },
     "xor al, -1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x34",
       "ExpectedArm64ASM": [
-        "eor w26, w4, #0xff",
-        "bfxil x4, x26, #0, #8",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x4",
+        "eor w21, w20, #0xff",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22",
+        "mov x26, x21",
+        "cmn wzr, w21, lsl #24"
       ]
     },
     "xor ax, -1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x35",
       "ExpectedArm64ASM": [
-        "eor w26, w4, #0xffff",
-        "bfxil x4, x26, #0, #16",
-        "cmn wzr, w26, lsl #16"
+        "mov x20, x4",
+        "eor w21, w20, #0xffff",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22",
+        "mov x26, x21",
+        "cmn wzr, w21, lsl #16"
       ]
     },
     "xor eax, -1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x35",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "eor w4, w4, w20",
-        "mov x26, x4",
-        "tst w4, w4"
+        "mov x21, x4",
+        "eor w22, w21, w20",
+        "mov x4, x22",
+        "mov x26, x22",
+        "tst w22, w22"
       ]
     },
     "xor rax, -1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x35",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
-        "eor x4, x4, x20",
-        "mov x26, x4",
-        "tst x4, x4"
+        "mov x21, x4",
+        "eor x22, x21, x20",
+        "mov x4, x22",
+        "mov x26, x22",
+        "tst x22, x22"
       ]
     },
     "cmp bx, cx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x39",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "lsl w0, w7, #16",
-        "cmp w0, w5, lsl #16",
-        "sub w26, w7, w5",
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "lsl w0, w21, #16",
+        "cmp w0, w20, lsl #16",
+        "sub w22, w21, w20",
+        "mov x26, x22",
         "cfinv"
       ]
     },
     "cmp ebx, ecx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x39",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "subs w26, w7, w5",
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "subs w22, w21, w20",
+        "mov x26, x22",
         "cfinv"
       ]
     },
     "cmp rbx, rcx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x39",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "subs x26, x7, x5",
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "subs x22, x21, x20",
+        "mov x26, x22",
         "cfinv"
       ]
     },
     "db 0x3A, 0xcb": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0x3A",
         "cmp bl, cl but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "lsl w0, w5, #24",
-        "cmp w0, w7, lsl #24",
-        "sub w26, w5, w7",
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "lsl w0, w21, #24",
+        "cmp w0, w20, lsl #24",
+        "sub w22, w21, w20",
+        "mov x26, x22",
         "cfinv"
       ]
     },
     "db 0x66, 0x3B, 0xcb": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0x3B",
         "cmp bx, cx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "lsl w0, w5, #16",
-        "cmp w0, w7, lsl #16",
-        "sub w26, w5, w7",
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "lsl w0, w21, #16",
+        "cmp w0, w20, lsl #16",
+        "sub w22, w21, w20",
+        "mov x26, x22",
         "cfinv"
       ]
     },
     "db 0x3B, 0xcb": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0x3B",
         "cmp ebx, ecx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "subs w26, w5, w7",
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "subs w22, w21, w20",
+        "mov x26, x22",
         "cfinv"
       ]
     },
     "db 0x48, 0x3B, 0xcb": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0x3B",
         "cmp rbx, rcx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "subs x26, x5, x7",
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "subs x22, x21, x20",
+        "mov x26, x22",
         "cfinv"
       ]
     },
     "cmp al, 1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x3C",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "lsl w0, w27, #24",
+        "mov x21, x4",
+        "mov x27, x21",
+        "lsl w0, w21, #24",
         "cmp w0, w20, lsl #24",
-        "sub w26, w27, #0x1 (1)",
+        "sub w20, w21, #0x1 (1)",
+        "mov x26, x20",
         "cfinv"
       ]
     },
     "cmp ax, 1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x3D",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "lsl w0, w27, #16",
+        "mov x21, x4",
+        "mov x27, x21",
+        "lsl w0, w21, #16",
         "cmp w0, w20, lsl #16",
-        "sub w26, w27, #0x1 (1)",
+        "sub w20, w21, #0x1 (1)",
+        "mov x26, x20",
         "cfinv"
       ]
     },
     "cmp eax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x3D",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "subs w26, w27, #0x1 (1)",
+        "mov x20, x4",
+        "mov x27, x20",
+        "subs w21, w20, #0x1 (1)",
+        "mov x26, x21",
         "cfinv"
       ]
     },
     "cmp rax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x3D",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "subs x26, x27, #0x1 (1)",
+        "mov x20, x4",
+        "mov x27, x20",
+        "subs x21, x20, #0x1 (1)",
+        "mov x26, x21",
         "cfinv"
       ]
     },
     "cmp al, -1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x3C",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
-        "mvn w27, w4",
-        "lsl w0, w4, #24",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "lsl w0, w21, #24",
         "cmp w0, w20, lsl #24",
-        "sub w26, w4, #0xff (255)",
+        "sub w20, w21, #0xff (255)",
+        "mov x26, x20",
         "cfinv"
       ]
     },
     "cmp ax, -1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x3D",
       "ExpectedArm64ASM": [
         "mov w20, #0xffff",
-        "mvn w27, w4",
-        "lsl w0, w4, #16",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "lsl w0, w21, #16",
         "cmp w0, w20, lsl #16",
-        "sub w26, w4, w20",
+        "sub w22, w21, w20",
+        "mov x26, x22",
         "cfinv"
       ]
     },
     "cmp eax, -1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x3D",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "mvn w27, w4",
-        "subs w26, w4, w20",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "subs w22, w21, w20",
+        "mov x26, x22",
         "cfinv"
       ]
     },
     "cmp rax, -1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x3D",
       "ExpectedArm64ASM": [
-        "mvn w27, w4",
-        "adds x26, x4, #0x1 (1)",
+        "mov x20, x4",
+        "mvn w21, w20",
+        "mov x27, x21",
+        "adds x21, x20, #0x1 (1)",
+        "mov x26, x21",
         "cfinv"
       ]
     },
     "imul ax, bx, 257": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 12,
       "Comment": "0x69",
       "ExpectedArm64ASM": [
-        "sxth x20, w7",
-        "mov w21, #0x101",
-        "mul x20, x20, x21",
-        "sbfx x21, x20, #16, #16",
-        "bfxil x4, x20, #0, #16",
-        "sbfx x20, x20, #15, #1",
-        "cmp x21, x20",
+        "mov x20, x7",
+        "sxth x21, w20",
+        "mov w20, #0x101",
+        "mul x22, x21, x20",
+        "sbfx x20, x22, #16, #16",
+        "mov x21, x4",
+        "mov x23, x21",
+        "bfxil x23, x22, #0, #16",
+        "mov x4, x23",
+        "sbfx x21, x22, #15, #1",
+        "cmp x20, x21",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "imul eax, ebx, 257": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x69",
       "ExpectedArm64ASM": [
-        "mov w20, #0x101",
-        "smull x21, w7, w20",
-        "asr x21, x21, #32",
-        "mul w4, w7, w20",
-        "sbfx x20, x4, #31, #1",
-        "cmp x21, x20",
+        "mov x20, x7",
+        "mov w21, #0x101",
+        "smull x22, w20, w21",
+        "asr x23, x22, #32",
+        "mul w22, w20, w21",
+        "mov x4, x22",
+        "sbfx x20, x22, #31, #1",
+        "cmp x23, x20",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "imul rax, rbx, 257": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x69",
       "ExpectedArm64ASM": [
-        "mov w20, #0x101",
-        "smulh x21, x7, x20",
-        "mul x4, x7, x20",
-        "asr x20, x4, #63",
-        "cmp x21, x20",
+        "mov x20, x7",
+        "mov w21, #0x101",
+        "smulh x22, x20, x21",
+        "mul x23, x20, x21",
+        "mov x4, x23",
+        "asr x20, x23, #63",
+        "cmp x22, x20",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "imul ax, bx, 3": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 12,
       "Comment": "0x6b",
       "ExpectedArm64ASM": [
-        "sxth x20, w7",
-        "mov w21, #0x3",
-        "mul x20, x20, x21",
-        "sbfx x21, x20, #16, #16",
-        "bfxil x4, x20, #0, #16",
-        "sbfx x20, x20, #15, #1",
-        "cmp x21, x20",
+        "mov x20, x7",
+        "sxth x21, w20",
+        "mov w20, #0x3",
+        "mul x22, x21, x20",
+        "sbfx x20, x22, #16, #16",
+        "mov x21, x4",
+        "mov x23, x21",
+        "bfxil x23, x22, #0, #16",
+        "mov x4, x23",
+        "sbfx x21, x22, #15, #1",
+        "cmp x20, x21",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "imul eax, ebx, 3": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x6b",
       "ExpectedArm64ASM": [
-        "mov w20, #0x3",
-        "smull x21, w7, w20",
-        "asr x21, x21, #32",
-        "mul w4, w7, w20",
-        "sbfx x20, x4, #31, #1",
-        "cmp x21, x20",
+        "mov x20, x7",
+        "mov w21, #0x3",
+        "smull x22, w20, w21",
+        "asr x23, x22, #32",
+        "mul w22, w20, w21",
+        "mov x4, x22",
+        "sbfx x20, x22, #31, #1",
+        "cmp x23, x20",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "imul rax, rbx, 3": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x6b",
       "ExpectedArm64ASM": [
-        "mov w20, #0x3",
-        "smulh x21, x7, x20",
-        "mul x4, x7, x20",
-        "asr x20, x4, #63",
-        "cmp x21, x20",
+        "mov x20, x7",
+        "mov w21, #0x3",
+        "smulh x22, x20, x21",
+        "mul x23, x20, x21",
+        "mov x4, x23",
+        "asr x20, x23, #63",
+        "cmp x22, x20",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "test al, bl": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x84",
       "ExpectedArm64ASM": [
-        "and w26, w4, w7",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x7",
+        "mov x21, x4",
+        "and w22, w21, w20",
+        "cmn wzr, w22, lsl #24",
+        "mov x26, x22"
       ]
     },
     "test ax, bx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x84",
       "ExpectedArm64ASM": [
-        "and w26, w4, w7",
-        "cmn wzr, w26, lsl #16"
+        "mov x20, x7",
+        "mov x21, x4",
+        "and w22, w21, w20",
+        "cmn wzr, w22, lsl #16",
+        "mov x26, x22"
       ]
     },
     "test eax, ebx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x84",
       "ExpectedArm64ASM": [
-        "ands w26, w4, w7"
+        "mov x20, x7",
+        "mov x21, x4",
+        "ands w22, w21, w20",
+        "mov x26, x22"
       ]
     },
     "test rax, rbx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x84",
       "ExpectedArm64ASM": [
-        "ands x26, x4, x7"
+        "mov x20, x7",
+        "mov x21, x4",
+        "ands x22, x21, x20",
+        "mov x26, x22"
       ]
     },
     "pushf": {
-      "ExpectedInstructionCount": 39,
+      "ExpectedInstructionCount": 44,
       "Comment": "0x9c",
       "ExpectedArm64ASM": [
         "cset w20, hs",
-        "eor w21, w27, w26",
-        "ubfx w21, w21, #4, #1",
-        "orr x20, x20, x21, lsl #4",
-        "ldrb w21, [x28, #712]",
-        "orr x20, x20, x21, lsl #8",
-        "ldrb w21, [x28, #713]",
-        "orr x20, x20, x21, lsl #9",
-        "ldrsb x21, [x28, #714]",
-        "lsr x21, x21, #63",
-        "orr x20, x20, x21, lsl #10",
+        "mov x21, x27",
+        "mov x22, x26",
+        "eor w23, w21, w22",
+        "ubfx w21, w23, #4, #1",
+        "orr x23, x20, x21, lsl #4",
+        "ldrb w20, [x28, #712]",
+        "orr x21, x23, x20, lsl #8",
+        "ldrb w20, [x28, #713]",
+        "orr x23, x21, x20, lsl #9",
+        "ldrsb x20, [x28, #714]",
+        "lsr x21, x20, #63",
+        "orr x20, x23, x21, lsl #10",
         "cset w21, vs",
-        "orr x20, x20, x21, lsl #11",
-        "ldrb w21, [x28, #716]",
-        "orr x20, x20, x21, lsl #12",
-        "ldrb w21, [x28, #718]",
-        "orr x20, x20, x21, lsl #14",
-        "ldrb w21, [x28, #720]",
-        "orr x20, x20, x21, lsl #16",
-        "ldrb w21, [x28, #721]",
-        "orr x20, x20, x21, lsl #17",
-        "ldrb w21, [x28, #722]",
-        "orr x20, x20, x21, lsl #18",
-        "ldrb w21, [x28, #723]",
-        "orr x20, x20, x21, lsl #19",
-        "ldrb w21, [x28, #724]",
-        "orr x20, x20, x21, lsl #20",
-        "ldrb w21, [x28, #725]",
-        "orr x20, x20, x21, lsl #21",
-        "eor w21, w26, w26, lsr #4",
-        "eor w21, w21, w21, lsr #2",
-        "eor w21, w21, w21, lsr #1",
-        "orr x21, x21, #0xfffffffffffffffe",
-        "orn x20, x20, x21, ror #62",
+        "orr x23, x20, x21, lsl #11",
+        "ldrb w20, [x28, #716]",
+        "orr x21, x23, x20, lsl #12",
+        "ldrb w20, [x28, #718]",
+        "orr x23, x21, x20, lsl #14",
+        "ldrb w20, [x28, #720]",
+        "orr x21, x23, x20, lsl #16",
+        "ldrb w20, [x28, #721]",
+        "orr x23, x21, x20, lsl #17",
+        "ldrb w20, [x28, #722]",
+        "orr x21, x23, x20, lsl #18",
+        "ldrb w20, [x28, #723]",
+        "orr x23, x21, x20, lsl #19",
+        "ldrb w20, [x28, #724]",
+        "orr x21, x23, x20, lsl #20",
+        "ldrb w20, [x28, #725]",
+        "orr x23, x21, x20, lsl #21",
+        "eor w20, w22, w22, lsr #4",
+        "eor w21, w20, w20, lsr #2",
+        "eor w20, w21, w21, lsr #1",
+        "orr x21, x20, #0xfffffffffffffffe",
+        "orn x20, x23, x21, ror #62",
         "mrs x21, nzcv",
-        "and x21, x21, #0xc0000000",
-        "orr x20, x20, x21, lsr #24",
-        "orr x20, x20, #0x2",
-        "str x20, [x8, #-8]!"
+        "and x22, x21, #0xc0000000",
+        "orr x21, x20, x22, lsr #24",
+        "orr x20, x21, #0x2",
+        "mov x21, x8",
+        "mov x22, x21",
+        "str x20, [x22, #-8]!",
+        "mov x8, x22"
       ]
     },
     "pushfq": {
-      "ExpectedInstructionCount": 39,
+      "ExpectedInstructionCount": 44,
       "Comment": "0x9c",
       "ExpectedArm64ASM": [
         "cset w20, hs",
-        "eor w21, w27, w26",
-        "ubfx w21, w21, #4, #1",
-        "orr x20, x20, x21, lsl #4",
-        "ldrb w21, [x28, #712]",
-        "orr x20, x20, x21, lsl #8",
-        "ldrb w21, [x28, #713]",
-        "orr x20, x20, x21, lsl #9",
-        "ldrsb x21, [x28, #714]",
-        "lsr x21, x21, #63",
-        "orr x20, x20, x21, lsl #10",
+        "mov x21, x27",
+        "mov x22, x26",
+        "eor w23, w21, w22",
+        "ubfx w21, w23, #4, #1",
+        "orr x23, x20, x21, lsl #4",
+        "ldrb w20, [x28, #712]",
+        "orr x21, x23, x20, lsl #8",
+        "ldrb w20, [x28, #713]",
+        "orr x23, x21, x20, lsl #9",
+        "ldrsb x20, [x28, #714]",
+        "lsr x21, x20, #63",
+        "orr x20, x23, x21, lsl #10",
         "cset w21, vs",
-        "orr x20, x20, x21, lsl #11",
-        "ldrb w21, [x28, #716]",
-        "orr x20, x20, x21, lsl #12",
-        "ldrb w21, [x28, #718]",
-        "orr x20, x20, x21, lsl #14",
-        "ldrb w21, [x28, #720]",
-        "orr x20, x20, x21, lsl #16",
-        "ldrb w21, [x28, #721]",
-        "orr x20, x20, x21, lsl #17",
-        "ldrb w21, [x28, #722]",
-        "orr x20, x20, x21, lsl #18",
-        "ldrb w21, [x28, #723]",
-        "orr x20, x20, x21, lsl #19",
-        "ldrb w21, [x28, #724]",
-        "orr x20, x20, x21, lsl #20",
-        "ldrb w21, [x28, #725]",
-        "orr x20, x20, x21, lsl #21",
-        "eor w21, w26, w26, lsr #4",
-        "eor w21, w21, w21, lsr #2",
-        "eor w21, w21, w21, lsr #1",
-        "orr x21, x21, #0xfffffffffffffffe",
-        "orn x20, x20, x21, ror #62",
+        "orr x23, x20, x21, lsl #11",
+        "ldrb w20, [x28, #716]",
+        "orr x21, x23, x20, lsl #12",
+        "ldrb w20, [x28, #718]",
+        "orr x23, x21, x20, lsl #14",
+        "ldrb w20, [x28, #720]",
+        "orr x21, x23, x20, lsl #16",
+        "ldrb w20, [x28, #721]",
+        "orr x23, x21, x20, lsl #17",
+        "ldrb w20, [x28, #722]",
+        "orr x21, x23, x20, lsl #18",
+        "ldrb w20, [x28, #723]",
+        "orr x23, x21, x20, lsl #19",
+        "ldrb w20, [x28, #724]",
+        "orr x21, x23, x20, lsl #20",
+        "ldrb w20, [x28, #725]",
+        "orr x23, x21, x20, lsl #21",
+        "eor w20, w22, w22, lsr #4",
+        "eor w21, w20, w20, lsr #2",
+        "eor w20, w21, w21, lsr #1",
+        "orr x21, x20, #0xfffffffffffffffe",
+        "orn x20, x23, x21, ror #62",
         "mrs x21, nzcv",
-        "and x21, x21, #0xc0000000",
-        "orr x20, x20, x21, lsr #24",
-        "orr x20, x20, #0x2",
-        "str x20, [x8, #-8]!"
+        "and x22, x21, #0xc0000000",
+        "orr x21, x20, x22, lsr #24",
+        "orr x20, x21, #0x2",
+        "mov x21, x8",
+        "mov x22, x21",
+        "str x20, [x22, #-8]!",
+        "mov x8, x22"
       ]
     },
     "popf": {
-      "ExpectedInstructionCount": 34,
+      "ExpectedInstructionCount": 38,
       "Comment": "0x9d",
       "ExpectedArm64ASM": [
-        "ldr x20, [x8]",
-        "add x8, x8, #0x8 (8)",
-        "mov w21, #0x202",
-        "orr x27, x20, x21",
-        "rmif x27, #63, #nzCv",
-        "ubfx w20, w27, #2, #1",
+        "mov x20, x8",
+        "ldr x21, [x20]",
+        "add x22, x20, #0x8 (8)",
+        "mov x8, x22",
+        "mov w20, #0x202",
+        "orr x22, x21, x20",
+        "rmif x22, #63, #nzCv",
+        "ubfx w20, w22, #2, #1",
         "mov w21, #0x1",
-        "eor w26, w20, #0x1",
-        "rmif x27, #4, #nZcv",
-        "rmif x27, #4, #Nzcv",
-        "ubfx w20, w27, #8, #1",
+        "eor w23, w20, #0x1",
+        "mov x26, x23",
+        "mov x27, x22",
+        "rmif x22, #4, #nZcv",
+        "rmif x22, #4, #Nzcv",
+        "ubfx w20, w22, #8, #1",
         "strb w20, [x28, #712]",
-        "ubfx w20, w27, #9, #1",
+        "ubfx w20, w22, #9, #1",
         "strb w20, [x28, #713]",
-        "ubfx w20, w27, #10, #1",
-        "sub x20, x21, x20, lsl #1",
-        "strb w20, [x28, #714]",
-        "rmif x27, #11, #nzcV",
-        "ubfx w20, w27, #12, #1",
+        "ubfx w20, w22, #10, #1",
+        "sub x23, x21, x20, lsl #1",
+        "strb w23, [x28, #714]",
+        "rmif x22, #11, #nzcV",
+        "ubfx w20, w22, #12, #1",
         "strb w20, [x28, #716]",
-        "ubfx w20, w27, #14, #1",
+        "ubfx w20, w22, #14, #1",
         "strb w20, [x28, #718]",
-        "ubfx w20, w27, #16, #1",
+        "ubfx w20, w22, #16, #1",
         "strb w20, [x28, #720]",
-        "ubfx w20, w27, #17, #1",
+        "ubfx w20, w22, #17, #1",
         "strb w20, [x28, #721]",
-        "ubfx w20, w27, #18, #1",
+        "ubfx w20, w22, #18, #1",
         "strb w20, [x28, #722]",
-        "ubfx w20, w27, #19, #1",
+        "ubfx w20, w22, #19, #1",
         "strb w20, [x28, #723]",
-        "ubfx w20, w27, #20, #1",
+        "ubfx w20, w22, #20, #1",
         "strb w20, [x28, #724]",
-        "ubfx w20, w27, #21, #1",
+        "ubfx w20, w22, #21, #1",
         "strb w20, [x28, #725]"
       ]
     },
     "sahf": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 12,
       "Comment": "0x9e",
       "ExpectedArm64ASM": [
-        "ubfx w20, w4, #8, #8",
-        "mov w21, #0x28",
-        "bic x20, x20, x21",
-        "orr x27, x20, #0x2",
-        "rmif x27, #63, #nzCv",
-        "ubfx w20, w27, #2, #1",
-        "eor w26, w20, #0x1",
-        "rmif x27, #4, #nZcv",
-        "rmif x27, #4, #Nzcv"
+        "mov x20, x4",
+        "ubfx w21, w20, #8, #8",
+        "mov w20, #0x28",
+        "bic x22, x21, x20",
+        "orr x20, x22, #0x2",
+        "rmif x20, #63, #nzCv",
+        "ubfx w21, w20, #2, #1",
+        "eor w22, w21, #0x1",
+        "mov x26, x22",
+        "mov x27, x20",
+        "rmif x20, #4, #nZcv",
+        "rmif x20, #4, #Nzcv"
       ]
     },
     "lahf": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 19,
       "Comment": "0x9f",
       "ExpectedArm64ASM": [
         "cset w20, hs",
-        "eor w21, w27, w26",
-        "ubfx w21, w21, #4, #1",
-        "orr x20, x20, x21, lsl #4",
-        "eor w21, w26, w26, lsr #4",
-        "eor w21, w21, w21, lsr #2",
-        "eor w21, w21, w21, lsr #1",
-        "orr x21, x21, #0xfffffffffffffffe",
-        "orn x20, x20, x21, ror #62",
+        "mov x21, x27",
+        "mov x22, x26",
+        "eor w23, w21, w22",
+        "ubfx w21, w23, #4, #1",
+        "orr x23, x20, x21, lsl #4",
+        "eor w20, w22, w22, lsr #4",
+        "eor w21, w20, w20, lsr #2",
+        "eor w20, w21, w21, lsr #1",
+        "orr x21, x20, #0xfffffffffffffffe",
+        "orn x20, x23, x21, ror #62",
         "mrs x21, nzcv",
-        "and x21, x21, #0xc0000000",
-        "orr x20, x20, x21, lsr #24",
-        "orr x20, x20, #0x2",
-        "bfi x4, x20, #8, #8"
+        "and x22, x21, #0xc0000000",
+        "orr x21, x20, x22, lsr #24",
+        "orr x20, x21, #0x2",
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfi x22, x20, #8, #8",
+        "mov x4, x22"
       ]
     },
     "cmpsb": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 16,
       "Comment": [
         "0xa6"
       ],
       "ExpectedArm64ASM": [
-        "ldrb w20, [x11]",
-        "ldrb w21, [x10]",
-        "ldrsb x22, [x28, #714]",
-        "add x11, x11, x22",
-        "add x10, x10, x22",
-        "eor w27, w21, w20",
-        "lsl w0, w21, #24",
-        "cmp w0, w20, lsl #24",
-        "sub w26, w21, w20",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldrb w22, [x21]",
+        "ldrb w23, [x20]",
+        "ldrsb x24, [x28, #714]",
+        "add x25, x21, x24",
+        "mov x11, x25",
+        "add x21, x20, x24",
+        "mov x10, x21",
+        "eor w20, w23, w22",
+        "mov x27, x20",
+        "lsl w0, w23, #24",
+        "cmp w0, w22, lsl #24",
+        "sub w20, w23, w22",
+        "mov x26, x20",
         "cfinv"
       ]
     },
     "cmpsw": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 17,
       "Comment": [
         "0xa7"
       ],
       "ExpectedArm64ASM": [
-        "ldrh w20, [x11]",
-        "ldrh w21, [x10]",
-        "ldrsb x22, [x28, #714]",
-        "lsl x22, x22, #1",
-        "add x11, x11, x22",
-        "add x10, x10, x22",
-        "eor w27, w21, w20",
-        "lsl w0, w21, #16",
-        "cmp w0, w20, lsl #16",
-        "sub w26, w21, w20",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldrh w22, [x21]",
+        "ldrh w23, [x20]",
+        "ldrsb x24, [x28, #714]",
+        "lsl x25, x24, #1",
+        "add x24, x21, x25",
+        "mov x11, x24",
+        "add x21, x20, x25",
+        "mov x10, x21",
+        "eor w20, w23, w22",
+        "mov x27, x20",
+        "lsl w0, w23, #16",
+        "cmp w0, w22, lsl #16",
+        "sub w20, w23, w22",
+        "mov x26, x20",
         "cfinv"
       ]
     },
     "cmpsd": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xa7"
       ],
       "ExpectedArm64ASM": [
-        "ldr w20, [x11]",
-        "ldr w21, [x10]",
-        "ldrsb x22, [x28, #714]",
-        "lsl x22, x22, #2",
-        "add x11, x11, x22",
-        "add x10, x10, x22",
-        "eor w27, w21, w20",
-        "subs w26, w21, w20",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldr w22, [x21]",
+        "ldr w23, [x20]",
+        "ldrsb x24, [x28, #714]",
+        "lsl x25, x24, #2",
+        "add x24, x21, x25",
+        "mov x11, x24",
+        "add x21, x20, x25",
+        "mov x10, x21",
+        "eor w20, w23, w22",
+        "mov x27, x20",
+        "subs w20, w23, w22",
+        "mov x26, x20",
         "cfinv"
       ]
     },
     "cmpsq": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xa7"
       ],
       "ExpectedArm64ASM": [
-        "ldr x20, [x11]",
-        "ldr x21, [x10]",
-        "ldrsb x22, [x28, #714]",
-        "lsl x22, x22, #3",
-        "add x11, x11, x22",
-        "add x10, x10, x22",
-        "eor w27, w21, w20",
-        "subs x26, x21, x20",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldr x22, [x21]",
+        "ldr x23, [x20]",
+        "ldrsb x24, [x28, #714]",
+        "lsl x25, x24, #3",
+        "add x24, x21, x25",
+        "mov x11, x24",
+        "add x21, x20, x25",
+        "mov x10, x21",
+        "eor w20, w23, w22",
+        "mov x27, x20",
+        "subs x20, x23, x22",
+        "mov x26, x20",
         "cfinv"
       ]
     },
     "repz cmpsb": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 46,
       "Comment": "0xa6",
       "ExpectedArm64ASM": [
-        "cbz x5, #+0x68",
+        "mov x20, x5",
+        "cbz x20, #+0xb4",
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x24",
-        "ldrb w26, [x11]",
-        "ldrb w27, [x10]",
-        "subs x5, x5, #0x1 (1)",
-        "add x11, x11, #0x1 (1)",
-        "add x10, x10, #0x1 (1)",
-        "ccmp x27, x26, #nzcv, ne",
-        "b.eq #-0x18",
-        "b #+0x20",
-        "ldrb w26, [x11]",
-        "ldrb w27, [x10]",
-        "subs x5, x5, #0x1 (1)",
-        "sub x11, x11, #0x1 (1)",
-        "sub x10, x10, #0x1 (1)",
-        "ccmp x27, x26, #nzcv, ne",
-        "b.eq #-0x18",
-        "mov x20, x27",
-        "eor w27, w20, w26",
-        "lsl w0, w20, #24",
-        "cmp w0, w26, lsl #24",
-        "sub w26, w20, w26",
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x44",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldrb w22, [x21]",
+        "ldrb w23, [x20]",
+        "mov x26, x22",
+        "mov x27, x23",
+        "mov x24, x5",
+        "subs x25, x24, #0x1 (1)",
+        "mov x5, x25",
+        "add x24, x21, #0x1 (1)",
+        "mov x11, x24",
+        "add x21, x20, #0x1 (1)",
+        "mov x10, x21",
+        "ccmp x23, x22, #nzcv, ne",
+        "b.eq #-0x38",
+        "b #+0x40",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldrb w22, [x21]",
+        "ldrb w23, [x20]",
+        "mov x26, x22",
+        "mov x27, x23",
+        "mov x24, x5",
+        "subs x25, x24, #0x1 (1)",
+        "mov x5, x25",
+        "sub x24, x21, #0x1 (1)",
+        "mov x11, x24",
+        "sub x21, x20, #0x1 (1)",
+        "mov x10, x21",
+        "ccmp x23, x22, #nzcv, ne",
+        "b.eq #-0x38",
+        "mov x20, x26",
+        "mov x21, x27",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "lsl w0, w21, #24",
+        "cmp w0, w20, lsl #24",
+        "sub w22, w21, w20",
+        "mov x26, x22",
         "cfinv"
       ]
     },
     "repz cmpsw": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 46,
       "Comment": "0xa7",
       "ExpectedArm64ASM": [
-        "cbz x5, #+0x68",
+        "mov x20, x5",
+        "cbz x20, #+0xb4",
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x24",
-        "ldrh w26, [x11]",
-        "ldrh w27, [x10]",
-        "subs x5, x5, #0x1 (1)",
-        "add x11, x11, #0x2 (2)",
-        "add x10, x10, #0x2 (2)",
-        "ccmp x27, x26, #nzcv, ne",
-        "b.eq #-0x18",
-        "b #+0x20",
-        "ldrh w26, [x11]",
-        "ldrh w27, [x10]",
-        "subs x5, x5, #0x1 (1)",
-        "sub x11, x11, #0x2 (2)",
-        "sub x10, x10, #0x2 (2)",
-        "ccmp x27, x26, #nzcv, ne",
-        "b.eq #-0x18",
-        "mov x20, x27",
-        "eor w27, w20, w26",
-        "lsl w0, w20, #16",
-        "cmp w0, w26, lsl #16",
-        "sub w26, w20, w26",
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x44",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldrh w22, [x21]",
+        "ldrh w23, [x20]",
+        "mov x26, x22",
+        "mov x27, x23",
+        "mov x24, x5",
+        "subs x25, x24, #0x1 (1)",
+        "mov x5, x25",
+        "add x24, x21, #0x2 (2)",
+        "mov x11, x24",
+        "add x21, x20, #0x2 (2)",
+        "mov x10, x21",
+        "ccmp x23, x22, #nzcv, ne",
+        "b.eq #-0x38",
+        "b #+0x40",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldrh w22, [x21]",
+        "ldrh w23, [x20]",
+        "mov x26, x22",
+        "mov x27, x23",
+        "mov x24, x5",
+        "subs x25, x24, #0x1 (1)",
+        "mov x5, x25",
+        "sub x24, x21, #0x2 (2)",
+        "mov x11, x24",
+        "sub x21, x20, #0x2 (2)",
+        "mov x10, x21",
+        "ccmp x23, x22, #nzcv, ne",
+        "b.eq #-0x38",
+        "mov x20, x26",
+        "mov x21, x27",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "lsl w0, w21, #16",
+        "cmp w0, w20, lsl #16",
+        "sub w22, w21, w20",
+        "mov x26, x22",
         "cfinv"
       ]
     },
     "repz cmpsd": {
-      "ExpectedInstructionCount": 24,
+      "ExpectedInstructionCount": 44,
       "Comment": "0xa7",
       "ExpectedArm64ASM": [
-        "cbz x5, #+0x60",
+        "mov x20, x5",
+        "cbz x20, #+0xac",
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x24",
-        "ldr w26, [x11]",
-        "ldr w27, [x10]",
-        "subs x5, x5, #0x1 (1)",
-        "add x11, x11, #0x4 (4)",
-        "add x10, x10, #0x4 (4)",
-        "ccmp x27, x26, #nzcv, ne",
-        "b.eq #-0x18",
-        "b #+0x20",
-        "ldr w26, [x11]",
-        "ldr w27, [x10]",
-        "subs x5, x5, #0x1 (1)",
-        "sub x11, x11, #0x4 (4)",
-        "sub x10, x10, #0x4 (4)",
-        "ccmp x27, x26, #nzcv, ne",
-        "b.eq #-0x18",
-        "mov x20, x27",
-        "eor w27, w20, w26",
-        "subs w26, w20, w26",
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x44",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldr w22, [x21]",
+        "ldr w23, [x20]",
+        "mov x26, x22",
+        "mov x27, x23",
+        "mov x24, x5",
+        "subs x25, x24, #0x1 (1)",
+        "mov x5, x25",
+        "add x24, x21, #0x4 (4)",
+        "mov x11, x24",
+        "add x21, x20, #0x4 (4)",
+        "mov x10, x21",
+        "ccmp x23, x22, #nzcv, ne",
+        "b.eq #-0x38",
+        "b #+0x40",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldr w22, [x21]",
+        "ldr w23, [x20]",
+        "mov x26, x22",
+        "mov x27, x23",
+        "mov x24, x5",
+        "subs x25, x24, #0x1 (1)",
+        "mov x5, x25",
+        "sub x24, x21, #0x4 (4)",
+        "mov x11, x24",
+        "sub x21, x20, #0x4 (4)",
+        "mov x10, x21",
+        "ccmp x23, x22, #nzcv, ne",
+        "b.eq #-0x38",
+        "mov x20, x26",
+        "mov x21, x27",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "subs w22, w21, w20",
+        "mov x26, x22",
         "cfinv"
       ]
     },
     "repz cmpsq": {
-      "ExpectedInstructionCount": 24,
+      "ExpectedInstructionCount": 44,
       "Comment": "0xa7",
       "ExpectedArm64ASM": [
-        "cbz x5, #+0x60",
+        "mov x20, x5",
+        "cbz x20, #+0xac",
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x24",
-        "ldr x26, [x11]",
-        "ldr x27, [x10]",
-        "subs x5, x5, #0x1 (1)",
-        "add x11, x11, #0x8 (8)",
-        "add x10, x10, #0x8 (8)",
-        "ccmp x27, x26, #nzcv, ne",
-        "b.eq #-0x18",
-        "b #+0x20",
-        "ldr x26, [x11]",
-        "ldr x27, [x10]",
-        "subs x5, x5, #0x1 (1)",
-        "sub x11, x11, #0x8 (8)",
-        "sub x10, x10, #0x8 (8)",
-        "ccmp x27, x26, #nzcv, ne",
-        "b.eq #-0x18",
-        "mov x20, x27",
-        "eor w27, w20, w26",
-        "subs x26, x20, x26",
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x44",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldr x22, [x21]",
+        "ldr x23, [x20]",
+        "mov x26, x22",
+        "mov x27, x23",
+        "mov x24, x5",
+        "subs x25, x24, #0x1 (1)",
+        "mov x5, x25",
+        "add x24, x21, #0x8 (8)",
+        "mov x11, x24",
+        "add x21, x20, #0x8 (8)",
+        "mov x10, x21",
+        "ccmp x23, x22, #nzcv, ne",
+        "b.eq #-0x38",
+        "b #+0x40",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldr x22, [x21]",
+        "ldr x23, [x20]",
+        "mov x26, x22",
+        "mov x27, x23",
+        "mov x24, x5",
+        "subs x25, x24, #0x1 (1)",
+        "mov x5, x25",
+        "sub x24, x21, #0x8 (8)",
+        "mov x11, x24",
+        "sub x21, x20, #0x8 (8)",
+        "mov x10, x21",
+        "ccmp x23, x22, #nzcv, ne",
+        "b.eq #-0x38",
+        "mov x20, x26",
+        "mov x21, x27",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "subs x22, x21, x20",
+        "mov x26, x22",
         "cfinv"
       ]
     },
     "repnz cmpsb": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 46,
       "Comment": "0xa6",
       "ExpectedArm64ASM": [
-        "cbz x5, #+0x68",
+        "mov x20, x5",
+        "cbz x20, #+0xb4",
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x24",
-        "ldrb w26, [x11]",
-        "ldrb w27, [x10]",
-        "subs x5, x5, #0x1 (1)",
-        "add x11, x11, #0x1 (1)",
-        "add x10, x10, #0x1 (1)",
-        "ccmp x27, x26, #nZcv, ne",
-        "b.ne #-0x18",
-        "b #+0x20",
-        "ldrb w26, [x11]",
-        "ldrb w27, [x10]",
-        "subs x5, x5, #0x1 (1)",
-        "sub x11, x11, #0x1 (1)",
-        "sub x10, x10, #0x1 (1)",
-        "ccmp x27, x26, #nZcv, ne",
-        "b.ne #-0x18",
-        "mov x20, x27",
-        "eor w27, w20, w26",
-        "lsl w0, w20, #24",
-        "cmp w0, w26, lsl #24",
-        "sub w26, w20, w26",
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x44",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldrb w22, [x21]",
+        "ldrb w23, [x20]",
+        "mov x26, x22",
+        "mov x27, x23",
+        "mov x24, x5",
+        "subs x25, x24, #0x1 (1)",
+        "mov x5, x25",
+        "add x24, x21, #0x1 (1)",
+        "mov x11, x24",
+        "add x21, x20, #0x1 (1)",
+        "mov x10, x21",
+        "ccmp x23, x22, #nZcv, ne",
+        "b.ne #-0x38",
+        "b #+0x40",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldrb w22, [x21]",
+        "ldrb w23, [x20]",
+        "mov x26, x22",
+        "mov x27, x23",
+        "mov x24, x5",
+        "subs x25, x24, #0x1 (1)",
+        "mov x5, x25",
+        "sub x24, x21, #0x1 (1)",
+        "mov x11, x24",
+        "sub x21, x20, #0x1 (1)",
+        "mov x10, x21",
+        "ccmp x23, x22, #nZcv, ne",
+        "b.ne #-0x38",
+        "mov x20, x26",
+        "mov x21, x27",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "lsl w0, w21, #24",
+        "cmp w0, w20, lsl #24",
+        "sub w22, w21, w20",
+        "mov x26, x22",
         "cfinv"
       ]
     },
     "repnz cmpsw": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 46,
       "Comment": "0xa7",
       "ExpectedArm64ASM": [
-        "cbz x5, #+0x68",
+        "mov x20, x5",
+        "cbz x20, #+0xb4",
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x24",
-        "ldrh w26, [x11]",
-        "ldrh w27, [x10]",
-        "subs x5, x5, #0x1 (1)",
-        "add x11, x11, #0x2 (2)",
-        "add x10, x10, #0x2 (2)",
-        "ccmp x27, x26, #nZcv, ne",
-        "b.ne #-0x18",
-        "b #+0x20",
-        "ldrh w26, [x11]",
-        "ldrh w27, [x10]",
-        "subs x5, x5, #0x1 (1)",
-        "sub x11, x11, #0x2 (2)",
-        "sub x10, x10, #0x2 (2)",
-        "ccmp x27, x26, #nZcv, ne",
-        "b.ne #-0x18",
-        "mov x20, x27",
-        "eor w27, w20, w26",
-        "lsl w0, w20, #16",
-        "cmp w0, w26, lsl #16",
-        "sub w26, w20, w26",
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x44",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldrh w22, [x21]",
+        "ldrh w23, [x20]",
+        "mov x26, x22",
+        "mov x27, x23",
+        "mov x24, x5",
+        "subs x25, x24, #0x1 (1)",
+        "mov x5, x25",
+        "add x24, x21, #0x2 (2)",
+        "mov x11, x24",
+        "add x21, x20, #0x2 (2)",
+        "mov x10, x21",
+        "ccmp x23, x22, #nZcv, ne",
+        "b.ne #-0x38",
+        "b #+0x40",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldrh w22, [x21]",
+        "ldrh w23, [x20]",
+        "mov x26, x22",
+        "mov x27, x23",
+        "mov x24, x5",
+        "subs x25, x24, #0x1 (1)",
+        "mov x5, x25",
+        "sub x24, x21, #0x2 (2)",
+        "mov x11, x24",
+        "sub x21, x20, #0x2 (2)",
+        "mov x10, x21",
+        "ccmp x23, x22, #nZcv, ne",
+        "b.ne #-0x38",
+        "mov x20, x26",
+        "mov x21, x27",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "lsl w0, w21, #16",
+        "cmp w0, w20, lsl #16",
+        "sub w22, w21, w20",
+        "mov x26, x22",
         "cfinv"
       ]
     },
     "repnz cmpsd": {
-      "ExpectedInstructionCount": 24,
+      "ExpectedInstructionCount": 44,
       "Comment": "0xa7",
       "ExpectedArm64ASM": [
-        "cbz x5, #+0x60",
+        "mov x20, x5",
+        "cbz x20, #+0xac",
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x24",
-        "ldr w26, [x11]",
-        "ldr w27, [x10]",
-        "subs x5, x5, #0x1 (1)",
-        "add x11, x11, #0x4 (4)",
-        "add x10, x10, #0x4 (4)",
-        "ccmp x27, x26, #nZcv, ne",
-        "b.ne #-0x18",
-        "b #+0x20",
-        "ldr w26, [x11]",
-        "ldr w27, [x10]",
-        "subs x5, x5, #0x1 (1)",
-        "sub x11, x11, #0x4 (4)",
-        "sub x10, x10, #0x4 (4)",
-        "ccmp x27, x26, #nZcv, ne",
-        "b.ne #-0x18",
-        "mov x20, x27",
-        "eor w27, w20, w26",
-        "subs w26, w20, w26",
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x44",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldr w22, [x21]",
+        "ldr w23, [x20]",
+        "mov x26, x22",
+        "mov x27, x23",
+        "mov x24, x5",
+        "subs x25, x24, #0x1 (1)",
+        "mov x5, x25",
+        "add x24, x21, #0x4 (4)",
+        "mov x11, x24",
+        "add x21, x20, #0x4 (4)",
+        "mov x10, x21",
+        "ccmp x23, x22, #nZcv, ne",
+        "b.ne #-0x38",
+        "b #+0x40",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldr w22, [x21]",
+        "ldr w23, [x20]",
+        "mov x26, x22",
+        "mov x27, x23",
+        "mov x24, x5",
+        "subs x25, x24, #0x1 (1)",
+        "mov x5, x25",
+        "sub x24, x21, #0x4 (4)",
+        "mov x11, x24",
+        "sub x21, x20, #0x4 (4)",
+        "mov x10, x21",
+        "ccmp x23, x22, #nZcv, ne",
+        "b.ne #-0x38",
+        "mov x20, x26",
+        "mov x21, x27",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "subs w22, w21, w20",
+        "mov x26, x22",
         "cfinv"
       ]
     },
     "repnz cmpsq": {
-      "ExpectedInstructionCount": 24,
+      "ExpectedInstructionCount": 44,
       "Comment": "0xa7",
       "ExpectedArm64ASM": [
-        "cbz x5, #+0x60",
+        "mov x20, x5",
+        "cbz x20, #+0xac",
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x24",
-        "ldr x26, [x11]",
-        "ldr x27, [x10]",
-        "subs x5, x5, #0x1 (1)",
-        "add x11, x11, #0x8 (8)",
-        "add x10, x10, #0x8 (8)",
-        "ccmp x27, x26, #nZcv, ne",
-        "b.ne #-0x18",
-        "b #+0x20",
-        "ldr x26, [x11]",
-        "ldr x27, [x10]",
-        "subs x5, x5, #0x1 (1)",
-        "sub x11, x11, #0x8 (8)",
-        "sub x10, x10, #0x8 (8)",
-        "ccmp x27, x26, #nZcv, ne",
-        "b.ne #-0x18",
-        "mov x20, x27",
-        "eor w27, w20, w26",
-        "subs x26, x20, x26",
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x44",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldr x22, [x21]",
+        "ldr x23, [x20]",
+        "mov x26, x22",
+        "mov x27, x23",
+        "mov x24, x5",
+        "subs x25, x24, #0x1 (1)",
+        "mov x5, x25",
+        "add x24, x21, #0x8 (8)",
+        "mov x11, x24",
+        "add x21, x20, #0x8 (8)",
+        "mov x10, x21",
+        "ccmp x23, x22, #nZcv, ne",
+        "b.ne #-0x38",
+        "b #+0x40",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldr x22, [x21]",
+        "ldr x23, [x20]",
+        "mov x26, x22",
+        "mov x27, x23",
+        "mov x24, x5",
+        "subs x25, x24, #0x1 (1)",
+        "mov x5, x25",
+        "sub x24, x21, #0x8 (8)",
+        "mov x11, x24",
+        "sub x21, x20, #0x8 (8)",
+        "mov x10, x21",
+        "ccmp x23, x22, #nZcv, ne",
+        "b.ne #-0x38",
+        "mov x20, x26",
+        "mov x21, x27",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "subs x22, x21, x20",
+        "mov x26, x22",
         "cfinv"
       ]
     },
     "test al, 1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0xa8",
       "ExpectedArm64ASM": [
-        "and w26, w4, #0x1",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x4",
+        "and w21, w20, #0x1",
+        "cmn wzr, w21, lsl #24",
+        "mov x26, x21"
       ]
     },
     "test ax, 1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0xa9",
       "ExpectedArm64ASM": [
-        "and w26, w4, #0x1",
-        "cmn wzr, w26, lsl #16"
+        "mov x20, x4",
+        "and w21, w20, #0x1",
+        "cmn wzr, w21, lsl #16",
+        "mov x26, x21"
       ]
     },
     "test eax, 1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0xa9",
       "ExpectedArm64ASM": [
-        "ands w26, w4, #0x1"
+        "mov x20, x4",
+        "ands w21, w20, #0x1",
+        "mov x26, x21"
       ]
     },
     "test rax, 1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0xa9",
       "ExpectedArm64ASM": [
-        "ands x26, x4, #0x1"
+        "mov x20, x4",
+        "ands x21, x20, #0x1",
+        "mov x26, x21"
       ]
     },
     "test al, -1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0xa8",
       "ExpectedArm64ASM": [
-        "mov x26, x4",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x4",
+        "mov x21, x20",
+        "cmn wzr, w21, lsl #24",
+        "mov x26, x21"
       ]
     },
     "test ax, -1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0xa9",
       "ExpectedArm64ASM": [
-        "mov x26, x4",
-        "cmn wzr, w26, lsl #16"
+        "mov x20, x4",
+        "mov x21, x20",
+        "cmn wzr, w21, lsl #16",
+        "mov x26, x21"
       ]
     },
     "test eax, -1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0xa9",
       "ExpectedArm64ASM": [
-        "ands w26, w4, w4"
+        "mov x20, x4",
+        "ands w21, w20, w20",
+        "mov x26, x21"
       ]
     },
     "test rax, -1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0xa9",
       "ExpectedArm64ASM": [
-        "ands x26, x4, x4"
+        "mov x20, x4",
+        "ands x21, x20, x20",
+        "mov x26, x21"
       ]
     },
     "scasb": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 13,
       "Comment": "0xae",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x11]",
-        "ldrsb x21, [x28, #714]",
-        "add x11, x11, x21",
-        "eor w27, w4, w20",
-        "lsl w0, w4, #24",
-        "cmp w0, w20, lsl #24",
-        "sub w26, w4, w20",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldrb w22, [x20]",
+        "ldrsb x23, [x28, #714]",
+        "add x24, x20, x23",
+        "mov x11, x24",
+        "eor w20, w21, w22",
+        "mov x27, x20",
+        "lsl w0, w21, #24",
+        "cmp w0, w22, lsl #24",
+        "sub w20, w21, w22",
+        "mov x26, x20",
         "cfinv"
       ]
     },
     "scasw": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 13,
       "Comment": "0xaf",
       "ExpectedArm64ASM": [
-        "ldrh w20, [x11]",
-        "ldrsb x21, [x28, #714]",
-        "add x11, x11, x21, lsl #1",
-        "eor w27, w4, w20",
-        "lsl w0, w4, #16",
-        "cmp w0, w20, lsl #16",
-        "sub w26, w4, w20",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldrh w22, [x20]",
+        "ldrsb x23, [x28, #714]",
+        "add x24, x20, x23, lsl #1",
+        "mov x11, x24",
+        "eor w20, w21, w22",
+        "mov x27, x20",
+        "lsl w0, w21, #16",
+        "cmp w0, w22, lsl #16",
+        "sub w20, w21, w22",
+        "mov x26, x20",
         "cfinv"
       ]
     },
     "scasd": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 11,
       "Comment": "0xaf",
       "ExpectedArm64ASM": [
-        "ldr w20, [x11]",
-        "ldrsb x21, [x28, #714]",
-        "add x11, x11, x21, lsl #2",
-        "eor w27, w4, w20",
-        "subs w26, w4, w20",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldr w22, [x20]",
+        "ldrsb x23, [x28, #714]",
+        "add x24, x20, x23, lsl #2",
+        "mov x11, x24",
+        "eor w20, w21, w22",
+        "mov x27, x20",
+        "subs w20, w21, w22",
+        "mov x26, x20",
         "cfinv"
       ]
     },
     "scasq": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 11,
       "Comment": "0xaf",
       "ExpectedArm64ASM": [
-        "ldr x20, [x11]",
-        "ldrsb x21, [x28, #714]",
-        "add x11, x11, x21, lsl #3",
-        "eor w27, w4, w20",
-        "subs x26, x4, x20",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldr x22, [x20]",
+        "ldrsb x23, [x28, #714]",
+        "add x24, x20, x23, lsl #3",
+        "mov x11, x24",
+        "eor w20, w21, w22",
+        "mov x27, x20",
+        "subs x20, x21, x22",
+        "mov x26, x20",
         "cfinv"
       ]
     },
     "repz scasb": {
-      "ExpectedInstructionCount": 25,
+      "ExpectedInstructionCount": 41,
       "Comment": "0xae",
       "ExpectedArm64ASM": [
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x30",
-        "cbz x5, #+0x28",
-        "ldrb w20, [x11]",
-        "eor w27, w4, w20",
-        "lsl w0, w4, #24",
-        "cmp w0, w20, lsl #24",
-        "sub w26, w4, w20",
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x50",
+        "mov x20, x5",
+        "cbz x20, #+0x44",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldrb w22, [x20]",
+        "eor w23, w21, w22",
+        "mov x27, x23",
+        "lsl w0, w21, #24",
+        "cmp w0, w22, lsl #24",
+        "sub w23, w21, w22",
+        "mov x26, x23",
         "cfinv",
-        "sub x5, x5, #0x1 (1)",
-        "add x11, x11, #0x1 (1)",
-        "b.eq #-0x24",
-        "b #+0x2c",
-        "cbz x5, #+0x28",
-        "ldrb w20, [x11]",
-        "eor w27, w4, w20",
-        "lsl w0, w4, #24",
-        "cmp w0, w20, lsl #24",
-        "sub w26, w4, w20",
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "add x21, x20, #0x1 (1)",
+        "mov x11, x21",
+        "b.eq #-0x44",
+        "b #+0x4c",
+        "mov x20, x5",
+        "cbz x20, #+0x44",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldrb w22, [x20]",
+        "eor w23, w21, w22",
+        "mov x27, x23",
+        "lsl w0, w21, #24",
+        "cmp w0, w22, lsl #24",
+        "sub w23, w21, w22",
+        "mov x26, x23",
         "cfinv",
-        "sub x5, x5, #0x1 (1)",
-        "sub x11, x11, #0x1 (1)",
-        "b.eq #-0x24"
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "sub x21, x20, #0x1 (1)",
+        "mov x11, x21",
+        "b.eq #-0x44"
       ]
     },
     "repz scasw": {
-      "ExpectedInstructionCount": 25,
+      "ExpectedInstructionCount": 41,
       "Comment": "0xaf",
       "ExpectedArm64ASM": [
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x30",
-        "cbz x5, #+0x28",
-        "ldrh w20, [x11]",
-        "eor w27, w4, w20",
-        "lsl w0, w4, #16",
-        "cmp w0, w20, lsl #16",
-        "sub w26, w4, w20",
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x50",
+        "mov x20, x5",
+        "cbz x20, #+0x44",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldrh w22, [x20]",
+        "eor w23, w21, w22",
+        "mov x27, x23",
+        "lsl w0, w21, #16",
+        "cmp w0, w22, lsl #16",
+        "sub w23, w21, w22",
+        "mov x26, x23",
         "cfinv",
-        "sub x5, x5, #0x1 (1)",
-        "add x11, x11, #0x2 (2)",
-        "b.eq #-0x24",
-        "b #+0x2c",
-        "cbz x5, #+0x28",
-        "ldrh w20, [x11]",
-        "eor w27, w4, w20",
-        "lsl w0, w4, #16",
-        "cmp w0, w20, lsl #16",
-        "sub w26, w4, w20",
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "add x21, x20, #0x2 (2)",
+        "mov x11, x21",
+        "b.eq #-0x44",
+        "b #+0x4c",
+        "mov x20, x5",
+        "cbz x20, #+0x44",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldrh w22, [x20]",
+        "eor w23, w21, w22",
+        "mov x27, x23",
+        "lsl w0, w21, #16",
+        "cmp w0, w22, lsl #16",
+        "sub w23, w21, w22",
+        "mov x26, x23",
         "cfinv",
-        "sub x5, x5, #0x1 (1)",
-        "sub x11, x11, #0x2 (2)",
-        "b.eq #-0x24"
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "sub x21, x20, #0x2 (2)",
+        "mov x11, x21",
+        "b.eq #-0x44"
       ]
     },
     "repz scasd": {
-      "ExpectedInstructionCount": 21,
+      "ExpectedInstructionCount": 37,
       "Comment": "0xaf",
       "ExpectedArm64ASM": [
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x28",
-        "cbz x5, #+0x20",
-        "ldr w20, [x11]",
-        "eor w27, w4, w20",
-        "subs w26, w4, w20",
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x48",
+        "mov x20, x5",
+        "cbz x20, #+0x3c",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldr w22, [x20]",
+        "eor w23, w21, w22",
+        "mov x27, x23",
+        "subs w23, w21, w22",
+        "mov x26, x23",
         "cfinv",
-        "sub x5, x5, #0x1 (1)",
-        "add x11, x11, #0x4 (4)",
-        "b.eq #-0x1c",
-        "b #+0x24",
-        "cbz x5, #+0x20",
-        "ldr w20, [x11]",
-        "eor w27, w4, w20",
-        "subs w26, w4, w20",
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "add x21, x20, #0x4 (4)",
+        "mov x11, x21",
+        "b.eq #-0x3c",
+        "b #+0x44",
+        "mov x20, x5",
+        "cbz x20, #+0x3c",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldr w22, [x20]",
+        "eor w23, w21, w22",
+        "mov x27, x23",
+        "subs w23, w21, w22",
+        "mov x26, x23",
         "cfinv",
-        "sub x5, x5, #0x1 (1)",
-        "sub x11, x11, #0x4 (4)",
-        "b.eq #-0x1c"
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "sub x21, x20, #0x4 (4)",
+        "mov x11, x21",
+        "b.eq #-0x3c"
       ]
     },
     "repz scasq": {
-      "ExpectedInstructionCount": 21,
+      "ExpectedInstructionCount": 37,
       "Comment": "0xaf",
       "ExpectedArm64ASM": [
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x28",
-        "cbz x5, #+0x20",
-        "ldr x20, [x11]",
-        "eor w27, w4, w20",
-        "subs x26, x4, x20",
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x48",
+        "mov x20, x5",
+        "cbz x20, #+0x3c",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldr x22, [x20]",
+        "eor w23, w21, w22",
+        "mov x27, x23",
+        "subs x23, x21, x22",
+        "mov x26, x23",
         "cfinv",
-        "sub x5, x5, #0x1 (1)",
-        "add x11, x11, #0x8 (8)",
-        "b.eq #-0x1c",
-        "b #+0x24",
-        "cbz x5, #+0x20",
-        "ldr x20, [x11]",
-        "eor w27, w4, w20",
-        "subs x26, x4, x20",
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "add x21, x20, #0x8 (8)",
+        "mov x11, x21",
+        "b.eq #-0x3c",
+        "b #+0x44",
+        "mov x20, x5",
+        "cbz x20, #+0x3c",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldr x22, [x20]",
+        "eor w23, w21, w22",
+        "mov x27, x23",
+        "subs x23, x21, x22",
+        "mov x26, x23",
         "cfinv",
-        "sub x5, x5, #0x1 (1)",
-        "sub x11, x11, #0x8 (8)",
-        "b.eq #-0x1c"
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "sub x21, x20, #0x8 (8)",
+        "mov x11, x21",
+        "b.eq #-0x3c"
       ]
     },
     "repnz scasb": {
-      "ExpectedInstructionCount": 25,
+      "ExpectedInstructionCount": 41,
       "Comment": "0xae",
       "ExpectedArm64ASM": [
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x30",
-        "cbz x5, #+0x28",
-        "ldrb w20, [x11]",
-        "eor w27, w4, w20",
-        "lsl w0, w4, #24",
-        "cmp w0, w20, lsl #24",
-        "sub w26, w4, w20",
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x50",
+        "mov x20, x5",
+        "cbz x20, #+0x44",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldrb w22, [x20]",
+        "eor w23, w21, w22",
+        "mov x27, x23",
+        "lsl w0, w21, #24",
+        "cmp w0, w22, lsl #24",
+        "sub w23, w21, w22",
+        "mov x26, x23",
         "cfinv",
-        "sub x5, x5, #0x1 (1)",
-        "add x11, x11, #0x1 (1)",
-        "b.ne #-0x24",
-        "b #+0x2c",
-        "cbz x5, #+0x28",
-        "ldrb w20, [x11]",
-        "eor w27, w4, w20",
-        "lsl w0, w4, #24",
-        "cmp w0, w20, lsl #24",
-        "sub w26, w4, w20",
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "add x21, x20, #0x1 (1)",
+        "mov x11, x21",
+        "b.ne #-0x44",
+        "b #+0x4c",
+        "mov x20, x5",
+        "cbz x20, #+0x44",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldrb w22, [x20]",
+        "eor w23, w21, w22",
+        "mov x27, x23",
+        "lsl w0, w21, #24",
+        "cmp w0, w22, lsl #24",
+        "sub w23, w21, w22",
+        "mov x26, x23",
         "cfinv",
-        "sub x5, x5, #0x1 (1)",
-        "sub x11, x11, #0x1 (1)",
-        "b.ne #-0x24"
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "sub x21, x20, #0x1 (1)",
+        "mov x11, x21",
+        "b.ne #-0x44"
       ]
     },
     "repnz scasw": {
-      "ExpectedInstructionCount": 25,
+      "ExpectedInstructionCount": 41,
       "Comment": "0xaf",
       "ExpectedArm64ASM": [
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x30",
-        "cbz x5, #+0x28",
-        "ldrh w20, [x11]",
-        "eor w27, w4, w20",
-        "lsl w0, w4, #16",
-        "cmp w0, w20, lsl #16",
-        "sub w26, w4, w20",
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x50",
+        "mov x20, x5",
+        "cbz x20, #+0x44",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldrh w22, [x20]",
+        "eor w23, w21, w22",
+        "mov x27, x23",
+        "lsl w0, w21, #16",
+        "cmp w0, w22, lsl #16",
+        "sub w23, w21, w22",
+        "mov x26, x23",
         "cfinv",
-        "sub x5, x5, #0x1 (1)",
-        "add x11, x11, #0x2 (2)",
-        "b.ne #-0x24",
-        "b #+0x2c",
-        "cbz x5, #+0x28",
-        "ldrh w20, [x11]",
-        "eor w27, w4, w20",
-        "lsl w0, w4, #16",
-        "cmp w0, w20, lsl #16",
-        "sub w26, w4, w20",
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "add x21, x20, #0x2 (2)",
+        "mov x11, x21",
+        "b.ne #-0x44",
+        "b #+0x4c",
+        "mov x20, x5",
+        "cbz x20, #+0x44",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldrh w22, [x20]",
+        "eor w23, w21, w22",
+        "mov x27, x23",
+        "lsl w0, w21, #16",
+        "cmp w0, w22, lsl #16",
+        "sub w23, w21, w22",
+        "mov x26, x23",
         "cfinv",
-        "sub x5, x5, #0x1 (1)",
-        "sub x11, x11, #0x2 (2)",
-        "b.ne #-0x24"
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "sub x21, x20, #0x2 (2)",
+        "mov x11, x21",
+        "b.ne #-0x44"
       ]
     },
     "repnz scasd": {
-      "ExpectedInstructionCount": 21,
+      "ExpectedInstructionCount": 37,
       "Comment": "0xaf",
       "ExpectedArm64ASM": [
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x28",
-        "cbz x5, #+0x20",
-        "ldr w20, [x11]",
-        "eor w27, w4, w20",
-        "subs w26, w4, w20",
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x48",
+        "mov x20, x5",
+        "cbz x20, #+0x3c",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldr w22, [x20]",
+        "eor w23, w21, w22",
+        "mov x27, x23",
+        "subs w23, w21, w22",
+        "mov x26, x23",
         "cfinv",
-        "sub x5, x5, #0x1 (1)",
-        "add x11, x11, #0x4 (4)",
-        "b.ne #-0x1c",
-        "b #+0x24",
-        "cbz x5, #+0x20",
-        "ldr w20, [x11]",
-        "eor w27, w4, w20",
-        "subs w26, w4, w20",
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "add x21, x20, #0x4 (4)",
+        "mov x11, x21",
+        "b.ne #-0x3c",
+        "b #+0x44",
+        "mov x20, x5",
+        "cbz x20, #+0x3c",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldr w22, [x20]",
+        "eor w23, w21, w22",
+        "mov x27, x23",
+        "subs w23, w21, w22",
+        "mov x26, x23",
         "cfinv",
-        "sub x5, x5, #0x1 (1)",
-        "sub x11, x11, #0x4 (4)",
-        "b.ne #-0x1c"
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "sub x21, x20, #0x4 (4)",
+        "mov x11, x21",
+        "b.ne #-0x3c"
       ]
     },
     "repnz scasq": {
-      "ExpectedInstructionCount": 21,
+      "ExpectedInstructionCount": 37,
       "Comment": "0xaf",
       "ExpectedArm64ASM": [
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x28",
-        "cbz x5, #+0x20",
-        "ldr x20, [x11]",
-        "eor w27, w4, w20",
-        "subs x26, x4, x20",
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x48",
+        "mov x20, x5",
+        "cbz x20, #+0x3c",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldr x22, [x20]",
+        "eor w23, w21, w22",
+        "mov x27, x23",
+        "subs x23, x21, x22",
+        "mov x26, x23",
         "cfinv",
-        "sub x5, x5, #0x1 (1)",
-        "add x11, x11, #0x8 (8)",
-        "b.ne #-0x1c",
-        "b #+0x24",
-        "cbz x5, #+0x20",
-        "ldr x20, [x11]",
-        "eor w27, w4, w20",
-        "subs x26, x4, x20",
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "add x21, x20, #0x8 (8)",
+        "mov x11, x21",
+        "b.ne #-0x3c",
+        "b #+0x44",
+        "mov x20, x5",
+        "cbz x20, #+0x3c",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldr x22, [x20]",
+        "eor w23, w21, w22",
+        "mov x27, x23",
+        "subs x23, x21, x22",
+        "mov x26, x23",
         "cfinv",
-        "sub x5, x5, #0x1 (1)",
-        "sub x11, x11, #0x8 (8)",
-        "b.ne #-0x1c"
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "sub x21, x20, #0x8 (8)",
+        "mov x11, x21",
+        "b.ne #-0x3c"
       ]
     },
     "cmc": {
diff --git a/unittests/InstructionCountCI/FlagM/PrimaryGroup.json b/unittests/InstructionCountCI/FlagM/PrimaryGroup.json
index 6c1b652617..5fb1cf6c48 100644
--- a/unittests/InstructionCountCI/FlagM/PrimaryGroup.json
+++ b/unittests/InstructionCountCI/FlagM/PrimaryGroup.json
@@ -12,2426 +12,3074 @@
   },
   "Instructions": {
     "add al, 1": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP1 0x80 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "lsl w0, w27, #24",
+        "mov x21, x4",
+        "mov x27, x21",
+        "lsl w0, w21, #24",
         "cmn w0, w20, lsl #24",
-        "add w26, w27, #0x1 (1)",
-        "mov x4, x27",
-        "bfxil x4, x26, #0, #8"
+        "add w20, w21, #0x1 (1)",
+        "mov x26, x20",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "or al, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x80 /1",
       "ExpectedArm64ASM": [
-        "orr w26, w4, #0x1",
-        "bfxil x4, x26, #0, #8",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x4",
+        "orr w21, w20, #0x1",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22",
+        "mov x26, x21",
+        "cmn wzr, w21, lsl #24"
       ]
     },
     "adc al, 1": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 20,
       "Comment": "GROUP1 0x80 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "cset w21, hs",
-        "adc w20, w27, w20",
-        "uxtb w26, w20",
-        "cmp w26, #0x1 (1)",
-        "cset x20, lo",
-        "cmp w26, #0x1 (1)",
-        "cset x22, ls",
-        "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #24",
-        "rmif x20, #63, #nzCv",
-        "bic w20, w26, w27",
-        "rmif x20, #7, #nzcV",
-        "mov x4, x27",
-        "bfxil x4, x26, #0, #8"
+        "mov x21, x4",
+        "mov x27, x21",
+        "cset w22, hs",
+        "adc w23, w21, w20",
+        "uxtb w20, w23",
+        "cmp w20, #0x1 (1)",
+        "cset x23, lo",
+        "cmp w20, #0x1 (1)",
+        "cset x24, ls",
+        "cmp x22, #0x1 (1)",
+        "csel x25, x24, x23, eq",
+        "cmn wzr, w20, lsl #24",
+        "rmif x25, #63, #nzCv",
+        "bic w22, w20, w21",
+        "rmif x22, #7, #nzcV",
+        "mov x26, x20",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "sbb al, 1": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 21,
       "Comment": "GROUP1 0x80 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "cset w21, hs",
-        "add w20, w20, w21",
-        "sub w20, w27, w20",
-        "uxtb w26, w20",
-        "cmp x26, x27",
+        "mov x21, x4",
+        "mov x27, x21",
+        "cset w22, hs",
+        "add w23, w20, w22",
+        "sub w20, w21, w23",
+        "uxtb w23, w20",
+        "cmp x23, x21",
         "cset x20, hi",
-        "cmp x26, x27",
-        "cset x22, hs",
-        "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #24",
-        "rmif x20, #63, #nzCv",
-        "bic w20, w27, w26",
+        "cmp x23, x21",
+        "cset x24, hs",
+        "cmp x22, #0x1 (1)",
+        "csel x25, x24, x20, eq",
+        "cmn wzr, w23, lsl #24",
+        "rmif x25, #63, #nzCv",
+        "bic w20, w21, w23",
         "rmif x20, #7, #nzcV",
-        "mov x4, x27",
-        "bfxil x4, x26, #0, #8"
+        "mov x26, x23",
+        "mov x20, x21",
+        "bfxil x20, x23, #0, #8",
+        "mov x4, x20"
       ]
     },
     "and al, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x80 /4",
       "ExpectedArm64ASM": [
-        "and w26, w4, #0x1",
-        "cmn wzr, w26, lsl #24",
-        "bfxil x4, x26, #0, #8"
+        "mov x20, x4",
+        "and w21, w20, #0x1",
+        "cmn wzr, w21, lsl #24",
+        "mov x26, x21",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22"
       ]
     },
     "sub al, 1": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP1 0x80 /5",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "lsl w0, w27, #24",
+        "mov x21, x4",
+        "mov x27, x21",
+        "lsl w0, w21, #24",
         "cmp w0, w20, lsl #24",
-        "sub w26, w27, #0x1 (1)",
+        "sub w20, w21, #0x1 (1)",
+        "mov x26, x20",
         "cfinv",
-        "mov x4, x27",
-        "bfxil x4, x26, #0, #8"
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "xor al, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x80 /6",
       "ExpectedArm64ASM": [
-        "eor w26, w4, #0x1",
-        "bfxil x4, x26, #0, #8",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x4",
+        "eor w21, w20, #0x1",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22",
+        "mov x26, x21",
+        "cmn wzr, w21, lsl #24"
       ]
     },
     "cmp al, 1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP1 0x80 /7",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "lsl w0, w27, #24",
+        "mov x21, x4",
+        "mov x27, x21",
+        "lsl w0, w21, #24",
         "cmp w0, w20, lsl #24",
-        "sub w26, w27, #0x1 (1)",
+        "sub w20, w21, #0x1 (1)",
+        "mov x26, x20",
         "cfinv"
       ]
     },
     "add al, -1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP1 0x80 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
-        "mvn w27, w4",
-        "lsl w0, w4, #24",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "lsl w0, w21, #24",
         "cmn w0, w20, lsl #24",
-        "add w26, w4, #0xff (255)",
-        "bfxil x4, x26, #0, #8"
+        "add w20, w21, #0xff (255)",
+        "mov x26, x20",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "or al, -1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x80 /1",
       "ExpectedArm64ASM": [
-        "orr w26, w4, #0xff",
-        "bfxil x4, x26, #0, #8",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x4",
+        "orr w21, w20, #0xff",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22",
+        "mov x26, x21",
+        "cmn wzr, w21, lsl #24"
       ]
     },
     "adc al, -1": {
-      "ExpectedInstructionCount": 16,
+      "ExpectedInstructionCount": 21,
       "Comment": "GROUP1 0x80 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
-        "mvn w27, w4",
-        "cset w21, hs",
-        "adc w20, w4, w20",
-        "uxtb w26, w20",
-        "cmp w26, #0xff (255)",
-        "cset x20, lo",
-        "cmp w26, #0xff (255)",
-        "cset x22, ls",
-        "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #24",
-        "rmif x20, #63, #nzCv",
-        "bic w20, w4, w26",
-        "rmif x20, #7, #nzcV",
-        "bfxil x4, x26, #0, #8"
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "cset w22, hs",
+        "adc w23, w21, w20",
+        "uxtb w20, w23",
+        "cmp w20, #0xff (255)",
+        "cset x23, lo",
+        "cmp w20, #0xff (255)",
+        "cset x24, ls",
+        "cmp x22, #0x1 (1)",
+        "csel x25, x24, x23, eq",
+        "cmn wzr, w20, lsl #24",
+        "rmif x25, #63, #nzCv",
+        "bic w22, w21, w20",
+        "rmif x22, #7, #nzcV",
+        "mov x26, x20",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "sbb al, -1": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 22,
       "Comment": "GROUP1 0x80 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
-        "mvn w27, w4",
-        "cset w21, hs",
-        "add w20, w20, w21",
-        "sub w20, w4, w20",
-        "uxtb w26, w20",
-        "cmp x26, x4",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "cset w22, hs",
+        "add w23, w20, w22",
+        "sub w20, w21, w23",
+        "uxtb w23, w20",
+        "cmp x23, x21",
         "cset x20, hi",
-        "cmp x26, x4",
-        "cset x22, hs",
-        "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #24",
-        "rmif x20, #63, #nzCv",
-        "bic w20, w26, w4",
+        "cmp x23, x21",
+        "cset x24, hs",
+        "cmp x22, #0x1 (1)",
+        "csel x25, x24, x20, eq",
+        "cmn wzr, w23, lsl #24",
+        "rmif x25, #63, #nzCv",
+        "bic w20, w23, w21",
         "rmif x20, #7, #nzcV",
-        "bfxil x4, x26, #0, #8"
+        "mov x26, x23",
+        "mov x20, x21",
+        "bfxil x20, x23, #0, #8",
+        "mov x4, x20"
       ]
     },
     "and al, -1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x80 /4",
       "ExpectedArm64ASM": [
-        "and w26, w4, #0xff",
-        "cmn wzr, w26, lsl #24",
-        "bfxil x4, x26, #0, #8"
+        "mov x20, x4",
+        "and w21, w20, #0xff",
+        "cmn wzr, w21, lsl #24",
+        "mov x26, x21",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22"
       ]
     },
     "sub al, -1": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 12,
       "Comment": "GROUP1 0x80 /5",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
-        "mvn w27, w4",
-        "lsl w0, w4, #24",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "lsl w0, w21, #24",
         "cmp w0, w20, lsl #24",
-        "sub w26, w4, #0xff (255)",
+        "sub w20, w21, #0xff (255)",
+        "mov x26, x20",
         "cfinv",
-        "bfxil x4, x26, #0, #8"
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "xor al, -1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x80 /6",
       "ExpectedArm64ASM": [
-        "eor w26, w4, #0xff",
-        "bfxil x4, x26, #0, #8",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x4",
+        "eor w21, w20, #0xff",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22",
+        "mov x26, x21",
+        "cmn wzr, w21, lsl #24"
       ]
     },
     "cmp al, -1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP1 0x80 /7",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
-        "mvn w27, w4",
-        "lsl w0, w4, #24",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "lsl w0, w21, #24",
         "cmp w0, w20, lsl #24",
-        "sub w26, w4, #0xff (255)",
+        "sub w20, w21, #0xff (255)",
+        "mov x26, x20",
         "cfinv"
       ]
     },
     "add ax, 256": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP1 0x81 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "mov x27, x4",
-        "lsl w0, w27, #16",
+        "mov x21, x4",
+        "mov x27, x21",
+        "lsl w0, w21, #16",
         "cmn w0, w20, lsl #16",
-        "add w26, w27, #0x100 (256)",
-        "mov x4, x27",
-        "bfxil x4, x26, #0, #16"
+        "add w20, w21, #0x100 (256)",
+        "mov x26, x20",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x4, x22"
       ]
     },
     "add eax, 256": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x81 /0",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "adds w26, w27, #0x100 (256)",
-        "mov x4, x26"
+        "mov x20, x4",
+        "mov x27, x20",
+        "adds w21, w20, #0x100 (256)",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "add rax, 256": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x81 /0",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "adds x26, x27, #0x100 (256)",
-        "mov x4, x26"
+        "mov x20, x4",
+        "mov x27, x20",
+        "adds x21, x20, #0x100 (256)",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "or eax, 256": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x81 /1",
       "ExpectedArm64ASM": [
-        "orr w4, w4, #0x100",
-        "mov x26, x4",
-        "tst w4, w4"
+        "mov x20, x4",
+        "orr w21, w20, #0x100",
+        "mov x4, x21",
+        "mov x26, x21",
+        "tst w21, w21"
       ]
     },
     "or rax, 256": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x81 /1",
       "ExpectedArm64ASM": [
-        "orr x4, x4, #0x100",
-        "mov x26, x4",
-        "tst x4, x4"
+        "mov x20, x4",
+        "orr x21, x20, #0x100",
+        "mov x4, x21",
+        "mov x26, x21",
+        "tst x21, x21"
       ]
     },
     "adc eax, 256": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "mov x27, x4",
-        "adcs w26, w27, w20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "mov x27, x21",
+        "adcs w22, w21, w20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "adc rax, 256": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "mov x27, x4",
-        "adcs x26, x27, x20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "mov x27, x21",
+        "adcs x22, x21, x20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "sbb eax, 256": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP1 0x81 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "mov x27, x4",
+        "mov x21, x4",
+        "mov x27, x21",
         "cfinv",
-        "sbcs w26, w27, w20",
+        "sbcs w22, w21, w20",
         "cfinv",
-        "mov x4, x26"
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "sbb rax, 256": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP1 0x81 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "mov x27, x4",
+        "mov x21, x4",
+        "mov x27, x21",
         "cfinv",
-        "sbcs x26, x27, x20",
+        "sbcs x22, x21, x20",
         "cfinv",
-        "mov x4, x26"
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "and eax, 256": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP1 0x81 /4",
       "ExpectedArm64ASM": [
-        "ands w26, w4, #0x100",
-        "mov x4, x26"
+        "mov x20, x4",
+        "ands w21, w20, #0x100",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "and rax, 256": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP1 0x81 /4",
       "ExpectedArm64ASM": [
-        "ands x26, x4, #0x100",
-        "mov x4, x26"
+        "mov x20, x4",
+        "ands x21, x20, #0x100",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "sub eax, 256": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /5",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "subs w26, w27, #0x100 (256)",
+        "mov x20, x4",
+        "mov x27, x20",
+        "subs w21, w20, #0x100 (256)",
+        "mov x26, x21",
         "cfinv",
-        "mov x4, x26"
+        "mov x4, x21"
       ]
     },
     "sub rax, 256": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /5",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "subs x26, x27, #0x100 (256)",
+        "mov x20, x4",
+        "mov x27, x20",
+        "subs x21, x20, #0x100 (256)",
+        "mov x26, x21",
         "cfinv",
-        "mov x4, x26"
+        "mov x4, x21"
       ]
     },
     "xor eax, 256": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x81 /6",
       "ExpectedArm64ASM": [
-        "eor w4, w4, #0x100",
-        "mov x26, x4",
-        "tst w4, w4"
+        "mov x20, x4",
+        "eor w21, w20, #0x100",
+        "mov x4, x21",
+        "mov x26, x21",
+        "tst w21, w21"
       ]
     },
     "xor rax, 256": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x81 /6",
       "ExpectedArm64ASM": [
-        "eor x4, x4, #0x100",
-        "mov x26, x4",
-        "tst x4, x4"
+        "mov x20, x4",
+        "eor x21, x20, #0x100",
+        "mov x4, x21",
+        "mov x26, x21",
+        "tst x21, x21"
       ]
     },
     "cmp eax, 256": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x81 /7",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "subs w26, w27, #0x100 (256)",
+        "mov x20, x4",
+        "mov x27, x20",
+        "subs w21, w20, #0x100 (256)",
+        "mov x26, x21",
         "cfinv"
       ]
     },
     "cmp rax, 256": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x81 /7",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "subs x26, x27, #0x100 (256)",
+        "mov x20, x4",
+        "mov x27, x20",
+        "subs x21, x20, #0x100 (256)",
+        "mov x26, x21",
         "cfinv"
       ]
     },
     "add ax, -256": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP1 0x81 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0xff00",
-        "mov x27, x4",
-        "lsl w0, w27, #16",
+        "mov x21, x4",
+        "mov x27, x21",
+        "lsl w0, w21, #16",
         "cmn w0, w20, lsl #16",
-        "add w26, w27, w20",
-        "mov x4, x27",
-        "bfxil x4, x26, #0, #16"
+        "add w22, w21, w20",
+        "mov x26, x22",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #16",
+        "mov x4, x20"
       ]
     },
     "add eax, -256": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffff00",
-        "mov x27, x4",
-        "adds w26, w27, w20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "mov x27, x21",
+        "adds w22, w21, w20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "add rax, -256": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x81 /0",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "subs x26, x27, #0x100 (256)",
-        "mov x4, x26"
+        "mov x20, x4",
+        "mov x27, x20",
+        "subs x21, x20, #0x100 (256)",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "or eax, -256": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x81 /1",
       "ExpectedArm64ASM": [
-        "orr w4, w4, #0xffffff00",
-        "mov x26, x4",
-        "tst w4, w4"
+        "mov x20, x4",
+        "orr w21, w20, #0xffffff00",
+        "mov x4, x21",
+        "mov x26, x21",
+        "tst w21, w21"
       ]
     },
     "or rax, -256": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x81 /1",
       "ExpectedArm64ASM": [
-        "orr x4, x4, #0xffffffffffffff00",
-        "mov x26, x4",
-        "tst x4, x4"
+        "mov x20, x4",
+        "orr x21, x20, #0xffffffffffffff00",
+        "mov x4, x21",
+        "mov x26, x21",
+        "tst x21, x21"
       ]
     },
     "adc eax, -256": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffff00",
-        "mov x27, x4",
-        "adcs w26, w27, w20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "mov x27, x21",
+        "adcs w22, w21, w20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "adc rax, -256": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /2",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffff00",
-        "mov x27, x4",
-        "adcs x26, x27, x20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "mov x27, x21",
+        "adcs x22, x21, x20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "sbb eax, -256": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP1 0x81 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffff00",
-        "mov x27, x4",
+        "mov x21, x4",
+        "mov x27, x21",
         "cfinv",
-        "sbcs w26, w27, w20",
+        "sbcs w22, w21, w20",
         "cfinv",
-        "mov x4, x26"
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "sbb rax, -256": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP1 0x81 /3",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffff00",
-        "mov x27, x4",
+        "mov x21, x4",
+        "mov x27, x21",
         "cfinv",
-        "sbcs x26, x27, x20",
+        "sbcs x22, x21, x20",
         "cfinv",
-        "mov x4, x26"
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "and eax, -256": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP1 0x81 /4",
       "ExpectedArm64ASM": [
-        "ands w26, w4, #0xffffff00",
-        "mov x4, x26"
+        "mov x20, x4",
+        "ands w21, w20, #0xffffff00",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "and rax, -256": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP1 0x81 /4",
       "ExpectedArm64ASM": [
-        "ands x26, x4, #0xffffffffffffff00",
-        "mov x4, x26"
+        "mov x20, x4",
+        "ands x21, x20, #0xffffffffffffff00",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "sub eax, -256": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x81 /5",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffff00",
-        "mov x27, x4",
-        "subs w26, w27, w20",
+        "mov x21, x4",
+        "mov x27, x21",
+        "subs w22, w21, w20",
+        "mov x26, x22",
         "cfinv",
-        "mov x4, x26"
+        "mov x4, x22"
       ]
     },
     "sub rax, -256": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /5",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "adds x26, x27, #0x100 (256)",
+        "mov x20, x4",
+        "mov x27, x20",
+        "adds x21, x20, #0x100 (256)",
+        "mov x26, x21",
         "cfinv",
-        "mov x4, x26"
+        "mov x4, x21"
       ]
     },
     "xor eax, -256": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x81 /6",
       "ExpectedArm64ASM": [
-        "eor w4, w4, #0xffffff00",
-        "mov x26, x4",
-        "tst w4, w4"
+        "mov x20, x4",
+        "eor w21, w20, #0xffffff00",
+        "mov x4, x21",
+        "mov x26, x21",
+        "tst w21, w21"
       ]
     },
     "xor rax, -256": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x81 /6",
       "ExpectedArm64ASM": [
-        "eor x4, x4, #0xffffffffffffff00",
-        "mov x26, x4",
-        "tst x4, x4"
+        "mov x20, x4",
+        "eor x21, x20, #0xffffffffffffff00",
+        "mov x4, x21",
+        "mov x26, x21",
+        "tst x21, x21"
       ]
     },
     "cmp eax, -256": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /7",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffff00",
-        "mov x27, x4",
-        "subs w26, w27, w20",
+        "mov x21, x4",
+        "mov x27, x21",
+        "subs w22, w21, w20",
+        "mov x26, x22",
         "cfinv"
       ]
     },
     "cmp rax, -256": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x81 /7",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "adds x26, x27, #0x100 (256)",
+        "mov x20, x4",
+        "mov x27, x20",
+        "adds x21, x20, #0x100 (256)",
+        "mov x26, x21",
         "cfinv"
       ]
     },
     "add ax, 1": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP1 0x83 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "lsl w0, w27, #16",
+        "mov x21, x4",
+        "mov x27, x21",
+        "lsl w0, w21, #16",
         "cmn w0, w20, lsl #16",
-        "add w26, w27, #0x1 (1)",
-        "mov x4, x27",
-        "bfxil x4, x26, #0, #16"
+        "add w20, w21, #0x1 (1)",
+        "mov x26, x20",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x4, x22"
       ]
     },
     "add eax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x83 /0",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "adds w26, w27, #0x1 (1)",
-        "mov x4, x26"
+        "mov x20, x4",
+        "mov x27, x20",
+        "adds w21, w20, #0x1 (1)",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "add rax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x83 /0",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "adds x26, x27, #0x1 (1)",
-        "mov x4, x26"
+        "mov x20, x4",
+        "mov x27, x20",
+        "adds x21, x20, #0x1 (1)",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "or eax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x83 /1",
       "ExpectedArm64ASM": [
-        "orr w4, w4, #0x1",
-        "mov x26, x4",
-        "tst w4, w4"
+        "mov x20, x4",
+        "orr w21, w20, #0x1",
+        "mov x4, x21",
+        "mov x26, x21",
+        "tst w21, w21"
       ]
     },
     "or rax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x83 /1",
       "ExpectedArm64ASM": [
-        "orr x4, x4, #0x1",
-        "mov x26, x4",
-        "tst x4, x4"
+        "mov x20, x4",
+        "orr x21, x20, #0x1",
+        "mov x4, x21",
+        "mov x26, x21",
+        "tst x21, x21"
       ]
     },
     "adc eax, 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "adcs w26, w27, w20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "mov x27, x21",
+        "adcs w22, w21, w20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "adc rax, 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "adcs x26, x27, x20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "mov x27, x21",
+        "adcs x22, x21, x20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "sbb eax, 1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP1 0x83 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
+        "mov x21, x4",
+        "mov x27, x21",
         "cfinv",
-        "sbcs w26, w27, w20",
+        "sbcs w22, w21, w20",
         "cfinv",
-        "mov x4, x26"
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "sbb rax, 1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP1 0x83 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
+        "mov x21, x4",
+        "mov x27, x21",
         "cfinv",
-        "sbcs x26, x27, x20",
+        "sbcs x22, x21, x20",
         "cfinv",
-        "mov x4, x26"
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "and eax, 1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP1 0x83 /4",
       "ExpectedArm64ASM": [
-        "ands w26, w4, #0x1",
-        "mov x4, x26"
+        "mov x20, x4",
+        "ands w21, w20, #0x1",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "and rax, 1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP1 0x83 /4",
       "ExpectedArm64ASM": [
-        "ands x26, x4, #0x1",
-        "mov x4, x26"
+        "mov x20, x4",
+        "ands x21, x20, #0x1",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "sub eax, 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /5",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "subs w26, w27, #0x1 (1)",
+        "mov x20, x4",
+        "mov x27, x20",
+        "subs w21, w20, #0x1 (1)",
+        "mov x26, x21",
         "cfinv",
-        "mov x4, x26"
+        "mov x4, x21"
       ]
     },
     "sub rax, 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /5",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "subs x26, x27, #0x1 (1)",
+        "mov x20, x4",
+        "mov x27, x20",
+        "subs x21, x20, #0x1 (1)",
+        "mov x26, x21",
         "cfinv",
-        "mov x4, x26"
+        "mov x4, x21"
       ]
     },
     "xor eax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x83 /6",
       "ExpectedArm64ASM": [
-        "eor w4, w4, #0x1",
-        "mov x26, x4",
-        "tst w4, w4"
+        "mov x20, x4",
+        "eor w21, w20, #0x1",
+        "mov x4, x21",
+        "mov x26, x21",
+        "tst w21, w21"
       ]
     },
     "xor rax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x83 /6",
       "ExpectedArm64ASM": [
-        "eor x4, x4, #0x1",
-        "mov x26, x4",
-        "tst x4, x4"
+        "mov x20, x4",
+        "eor x21, x20, #0x1",
+        "mov x4, x21",
+        "mov x26, x21",
+        "tst x21, x21"
       ]
     },
     "cmp eax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x83 /7",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "subs w26, w27, #0x1 (1)",
+        "mov x20, x4",
+        "mov x27, x20",
+        "subs w21, w20, #0x1 (1)",
+        "mov x26, x21",
         "cfinv"
       ]
     },
     "cmp rax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x83 /7",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "subs x26, x27, #0x1 (1)",
+        "mov x20, x4",
+        "mov x27, x20",
+        "subs x21, x20, #0x1 (1)",
+        "mov x26, x21",
         "cfinv"
       ]
     },
     "add ax, -1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP1 0x83 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0xffff",
-        "mvn w27, w4",
-        "lsl w0, w4, #16",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "lsl w0, w21, #16",
         "cmn w0, w20, lsl #16",
-        "add w26, w4, w20",
-        "bfxil x4, x26, #0, #16"
+        "add w22, w21, w20",
+        "mov x26, x22",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #16",
+        "mov x4, x20"
       ]
     },
     "add eax, -1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x83 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "mvn w27, w4",
-        "adds w26, w4, w20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "adds w22, w21, w20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "add rax, -1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /0",
       "ExpectedArm64ASM": [
-        "mvn w27, w4",
-        "subs x26, x4, #0x1 (1)",
-        "mov x4, x26"
+        "mov x20, x4",
+        "mvn w21, w20",
+        "mov x27, x21",
+        "subs x21, x20, #0x1 (1)",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "or eax, -1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /-1",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "orr w4, w4, w20",
-        "mov x26, x4",
-        "tst w4, w4"
+        "mov x21, x4",
+        "orr w22, w21, w20",
+        "mov x4, x22",
+        "mov x26, x22",
+        "tst w22, w22"
       ]
     },
     "or rax, -1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /-1",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
-        "orr x4, x4, x20",
-        "mov x26, x4",
-        "tst x4, x4"
+        "mov x21, x4",
+        "orr x22, x21, x20",
+        "mov x4, x22",
+        "mov x26, x22",
+        "tst x22, x22"
       ]
     },
     "adc eax, -1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x83 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "mvn w27, w4",
-        "adcs w26, w4, w20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "adcs w22, w21, w20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "adc rax, -1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x83 /2",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
-        "mvn w27, w4",
-        "adcs x26, x4, x20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "adcs x22, x21, x20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "sbb eax, -1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP1 0x83 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "mvn w27, w4",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
         "cfinv",
-        "sbcs w26, w4, w20",
+        "sbcs w22, w21, w20",
         "cfinv",
-        "mov x4, x26"
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "sbb rax, -1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP1 0x83 /3",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
-        "mvn w27, w4",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
         "cfinv",
-        "sbcs x26, x4, x20",
+        "sbcs x22, x21, x20",
         "cfinv",
-        "mov x4, x26"
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "and eax, -1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x83 /4",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "ands w26, w4, w20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "ands w22, w21, w20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "and rax, -1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x83 /4",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
-        "ands x26, x4, x20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "ands x22, x21, x20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "sub eax, -1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP1 0x83 /5",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "mvn w27, w4",
-        "subs w26, w4, w20",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "subs w22, w21, w20",
+        "mov x26, x22",
         "cfinv",
-        "mov x4, x26"
+        "mov x4, x22"
       ]
     },
     "sub rax, -1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x83 /5",
       "ExpectedArm64ASM": [
-        "mvn w27, w4",
-        "adds x26, x4, #0x1 (1)",
+        "mov x20, x4",
+        "mvn w21, w20",
+        "mov x27, x21",
+        "adds x21, x20, #0x1 (1)",
+        "mov x26, x21",
         "cfinv",
-        "mov x4, x26"
+        "mov x4, x21"
       ]
     },
     "xor eax, -1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /6",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "eor w4, w4, w20",
-        "mov x26, x4",
-        "tst w4, w4"
+        "mov x21, x4",
+        "eor w22, w21, w20",
+        "mov x4, x22",
+        "mov x26, x22",
+        "tst w22, w22"
       ]
     },
     "xor rax, -1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /6",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
-        "eor x4, x4, x20",
-        "mov x26, x4",
-        "tst x4, x4"
+        "mov x21, x4",
+        "eor x22, x21, x20",
+        "mov x4, x22",
+        "mov x26, x22",
+        "tst x22, x22"
       ]
     },
     "cmp eax, -1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x83 /7",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "mvn w27, w4",
-        "subs w26, w4, w20",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "subs w22, w21, w20",
+        "mov x26, x22",
         "cfinv"
       ]
     },
     "cmp rax, -1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /7",
       "ExpectedArm64ASM": [
-        "mvn w27, w4",
-        "adds x26, x4, #0x1 (1)",
+        "mov x20, x4",
+        "mvn w21, w20",
+        "mov x27, x21",
+        "adds x21, x20, #0x1 (1)",
+        "mov x26, x21",
         "cfinv"
       ]
     },
     "rol al, 2": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP2 0xC0 /0",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "bfi w20, w4, #8, #8",
-        "bfi w20, w20, #16, #16",
-        "ror w20, w20, #30",
-        "bfxil x4, x20, #0, #8",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x4",
+        "mov w21, w20",
+        "bfi w21, w20, #8, #8",
+        "mov w22, w21",
+        "bfi w22, w21, #16, #16",
+        "ror w21, w22, #30",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22",
+        "rmif x21, #63, #nzCv"
       ]
     },
     "ror al, 2": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP2 0xC0 /1",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "bfi w20, w4, #8, #8",
-        "ror w20, w20, #2",
-        "bfxil x4, x20, #0, #8",
-        "rmif x20, #6, #nzCv"
+        "mov x20, x4",
+        "mov w21, w20",
+        "bfi w21, w20, #8, #8",
+        "ror w22, w21, #2",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #8",
+        "mov x4, x21",
+        "rmif x22, #6, #nzCv"
       ]
     },
     "rcl al, 2": {
-      "ExpectedInstructionCount": 20,
+      "ExpectedInstructionCount": 32,
       "Comment": "GROUP2 0xC0 /2",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "mov w21, #0x0",
-        "cset w22, hs",
-        "bfi x21, x20, #55, #8",
-        "bfi x21, x22, #63, #1",
-        "bfi x21, x20, #46, #8",
-        "bfi x21, x22, #54, #1",
-        "bfi x21, x20, #37, #8",
-        "bfi x21, x22, #45, #1",
-        "bfi x21, x20, #28, #8",
-        "bfi x21, x22, #36, #1",
-        "bfi x21, x20, #19, #8",
-        "bfi x21, x22, #27, #1",
-        "mov x0, x21",
-        "bfxil x0, x20, #0, #8",
-        "mov x20, x0",
-        "ror x21, x20, #62",
-        "bfxil x4, x21, #0, #8",
-        "ror x20, x20, #61",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x4",
+        "uxtb w21, w20",
+        "mov w22, #0x0",
+        "cset w23, hs",
+        "mov x24, x22",
+        "bfi x24, x21, #55, #8",
+        "mov x22, x24",
+        "bfi x22, x23, #63, #1",
+        "mov x24, x22",
+        "bfi x24, x21, #46, #8",
+        "mov x22, x24",
+        "bfi x22, x23, #54, #1",
+        "mov x24, x22",
+        "bfi x24, x21, #37, #8",
+        "mov x22, x24",
+        "bfi x22, x23, #45, #1",
+        "mov x24, x22",
+        "bfi x24, x21, #28, #8",
+        "mov x22, x24",
+        "bfi x22, x23, #36, #1",
+        "mov x24, x22",
+        "bfi x24, x21, #19, #8",
+        "mov x22, x24",
+        "bfi x22, x23, #27, #1",
+        "mov x23, x22",
+        "bfxil x23, x21, #0, #8",
+        "ror x21, x23, #62",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22",
+        "ror x20, x23, #61",
+        "rmif x20, #63, #nzCv"
       ]
     },
     "rcr al, 2": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 16,
       "Comment": "GROUP2 0xC0 /3",
       "ExpectedArm64ASM": [
         "cset w20, hs",
-        "uxtb w21, w4",
-        "mov x0, x21",
-        "bfi x0, x20, #8, #1",
-        "mov x20, x0",
-        "bfi x20, x20, #9, #9",
-        "bfi x20, x20, #18, #18",
-        "bfi x20, x20, #36, #9",
-        "lsr w21, w20, #2",
-        "bfxil x4, x21, #0, #8",
+        "mov x21, x4",
+        "uxtb w22, w21",
+        "mov x23, x22",
+        "bfi x23, x20, #8, #1",
+        "mov x20, x23",
+        "bfi x20, x23, #9, #9",
+        "mov x22, x20",
+        "bfi x22, x20, #18, #18",
+        "mov x20, x22",
+        "bfi x20, x22, #36, #9",
+        "lsr w22, w20, #2",
+        "mov x23, x21",
+        "bfxil x23, x22, #0, #8",
+        "mov x4, x23",
         "rmif x20, #0, #nzCv"
       ]
     },
     "shl al, 2": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP2 0xC0 /4",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "lsl w26, w20, #2",
-        "bfxil x4, x26, #0, #8",
-        "cmn wzr, w26, lsl #24",
-        "rmif x20, #5, #nzCv"
+        "mov x20, x4",
+        "uxtb w21, w20",
+        "lsl w22, w21, #2",
+        "mov x23, x20",
+        "bfxil x23, x22, #0, #8",
+        "mov x4, x23",
+        "cmn wzr, w22, lsl #24",
+        "rmif x21, #5, #nzCv",
+        "mov x26, x22"
       ]
     },
     "shr al, 2": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP2 0xC0 /5",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "lsr w26, w20, #2",
-        "bfxil x4, x26, #0, #8",
-        "cmn wzr, w26, lsl #24",
-        "rmif x20, #0, #nzCv"
+        "mov x20, x4",
+        "uxtb w21, w20",
+        "lsr w22, w21, #2",
+        "mov x23, x20",
+        "bfxil x23, x22, #0, #8",
+        "mov x4, x23",
+        "cmn wzr, w22, lsl #24",
+        "rmif x21, #0, #nzCv",
+        "mov x26, x22"
       ]
     },
     "sar al, 2": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP2 0xC0 /7",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "sxtb x20, w20",
-        "asr x26, x20, #2",
-        "bfxil x4, x26, #0, #8",
-        "cmn wzr, w26, lsl #24",
-        "rmif x20, #0, #nzCv"
+        "mov x20, x4",
+        "uxtb w21, w20",
+        "sxtb x22, w21",
+        "asr x21, x22, #2",
+        "mov x23, x20",
+        "bfxil x23, x21, #0, #8",
+        "mov x4, x23",
+        "cmn wzr, w21, lsl #24",
+        "rmif x22, #0, #nzCv",
+        "mov x26, x21"
       ]
     },
     "rol ax, 2": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP2 0xC1 /0",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "bfi w20, w4, #16, #16",
-        "ror w20, w20, #30",
-        "bfxil x4, x20, #0, #16",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x4",
+        "mov w21, w20",
+        "bfi w21, w20, #16, #16",
+        "ror w22, w21, #30",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21",
+        "rmif x22, #63, #nzCv"
       ]
     },
     "rol eax, 2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP2 0xC1 /0",
       "ExpectedArm64ASM": [
-        "ror w4, w4, #30",
-        "rmif x4, #63, #nzCv"
+        "mov x20, x4",
+        "ror w21, w20, #30",
+        "mov x4, x21",
+        "rmif x21, #63, #nzCv"
       ]
     },
     "rol rax, 2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP2 0xC1 /0",
       "ExpectedArm64ASM": [
-        "ror x4, x4, #62",
-        "rmif x4, #63, #nzCv"
+        "mov x20, x4",
+        "ror x21, x20, #62",
+        "mov x4, x21",
+        "rmif x21, #63, #nzCv"
       ]
     },
     "ror ax, 2": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP2 0xC1 /1",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "bfi w20, w4, #16, #16",
-        "ror w20, w20, #2",
-        "bfxil x4, x20, #0, #16",
-        "rmif x20, #14, #nzCv"
+        "mov x20, x4",
+        "mov w21, w20",
+        "bfi w21, w20, #16, #16",
+        "ror w22, w21, #2",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21",
+        "rmif x22, #14, #nzCv"
       ]
     },
     "ror eax, 2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP2 0xC1 /1",
       "ExpectedArm64ASM": [
-        "ror w4, w4, #2",
-        "rmif x4, #30, #nzCv"
+        "mov x20, x4",
+        "ror w21, w20, #2",
+        "mov x4, x21",
+        "rmif x21, #30, #nzCv"
       ]
     },
     "ror rax, 2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP2 0xC1 /1",
       "ExpectedArm64ASM": [
-        "ror x4, x4, #2",
-        "rmif x4, #62, #nzCv"
+        "mov x20, x4",
+        "ror x21, x20, #2",
+        "mov x4, x21",
+        "rmif x21, #62, #nzCv"
       ]
     },
     "rcl ax, 2": {
-      "ExpectedInstructionCount": 16,
+      "ExpectedInstructionCount": 24,
       "Comment": "GROUP2 0xC1 /2",
       "ExpectedArm64ASM": [
-        "uxth w20, w4",
-        "mov w21, #0x0",
-        "cset w22, hs",
-        "bfi x21, x20, #47, #16",
-        "bfi x21, x22, #63, #1",
-        "bfi x21, x20, #30, #16",
-        "bfi x21, x22, #46, #1",
-        "bfi x21, x20, #13, #16",
-        "bfi x21, x22, #29, #1",
-        "mov x0, x21",
-        "bfxil x0, x20, #0, #16",
-        "mov x20, x0",
-        "ror x21, x20, #62",
-        "bfxil x4, x21, #0, #16",
-        "ror x20, x20, #61",
+        "mov x20, x4",
+        "uxth w21, w20",
+        "mov w22, #0x0",
+        "cset w23, hs",
+        "mov x24, x22",
+        "bfi x24, x21, #47, #16",
+        "mov x22, x24",
+        "bfi x22, x23, #63, #1",
+        "mov x24, x22",
+        "bfi x24, x21, #30, #16",
+        "mov x22, x24",
+        "bfi x22, x23, #46, #1",
+        "mov x24, x22",
+        "bfi x24, x21, #13, #16",
+        "mov x22, x24",
+        "bfi x22, x23, #29, #1",
+        "mov x23, x22",
+        "bfxil x23, x21, #0, #16",
+        "ror x21, x23, #62",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22",
+        "ror x20, x23, #61",
         "rmif x20, #63, #nzCv"
       ]
     },
     "rcl eax, 2": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP2 0xC1 /2",
       "ExpectedArm64ASM": [
-        "lsl w20, w4, #2",
-        "cset w21, hs",
-        "orr w20, w20, w4, lsr #31",
-        "rmif x4, #29, #nzCv",
-        "orr w4, w20, w21, lsl #1"
+        "mov x20, x4",
+        "lsl w21, w20, #2",
+        "cset w22, hs",
+        "orr w23, w21, w20, lsr #31",
+        "rmif x20, #29, #nzCv",
+        "orr w20, w23, w22, lsl #1",
+        "mov x4, x20"
       ]
     },
     "rcl rax, 2": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP2 0xC1 /2",
       "ExpectedArm64ASM": [
-        "lsl x20, x4, #2",
-        "cset w21, hs",
-        "orr x20, x20, x4, lsr #63",
-        "rmif x4, #61, #nzCv",
-        "orr x4, x20, x21, lsl #1"
+        "mov x20, x4",
+        "lsl x21, x20, #2",
+        "cset w22, hs",
+        "orr x23, x21, x20, lsr #63",
+        "rmif x20, #61, #nzCv",
+        "orr x20, x23, x22, lsl #1",
+        "mov x4, x20"
       ]
     },
     "rcr ax, 2": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 14,
       "Comment": "GROUP2 0xC1 /3",
       "ExpectedArm64ASM": [
         "cset w20, hs",
-        "uxth w21, w4",
-        "mov x0, x21",
-        "bfi x0, x20, #16, #1",
-        "mov x20, x0",
-        "bfi x20, x20, #17, #17",
-        "bfi x20, x20, #34, #17",
-        "lsr w21, w20, #2",
-        "bfxil x4, x21, #0, #16",
-        "rmif x20, #0, #nzCv"
+        "mov x21, x4",
+        "uxth w22, w21",
+        "mov x23, x22",
+        "bfi x23, x20, #16, #1",
+        "mov x20, x23",
+        "bfi x20, x23, #17, #17",
+        "mov x22, x20",
+        "bfi x22, x20, #34, #17",
+        "lsr w20, w22, #2",
+        "mov x23, x21",
+        "bfxil x23, x20, #0, #16",
+        "mov x4, x23",
+        "rmif x22, #0, #nzCv"
       ]
     },
     "rcr eax, 2": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP2 0xC1 /3",
       "ExpectedArm64ASM": [
-        "lsr w20, w4, #2",
-        "cset w21, hs",
-        "orr w20, w20, w4, lsl #31",
-        "rmif x4, #0, #nzCv",
-        "orr w4, w20, w21, lsl #30"
+        "mov x20, x4",
+        "lsr w21, w20, #2",
+        "cset w22, hs",
+        "orr w23, w21, w20, lsl #31",
+        "rmif x20, #0, #nzCv",
+        "orr w20, w23, w22, lsl #30",
+        "mov x4, x20"
       ]
     },
     "rcr rax, 2": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP2 0xC1 /3",
       "ExpectedArm64ASM": [
-        "lsr x20, x4, #2",
-        "cset w21, hs",
-        "orr x20, x20, x4, lsl #63",
-        "rmif x4, #0, #nzCv",
-        "orr x4, x20, x21, lsl #62"
+        "mov x20, x4",
+        "lsr x21, x20, #2",
+        "cset w22, hs",
+        "orr x23, x21, x20, lsl #63",
+        "rmif x20, #0, #nzCv",
+        "orr x20, x23, x22, lsl #62",
+        "mov x4, x20"
       ]
     },
     "shl ax, 2": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP2 0xC1 /4",
       "ExpectedArm64ASM": [
-        "uxth w20, w4",
-        "lsl w26, w20, #2",
-        "bfxil x4, x26, #0, #16",
-        "cmn wzr, w26, lsl #16",
-        "rmif x20, #13, #nzCv"
+        "mov x20, x4",
+        "uxth w21, w20",
+        "lsl w22, w21, #2",
+        "mov x23, x20",
+        "bfxil x23, x22, #0, #16",
+        "mov x4, x23",
+        "cmn wzr, w22, lsl #16",
+        "rmif x21, #13, #nzCv",
+        "mov x26, x22"
       ]
     },
     "shl eax, 2": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP2 0xC1 /4",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "lsl w4, w20, #2",
-        "tst w4, w4",
-        "rmif x20, #29, #nzCv",
-        "mov x26, x4"
+        "mov x20, x4",
+        "mov w21, w20",
+        "lsl w20, w21, #2",
+        "mov x4, x20",
+        "tst w20, w20",
+        "rmif x21, #29, #nzCv",
+        "mov x26, x20"
       ]
     },
     "shl rax, 2": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP2 0xC1 /4",
       "ExpectedArm64ASM": [
         "mov x20, x4",
-        "lsl x4, x20, #2",
-        "tst x4, x4",
+        "lsl x21, x20, #2",
+        "mov x4, x21",
+        "tst x21, x21",
         "rmif x20, #61, #nzCv",
-        "mov x26, x4"
+        "mov x26, x21"
       ]
     },
     "shr ax, 2": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP2 0xC1 /5",
       "ExpectedArm64ASM": [
-        "uxth w20, w4",
-        "lsr w26, w20, #2",
-        "bfxil x4, x26, #0, #16",
-        "cmn wzr, w26, lsl #16",
-        "rmif x20, #0, #nzCv"
+        "mov x20, x4",
+        "uxth w21, w20",
+        "lsr w22, w21, #2",
+        "mov x23, x20",
+        "bfxil x23, x22, #0, #16",
+        "mov x4, x23",
+        "cmn wzr, w22, lsl #16",
+        "rmif x21, #0, #nzCv",
+        "mov x26, x22"
       ]
     },
     "shr eax, 2": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP2 0xC1 /5",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "lsr w4, w20, #2",
-        "tst w4, w4",
-        "rmif x20, #0, #nzCv",
-        "mov x26, x4"
+        "mov x20, x4",
+        "mov w21, w20",
+        "lsr w20, w21, #2",
+        "mov x4, x20",
+        "tst w20, w20",
+        "rmif x21, #0, #nzCv",
+        "mov x26, x20"
       ]
     },
     "shr rax, 2": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP2 0xC1 /5",
       "ExpectedArm64ASM": [
         "mov x20, x4",
-        "lsr x4, x20, #2",
-        "tst x4, x4",
+        "lsr x21, x20, #2",
+        "mov x4, x21",
+        "tst x21, x21",
         "rmif x20, #0, #nzCv",
-        "mov x26, x4"
+        "mov x26, x21"
       ]
     },
     "sar ax, 2": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP2 0xC1 /7",
       "ExpectedArm64ASM": [
-        "uxth w20, w4",
-        "sxth x20, w20",
-        "asr x26, x20, #2",
-        "bfxil x4, x26, #0, #16",
-        "cmn wzr, w26, lsl #16",
-        "rmif x20, #0, #nzCv"
+        "mov x20, x4",
+        "uxth w21, w20",
+        "sxth x22, w21",
+        "asr x21, x22, #2",
+        "mov x23, x20",
+        "bfxil x23, x21, #0, #16",
+        "mov x4, x23",
+        "cmn wzr, w21, lsl #16",
+        "rmif x22, #0, #nzCv",
+        "mov x26, x21"
       ]
     },
     "sar eax, 2": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP2 0xC1 /7",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "asr w4, w20, #2",
-        "tst w4, w4",
-        "rmif x20, #0, #nzCv",
-        "mov x26, x4"
+        "mov x20, x4",
+        "mov w21, w20",
+        "asr w20, w21, #2",
+        "mov x4, x20",
+        "tst w20, w20",
+        "rmif x21, #0, #nzCv",
+        "mov x26, x20"
       ]
     },
     "sar rax, 2": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP2 0xC1 /7",
       "ExpectedArm64ASM": [
         "mov x20, x4",
-        "asr x4, x20, #2",
-        "tst x4, x4",
+        "asr x21, x20, #2",
+        "mov x4, x21",
+        "tst x21, x21",
         "rmif x20, #0, #nzCv",
-        "mov x26, x4"
+        "mov x26, x21"
       ]
     },
     "rol al, 1": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 12,
       "Comment": "GROUP2 0xd0 /0",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "bfi w20, w4, #8, #8",
-        "bfi w20, w20, #16, #16",
-        "ror w20, w20, #31",
-        "bfxil x4, x20, #0, #8",
-        "rmif x20, #63, #nzCv",
-        "eor w20, w20, w20, lsr #7",
+        "mov x20, x4",
+        "mov w21, w20",
+        "bfi w21, w20, #8, #8",
+        "mov w22, w21",
+        "bfi w22, w21, #16, #16",
+        "ror w21, w22, #31",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22",
+        "rmif x21, #63, #nzCv",
+        "eor w20, w21, w21, lsr #7",
         "rmif x20, #0, #nzcV"
       ]
     },
     "ror al, 1": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP2 0xd0 /1",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "bfi w20, w4, #8, #8",
-        "ror w20, w20, #1",
-        "bfxil x4, x20, #0, #8",
-        "rmif x20, #6, #nzCv",
-        "eor w20, w20, w20, lsr #1",
+        "mov x20, x4",
+        "mov w21, w20",
+        "bfi w21, w20, #8, #8",
+        "ror w22, w21, #1",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #8",
+        "mov x4, x21",
+        "rmif x22, #6, #nzCv",
+        "eor w20, w22, w22, lsr #1",
         "rmif x20, #6, #nzcV"
       ]
     },
     "rcl al, 1": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP2 0xd0 /2",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "cset w21, hs",
-        "orr w21, w21, w20, lsl #1",
-        "bfxil x4, x21, #0, #8",
-        "rmif x20, #6, #nzCv",
-        "eor w20, w21, w20",
+        "mov x20, x4",
+        "uxtb w21, w20",
+        "cset w22, hs",
+        "orr w23, w22, w21, lsl #1",
+        "mov x22, x20",
+        "bfxil x22, x23, #0, #8",
+        "mov x4, x22",
+        "rmif x21, #6, #nzCv",
+        "eor w20, w23, w21",
         "rmif x20, #7, #nzcV"
       ]
     },
     "rcr al, 1": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 12,
       "Comment": "GROUP2 0xd0 /3",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "cset w21, hs",
-        "rmif x20, #63, #nzCv",
-        "ubfx w20, w20, #1, #7",
-        "bfi w20, w21, #7, #1",
-        "bfxil x4, x20, #0, #8",
-        "eor w20, w20, w20, lsr #1",
+        "mov x20, x4",
+        "uxtb w21, w20",
+        "cset w22, hs",
+        "rmif x21, #63, #nzCv",
+        "ubfx w23, w21, #1, #7",
+        "mov w21, w23",
+        "bfi w21, w22, #7, #1",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22",
+        "eor w20, w21, w21, lsr #1",
         "rmif x20, #6, #nzcV"
       ]
     },
     "shl al, 1": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP2 0xd0 /4",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "lsl w26, w20, #1",
-        "bfxil x4, x26, #0, #8",
-        "cmn wzr, w26, lsl #24",
-        "rmif x20, #6, #nzCv",
-        "eor w20, w26, w20",
+        "mov x20, x4",
+        "uxtb w21, w20",
+        "lsl w22, w21, #1",
+        "mov x23, x20",
+        "bfxil x23, x22, #0, #8",
+        "mov x4, x23",
+        "cmn wzr, w22, lsl #24",
+        "rmif x21, #6, #nzCv",
+        "mov x26, x22",
+        "eor w20, w22, w21",
         "rmif x20, #7, #nzcV"
       ]
     },
     "shr al, 1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP2 0xd0 /5",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "lsr w26, w20, #1",
-        "bfxil x4, x26, #0, #8",
-        "cmn wzr, w26, lsl #24",
-        "rmif x20, #63, #nzCv",
-        "rmif x20, #7, #nzcV"
+        "mov x20, x4",
+        "uxtb w21, w20",
+        "lsr w22, w21, #1",
+        "mov x23, x20",
+        "bfxil x23, x22, #0, #8",
+        "mov x4, x23",
+        "cmn wzr, w22, lsl #24",
+        "rmif x21, #63, #nzCv",
+        "mov x26, x22",
+        "rmif x21, #7, #nzcV"
       ]
     },
     "sar al, 1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP2 0xd0 /7",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "sxtb x20, w20",
-        "asr x26, x20, #1",
-        "bfxil x4, x26, #0, #8",
-        "cmn wzr, w26, lsl #24",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x4",
+        "uxtb w21, w20",
+        "sxtb x22, w21",
+        "asr x21, x22, #1",
+        "mov x23, x20",
+        "bfxil x23, x21, #0, #8",
+        "mov x4, x23",
+        "cmn wzr, w21, lsl #24",
+        "rmif x22, #63, #nzCv",
+        "mov x26, x21"
       ]
     },
     "rol ax, 1": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP2 0xd1 /0",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "bfi w20, w4, #16, #16",
-        "ror w20, w20, #31",
-        "bfxil x4, x20, #0, #16",
-        "rmif x20, #63, #nzCv",
-        "eor w20, w20, w20, lsr #15",
+        "mov x20, x4",
+        "mov w21, w20",
+        "bfi w21, w20, #16, #16",
+        "ror w22, w21, #31",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21",
+        "rmif x22, #63, #nzCv",
+        "eor w20, w22, w22, lsr #15",
         "rmif x20, #0, #nzcV"
       ]
     },
     "rol eax, 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP2 0xd1 /0",
       "ExpectedArm64ASM": [
-        "ror w4, w4, #31",
-        "rmif x4, #63, #nzCv",
-        "eor w20, w4, w4, lsr #31",
+        "mov x20, x4",
+        "ror w21, w20, #31",
+        "mov x4, x21",
+        "rmif x21, #63, #nzCv",
+        "eor w20, w21, w21, lsr #31",
         "rmif x20, #0, #nzcV"
       ]
     },
     "rol rax, 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP2 0xd1 /0",
       "ExpectedArm64ASM": [
-        "ror x4, x4, #63",
-        "rmif x4, #63, #nzCv",
-        "eor x20, x4, x4, lsr #63",
+        "mov x20, x4",
+        "ror x21, x20, #63",
+        "mov x4, x21",
+        "rmif x21, #63, #nzCv",
+        "eor x20, x21, x21, lsr #63",
         "rmif x20, #0, #nzcV"
       ]
     },
     "ror ax, 1": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP2 0xd1 /1",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "bfi w20, w4, #16, #16",
-        "ror w20, w20, #1",
-        "bfxil x4, x20, #0, #16",
-        "rmif x20, #14, #nzCv",
-        "eor w20, w20, w20, lsr #1",
+        "mov x20, x4",
+        "mov w21, w20",
+        "bfi w21, w20, #16, #16",
+        "ror w22, w21, #1",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21",
+        "rmif x22, #14, #nzCv",
+        "eor w20, w22, w22, lsr #1",
         "rmif x20, #14, #nzcV"
       ]
     },
     "ror eax, 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP2 0xd1 /1",
       "ExpectedArm64ASM": [
-        "ror w4, w4, #1",
-        "rmif x4, #30, #nzCv",
-        "eor w20, w4, w4, lsr #1",
+        "mov x20, x4",
+        "ror w21, w20, #1",
+        "mov x4, x21",
+        "rmif x21, #30, #nzCv",
+        "eor w20, w21, w21, lsr #1",
         "rmif x20, #30, #nzcV"
       ]
     },
     "ror rax, 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP2 0xd1 /1",
       "ExpectedArm64ASM": [
-        "ror x4, x4, #1",
-        "rmif x4, #62, #nzCv",
-        "eor x20, x4, x4, lsr #1",
+        "mov x20, x4",
+        "ror x21, x20, #1",
+        "mov x4, x21",
+        "rmif x21, #62, #nzCv",
+        "eor x20, x21, x21, lsr #1",
         "rmif x20, #62, #nzcV"
       ]
     },
     "rcl ax, 1": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP2 0xd1 /2",
       "ExpectedArm64ASM": [
-        "uxth w20, w4",
-        "cset w21, hs",
-        "orr w21, w21, w20, lsl #1",
-        "bfxil x4, x21, #0, #16",
-        "rmif x20, #14, #nzCv",
-        "eor w20, w21, w20",
+        "mov x20, x4",
+        "uxth w21, w20",
+        "cset w22, hs",
+        "orr w23, w22, w21, lsl #1",
+        "mov x22, x20",
+        "bfxil x22, x23, #0, #16",
+        "mov x4, x22",
+        "rmif x21, #14, #nzCv",
+        "eor w20, w23, w21",
         "rmif x20, #15, #nzcV"
       ]
     },
     "rcl eax, 1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP2 0xd1 /2",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "cset w21, hs",
-        "orr w4, w21, w20, lsl #1",
-        "rmif x20, #30, #nzCv",
-        "eor w20, w4, w20",
+        "mov x20, x4",
+        "mov w21, w20",
+        "cset w20, hs",
+        "orr w22, w20, w21, lsl #1",
+        "mov x4, x22",
+        "rmif x21, #30, #nzCv",
+        "eor w20, w22, w21",
         "rmif x20, #31, #nzcV"
       ]
     },
     "rcl rax, 1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP2 0xd1 /2",
       "ExpectedArm64ASM": [
         "mov x20, x4",
         "cset w21, hs",
-        "orr x4, x21, x20, lsl #1",
+        "orr x22, x21, x20, lsl #1",
+        "mov x4, x22",
         "rmif x20, #62, #nzCv",
-        "eor x20, x4, x20",
-        "rmif x20, #63, #nzcV"
+        "eor x21, x22, x20",
+        "rmif x21, #63, #nzcV"
       ]
     },
     "rcr ax, 1": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP2 0xd1 /3",
       "ExpectedArm64ASM": [
-        "cset w20, hs",
-        "rmif x4, #63, #nzCv",
-        "ubfx w21, w4, #1, #15",
-        "orr w20, w21, w20, lsl #15",
-        "bfxil x4, x20, #0, #16",
-        "eor x20, x20, x20, lsr #1",
+        "mov x20, x4",
+        "cset w21, hs",
+        "rmif x20, #63, #nzCv",
+        "ubfx w22, w20, #1, #15",
+        "orr w23, w22, w21, lsl #15",
+        "mov x21, x20",
+        "bfxil x21, x23, #0, #16",
+        "mov x4, x21",
+        "eor x20, x23, x23, lsr #1",
         "rmif x20, #14, #nzcV"
       ]
     },
     "rcr eax, 1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP2 0xd1 /3",
       "ExpectedArm64ASM": [
-        "cset w20, hs",
-        "rmif x4, #63, #nzCv",
-        "extr w4, w20, w4, #1",
-        "eor x20, x4, x4, lsr #1",
+        "mov x20, x4",
+        "cset w21, hs",
+        "rmif x20, #63, #nzCv",
+        "extr w22, w21, w20, #1",
+        "mov x4, x22",
+        "eor x20, x22, x22, lsr #1",
         "rmif x20, #30, #nzcV"
       ]
     },
     "rcr rax, 1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP2 0xd1 /3",
       "ExpectedArm64ASM": [
-        "cset w20, hs",
-        "rmif x4, #63, #nzCv",
-        "extr x4, x20, x4, #1",
-        "eor x20, x4, x4, lsr #1",
+        "mov x20, x4",
+        "cset w21, hs",
+        "rmif x20, #63, #nzCv",
+        "extr x22, x21, x20, #1",
+        "mov x4, x22",
+        "eor x20, x22, x22, lsr #1",
         "rmif x20, #62, #nzcV"
       ]
     },
     "shl ax, 1": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP2 0xd1 /4",
       "ExpectedArm64ASM": [
-        "uxth w20, w4",
-        "lsl w26, w20, #1",
-        "bfxil x4, x26, #0, #16",
-        "cmn wzr, w26, lsl #16",
-        "rmif x20, #14, #nzCv",
-        "eor w20, w26, w20",
+        "mov x20, x4",
+        "uxth w21, w20",
+        "lsl w22, w21, #1",
+        "mov x23, x20",
+        "bfxil x23, x22, #0, #16",
+        "mov x4, x23",
+        "cmn wzr, w22, lsl #16",
+        "rmif x21, #14, #nzCv",
+        "mov x26, x22",
+        "eor w20, w22, w21",
         "rmif x20, #15, #nzcV"
       ]
     },
     "shl eax, 1": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP2 0xd1 /4",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "lsl w4, w20, #1",
-        "tst w4, w4",
-        "rmif x20, #30, #nzCv",
-        "mov x26, x4",
-        "eor w20, w4, w20",
-        "rmif x20, #31, #nzcV"
+        "mov x20, x4",
+        "mov w21, w20",
+        "lsl w20, w21, #1",
+        "mov x4, x20",
+        "tst w20, w20",
+        "rmif x21, #30, #nzCv",
+        "mov x26, x20",
+        "eor w22, w20, w21",
+        "rmif x22, #31, #nzcV"
       ]
     },
     "shl rax, 1": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP2 0xd1 /4",
       "ExpectedArm64ASM": [
         "mov x20, x4",
-        "lsl x4, x20, #1",
-        "tst x4, x4",
+        "lsl x21, x20, #1",
+        "mov x4, x21",
+        "tst x21, x21",
         "rmif x20, #62, #nzCv",
-        "mov x26, x4",
-        "eor x20, x4, x20",
-        "rmif x20, #63, #nzcV"
+        "mov x26, x21",
+        "eor x22, x21, x20",
+        "rmif x22, #63, #nzcV"
       ]
     },
     "shr ax, 1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP2 0xd1 /5",
       "ExpectedArm64ASM": [
-        "uxth w20, w4",
-        "lsr w26, w20, #1",
-        "bfxil x4, x26, #0, #16",
-        "cmn wzr, w26, lsl #16",
-        "rmif x20, #63, #nzCv",
-        "rmif x20, #15, #nzcV"
+        "mov x20, x4",
+        "uxth w21, w20",
+        "lsr w22, w21, #1",
+        "mov x23, x20",
+        "bfxil x23, x22, #0, #16",
+        "mov x4, x23",
+        "cmn wzr, w22, lsl #16",
+        "rmif x21, #63, #nzCv",
+        "mov x26, x22",
+        "rmif x21, #15, #nzcV"
       ]
     },
     "shr eax, 1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP2 0xd1 /5",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "lsr w4, w20, #1",
-        "tst w4, w4",
-        "rmif x20, #63, #nzCv",
-        "mov x26, x4",
-        "rmif x20, #31, #nzcV"
+        "mov x20, x4",
+        "mov w21, w20",
+        "lsr w20, w21, #1",
+        "mov x4, x20",
+        "tst w20, w20",
+        "rmif x21, #63, #nzCv",
+        "mov x26, x20",
+        "rmif x21, #31, #nzcV"
       ]
     },
     "shr rax, 1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP2 0xd1 /5",
       "ExpectedArm64ASM": [
         "mov x20, x4",
-        "lsr x4, x20, #1",
-        "tst x4, x4",
+        "lsr x21, x20, #1",
+        "mov x4, x21",
+        "tst x21, x21",
         "rmif x20, #63, #nzCv",
-        "mov x26, x4",
+        "mov x26, x21",
         "rmif x20, #63, #nzcV"
       ]
     },
     "sar ax, 1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP2 0xd1 /7",
       "ExpectedArm64ASM": [
-        "uxth w20, w4",
-        "sxth x20, w20",
-        "asr x26, x20, #1",
-        "bfxil x4, x26, #0, #16",
-        "cmn wzr, w26, lsl #16",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x4",
+        "uxth w21, w20",
+        "sxth x22, w21",
+        "asr x21, x22, #1",
+        "mov x23, x20",
+        "bfxil x23, x21, #0, #16",
+        "mov x4, x23",
+        "cmn wzr, w21, lsl #16",
+        "rmif x22, #63, #nzCv",
+        "mov x26, x21"
       ]
     },
     "sar eax, 1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP2 0xd1 /7",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "asr w4, w20, #1",
-        "tst w4, w4",
-        "rmif x20, #63, #nzCv",
-        "mov x26, x4"
+        "mov x20, x4",
+        "mov w21, w20",
+        "asr w20, w21, #1",
+        "mov x4, x20",
+        "tst w20, w20",
+        "rmif x21, #63, #nzCv",
+        "mov x26, x20"
       ]
     },
     "sar rax, 1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP2 0xd1 /7",
       "ExpectedArm64ASM": [
         "mov x20, x4",
-        "asr x4, x20, #1",
-        "tst x4, x4",
+        "asr x21, x20, #1",
+        "mov x4, x21",
+        "tst x21, x21",
         "rmif x20, #63, #nzCv",
-        "mov x26, x4"
+        "mov x26, x21"
       ]
     },
     "rol al, cl": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 17,
       "Comment": "GROUP2 0xd2 /0",
       "ExpectedArm64ASM": [
-        "and x20, x5, #0x1f",
-        "cbz x20, #+0x28",
-        "mov w20, w4",
-        "bfi w20, w4, #8, #8",
-        "bfi w20, w20, #16, #16",
-        "neg w21, w5",
-        "ror w20, w20, w21",
-        "bfxil x4, x20, #0, #8",
+        "mov x20, x5",
+        "and x21, x20, #0x1f",
+        "cbz x21, #+0x3c",
+        "mov x20, x5",
+        "mov x21, x4",
+        "mov w22, w21",
+        "bfi w22, w21, #8, #8",
+        "mov w23, w22",
+        "bfi w23, w22, #16, #16",
+        "neg w22, w20",
+        "ror w20, w23, w22",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22",
         "rmif x20, #63, #nzCv",
-        "eor w20, w20, w20, lsr #7",
-        "rmif x20, #0, #nzcV"
+        "eor w21, w20, w20, lsr #7",
+        "rmif x21, #0, #nzcV"
       ]
     },
     "ror al, cl": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 16,
       "Comment": "GROUP2 0xd2 /1",
       "ExpectedArm64ASM": [
-        "and x20, x5, #0x1f",
-        "cbz x20, #+0x24",
-        "mov w20, w4",
-        "bfi w20, w4, #8, #8",
-        "bfi w20, w20, #16, #16",
-        "ror w20, w20, w5",
-        "bfxil x4, x20, #0, #8",
-        "rmif x20, #6, #nzCv",
-        "eor w20, w20, w20, lsr #1",
+        "mov x20, x5",
+        "and x21, x20, #0x1f",
+        "cbz x21, #+0x38",
+        "mov x20, x5",
+        "mov x21, x4",
+        "mov w22, w21",
+        "bfi w22, w21, #8, #8",
+        "mov w23, w22",
+        "bfi w23, w22, #16, #16",
+        "ror w22, w23, w20",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #8",
+        "mov x4, x20",
+        "rmif x22, #6, #nzCv",
+        "eor w20, w22, w22, lsr #1",
         "rmif x20, #6, #nzcV"
       ]
     },
     "rcl al, cl": {
-      "ExpectedInstructionCount": 28,
+      "ExpectedInstructionCount": 42,
       "Comment": "GROUP2 0xd2 /2",
       "ExpectedArm64ASM": [
-        "and w20, w5, #0x1f",
-        "cbz x20, #+0x6c",
-        "and w20, w5, #0x1f",
-        "uxtb w21, w4",
-        "mov w22, #0x0",
-        "cset w23, hs",
-        "bfi x22, x21, #55, #8",
-        "bfi x22, x23, #63, #1",
-        "bfi x22, x21, #46, #8",
-        "bfi x22, x23, #54, #1",
-        "bfi x22, x21, #37, #8",
-        "bfi x22, x23, #45, #1",
-        "bfi x22, x21, #28, #8",
-        "bfi x22, x23, #36, #1",
-        "bfi x22, x21, #19, #8",
-        "bfi x22, x23, #27, #1",
-        "mov x0, x22",
-        "bfxil x0, x21, #0, #8",
-        "mov x21, x0",
-        "neg w22, w20",
-        "ror x22, x21, x22",
-        "bfxil x4, x22, #0, #8",
-        "mov w23, #0x3f",
-        "sub x20, x23, x20",
-        "ror x20, x21, x20",
+        "mov x20, x5",
+        "and w21, w20, #0x1f",
+        "cbz x21, #+0xa0",
+        "mov x20, x5",
+        "and w21, w20, #0x1f",
+        "mov x20, x4",
+        "uxtb w22, w20",
+        "mov w23, #0x0",
+        "cset w24, hs",
+        "mov x25, x23",
+        "bfi x25, x22, #55, #8",
+        "mov x23, x25",
+        "bfi x23, x24, #63, #1",
+        "mov x25, x23",
+        "bfi x25, x22, #46, #8",
+        "mov x23, x25",
+        "bfi x23, x24, #54, #1",
+        "mov x25, x23",
+        "bfi x25, x22, #37, #8",
+        "mov x23, x25",
+        "bfi x23, x24, #45, #1",
+        "mov x25, x23",
+        "bfi x25, x22, #28, #8",
+        "mov x23, x25",
+        "bfi x23, x24, #36, #1",
+        "mov x25, x23",
+        "bfi x25, x22, #19, #8",
+        "mov x23, x25",
+        "bfi x23, x24, #27, #1",
+        "mov x24, x23",
+        "bfxil x24, x22, #0, #8",
+        "neg w22, w21",
+        "ror x23, x24, x22",
+        "mov x22, x20",
+        "bfxil x22, x23, #0, #8",
+        "mov x4, x22",
+        "mov w20, #0x3f",
+        "sub x22, x20, x21",
+        "ror x20, x24, x22",
         "rmif x20, #63, #nzCv",
-        "eor x20, x20, x22, lsr #7",
-        "rmif x20, #0, #nzcV"
+        "eor x21, x20, x23, lsr #7",
+        "rmif x21, #0, #nzcV"
       ]
     },
     "rcr al, cl": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 24,
       "Comment": "GROUP2 0xd2 /3",
       "ExpectedArm64ASM": [
-        "and w20, w5, #0x1f",
-        "cbz x20, #+0x40",
-        "cset w20, hs",
-        "uxtb w21, w4",
-        "mov x0, x21",
-        "bfi x0, x20, #8, #1",
-        "mov x20, x0",
-        "bfi x20, x20, #9, #9",
-        "bfi x20, x20, #18, #18",
-        "bfi x20, x20, #36, #9",
-        "lsr w21, w20, w5",
-        "bfxil x4, x21, #0, #8",
-        "sub w22, w5, #0x1 (1)",
-        "lsr w20, w20, w22",
+        "mov x20, x5",
+        "and w21, w20, #0x1f",
+        "cbz x21, #+0x58",
+        "mov x20, x5",
+        "cset w21, hs",
+        "mov x22, x4",
+        "uxtb w23, w22",
+        "mov x24, x23",
+        "bfi x24, x21, #8, #1",
+        "mov x21, x24",
+        "bfi x21, x24, #9, #9",
+        "mov x23, x21",
+        "bfi x23, x21, #18, #18",
+        "mov x21, x23",
+        "bfi x21, x23, #36, #9",
+        "lsr w23, w21, w20",
+        "mov x24, x22",
+        "bfxil x24, x23, #0, #8",
+        "mov x4, x24",
+        "sub w22, w20, #0x1 (1)",
+        "lsr w20, w21, w22",
         "rmif x20, #63, #nzCv",
-        "eor w20, w21, w21, lsr #1",
+        "eor w20, w23, w23, lsr #1",
         "rmif x20, #6, #nzcV"
       ]
     },
     "shl al, cl": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 20,
       "Comment": "GROUP2 0xd2 /4",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "uxtb w21, w5",
-        "lsl w22, w20, w21",
-        "bfxil x4, x22, #0, #8",
-        "cbz w21, #+0x24",
+        "mov x20, x4",
+        "uxtb w21, w20",
+        "mov x22, x5",
+        "uxtb w23, w22",
+        "lsl w22, w21, w23",
+        "mov x24, x20",
+        "bfxil x24, x22, #0, #8",
+        "mov x4, x24",
+        "mov x20, x26",
+        "mov x24, x20",
+        "cbz w23, #+0x24",
         "cmn wzr, w22, lsl #24",
-        "mov x26, x22",
+        "mov x24, x22",
         "mov w0, #0x8",
-        "sub w0, w0, w21",
-        "lsr w0, w20, w0",
-        "eor w2, w20, w22",
+        "sub w0, w0, w23",
+        "lsr w0, w21, w0",
+        "eor w2, w21, w22",
         "rmif x0, #63, #nzCv",
-        "rmif x2, #7, #nzcV"
+        "rmif x2, #7, #nzcV",
+        "mov x26, x24"
       ]
     },
     "shr al, cl": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 19,
       "Comment": "GROUP2 0xd2 /5",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "uxtb w21, w5",
-        "lsr w22, w20, w21",
-        "bfxil x4, x22, #0, #8",
-        "cbz w21, #+0x20",
+        "mov x20, x4",
+        "uxtb w21, w20",
+        "mov x22, x5",
+        "uxtb w23, w22",
+        "lsr w22, w21, w23",
+        "mov x24, x20",
+        "bfxil x24, x22, #0, #8",
+        "mov x4, x24",
+        "mov x20, x26",
+        "mov x24, x20",
+        "cbz w23, #+0x20",
         "cmn wzr, w22, lsl #24",
-        "mov x26, x22",
-        "sub x0, x21, #0x1 (1)",
-        "lsr w0, w20, w0",
-        "eor w2, w20, w22",
+        "mov x24, x22",
+        "sub x0, x23, #0x1 (1)",
+        "lsr w0, w21, w0",
+        "eor w2, w21, w22",
         "rmif x0, #63, #nzCv",
-        "rmif x2, #7, #nzcV"
+        "rmif x2, #7, #nzcV",
+        "mov x26, x24"
       ]
     },
     "sar al, cl": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 18,
       "Comment": "GROUP2 0xd2 /7",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "uxtb w21, w5",
-        "sxtb x20, w20",
-        "asr w22, w20, w21",
-        "bfxil x4, x22, #0, #8",
-        "cbz w21, #+0x18",
-        "cmn wzr, w22, lsl #24",
-        "mov x26, x22",
-        "sub x0, x21, #0x1 (1)",
-        "lsr w0, w20, w0",
-        "rmif x0, #63, #nzCv"
+        "mov x20, x4",
+        "uxtb w21, w20",
+        "mov x22, x5",
+        "uxtb w23, w22",
+        "sxtb x22, w21",
+        "asr w21, w22, w23",
+        "mov x24, x20",
+        "bfxil x24, x21, #0, #8",
+        "mov x4, x24",
+        "mov x20, x26",
+        "mov x24, x20",
+        "cbz w23, #+0x18",
+        "cmn wzr, w21, lsl #24",
+        "mov x24, x21",
+        "sub x0, x23, #0x1 (1)",
+        "lsr w0, w22, w0",
+        "rmif x0, #63, #nzCv",
+        "mov x26, x24"
       ]
     },
     "rol ax, cl": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 15,
       "Comment": "GROUP2 0xd3 /0",
       "ExpectedArm64ASM": [
-        "and x20, x5, #0x1f",
-        "cbz x20, #+0x24",
-        "mov w20, w4",
-        "bfi w20, w4, #16, #16",
-        "neg w21, w5",
-        "ror w20, w20, w21",
-        "bfxil x4, x20, #0, #16",
+        "mov x20, x5",
+        "and x21, x20, #0x1f",
+        "cbz x21, #+0x34",
+        "mov x20, x5",
+        "mov x21, x4",
+        "mov w22, w21",
+        "bfi w22, w21, #16, #16",
+        "neg w23, w20",
+        "ror w20, w22, w23",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x4, x22",
         "rmif x20, #63, #nzCv",
-        "eor w20, w20, w20, lsr #15",
-        "rmif x20, #0, #nzcV"
+        "eor w21, w20, w20, lsr #15",
+        "rmif x21, #0, #nzcV"
       ]
     },
     "rol eax, cl": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP2 0xd3 /0",
       "ExpectedArm64ASM": [
-        "and x20, x5, #0x1f",
-        "cbz x20, #+0x18",
-        "neg w20, w5",
-        "ror w4, w4, w20",
-        "rmif x4, #63, #nzCv",
-        "eor w20, w4, w4, lsr #31",
-        "rmif x20, #0, #nzcV"
+        "mov x20, x5",
+        "and x21, x20, #0x1f",
+        "cbz x21, #+0x24",
+        "mov x20, x5",
+        "mov x21, x4",
+        "neg w22, w20",
+        "ror w20, w21, w22",
+        "mov x4, x20",
+        "rmif x20, #63, #nzCv",
+        "eor w21, w20, w20, lsr #31",
+        "rmif x21, #0, #nzcV"
       ]
     },
     "rol rax, cl": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP2 0xd3 /0",
       "ExpectedArm64ASM": [
-        "and x20, x5, #0x3f",
-        "cbz x20, #+0x18",
-        "neg x20, x5",
-        "ror x4, x4, x20",
-        "rmif x4, #63, #nzCv",
-        "eor x20, x4, x4, lsr #63",
-        "rmif x20, #0, #nzcV"
+        "mov x20, x5",
+        "and x21, x20, #0x3f",
+        "cbz x21, #+0x24",
+        "mov x20, x5",
+        "mov x21, x4",
+        "neg x22, x20",
+        "ror x20, x21, x22",
+        "mov x4, x20",
+        "rmif x20, #63, #nzCv",
+        "eor x21, x20, x20, lsr #63",
+        "rmif x21, #0, #nzcV"
       ]
     },
     "ror ax, cl": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 14,
       "Comment": "GROUP2 0xd3 /1",
       "ExpectedArm64ASM": [
-        "and x20, x5, #0x1f",
-        "cbz x20, #+0x20",
-        "mov w20, w4",
-        "bfi w20, w4, #16, #16",
-        "ror w20, w20, w5",
-        "bfxil x4, x20, #0, #16",
-        "rmif x20, #14, #nzCv",
-        "eor w20, w20, w20, lsr #1",
+        "mov x20, x5",
+        "and x21, x20, #0x1f",
+        "cbz x21, #+0x30",
+        "mov x20, x5",
+        "mov x21, x4",
+        "mov w22, w21",
+        "bfi w22, w21, #16, #16",
+        "ror w23, w22, w20",
+        "mov x20, x21",
+        "bfxil x20, x23, #0, #16",
+        "mov x4, x20",
+        "rmif x23, #14, #nzCv",
+        "eor w20, w23, w23, lsr #1",
         "rmif x20, #14, #nzcV"
       ]
     },
     "ror eax, cl": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP2 0xd3 /1",
       "ExpectedArm64ASM": [
-        "and x20, x5, #0x1f",
-        "cbz x20, #+0x14",
-        "ror w4, w4, w5",
-        "rmif x4, #30, #nzCv",
-        "eor w20, w4, w4, lsr #1",
+        "mov x20, x5",
+        "and x21, x20, #0x1f",
+        "cbz x21, #+0x20",
+        "mov x20, x5",
+        "mov x21, x4",
+        "ror w22, w21, w20",
+        "mov x4, x22",
+        "rmif x22, #30, #nzCv",
+        "eor w20, w22, w22, lsr #1",
         "rmif x20, #30, #nzcV"
       ]
     },
     "ror rax, cl": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP2 0xd3 /1",
       "ExpectedArm64ASM": [
-        "and x20, x5, #0x3f",
-        "cbz x20, #+0x14",
-        "ror x4, x4, x5",
-        "rmif x4, #62, #nzCv",
-        "eor x20, x4, x4, lsr #1",
+        "mov x20, x5",
+        "and x21, x20, #0x3f",
+        "cbz x21, #+0x20",
+        "mov x20, x5",
+        "mov x21, x4",
+        "ror x22, x21, x20",
+        "mov x4, x22",
+        "rmif x22, #62, #nzCv",
+        "eor x20, x22, x22, lsr #1",
         "rmif x20, #62, #nzcV"
       ]
     },
     "rcl ax, cl": {
-      "ExpectedInstructionCount": 24,
+      "ExpectedInstructionCount": 34,
       "Comment": "GROUP2 0xd3 /2",
       "ExpectedArm64ASM": [
-        "and w20, w5, #0x1f",
-        "cbz x20, #+0x5c",
-        "and w20, w5, #0x1f",
-        "uxth w21, w4",
-        "mov w22, #0x0",
-        "cset w23, hs",
-        "bfi x22, x21, #47, #16",
-        "bfi x22, x23, #63, #1",
-        "bfi x22, x21, #30, #16",
-        "bfi x22, x23, #46, #1",
-        "bfi x22, x21, #13, #16",
-        "bfi x22, x23, #29, #1",
-        "mov x0, x22",
-        "bfxil x0, x21, #0, #16",
-        "mov x21, x0",
-        "neg w22, w20",
-        "ror x22, x21, x22",
-        "bfxil x4, x22, #0, #16",
-        "mov w23, #0x3f",
-        "sub x20, x23, x20",
-        "ror x20, x21, x20",
+        "mov x20, x5",
+        "and w21, w20, #0x1f",
+        "cbz x21, #+0x80",
+        "mov x20, x5",
+        "and w21, w20, #0x1f",
+        "mov x20, x4",
+        "uxth w22, w20",
+        "mov w23, #0x0",
+        "cset w24, hs",
+        "mov x25, x23",
+        "bfi x25, x22, #47, #16",
+        "mov x23, x25",
+        "bfi x23, x24, #63, #1",
+        "mov x25, x23",
+        "bfi x25, x22, #30, #16",
+        "mov x23, x25",
+        "bfi x23, x24, #46, #1",
+        "mov x25, x23",
+        "bfi x25, x22, #13, #16",
+        "mov x23, x25",
+        "bfi x23, x24, #29, #1",
+        "mov x24, x23",
+        "bfxil x24, x22, #0, #16",
+        "neg w22, w21",
+        "ror x23, x24, x22",
+        "mov x22, x20",
+        "bfxil x22, x23, #0, #16",
+        "mov x4, x22",
+        "mov w20, #0x3f",
+        "sub x22, x20, x21",
+        "ror x20, x24, x22",
         "rmif x20, #63, #nzCv",
-        "eor x20, x20, x22, lsr #15",
-        "rmif x20, #0, #nzcV"
+        "eor x21, x20, x23, lsr #15",
+        "rmif x21, #0, #nzcV"
       ]
     },
     "rcl eax, cl": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 18,
       "Comment": "GROUP2 0xd3 /2",
       "ExpectedArm64ASM": [
-        "and w20, w5, #0x1f",
-        "cbz x20, #+0x34",
-        "lsl w20, w4, w5",
-        "cset w21, hs",
-        "neg w22, w5",
-        "lsr w23, w4, w22",
-        "orr w20, w20, w23, lsr #1",
-        "lsr w22, w4, w22",
+        "mov x20, x5",
+        "and w21, w20, #0x1f",
+        "cbz x21, #+0x40",
+        "mov x20, x5",
+        "mov x21, x4",
+        "lsl w22, w21, w20",
+        "cset w23, hs",
+        "neg w24, w20",
+        "lsr w25, w21, w24",
+        "orr w30, w22, w25, lsr #1",
+        "lsr w22, w21, w24",
         "rmif x22, #63, #nzCv",
-        "sub w23, w5, #0x1 (1)",
-        "lsl w21, w21, w23",
-        "orr w4, w20, w21",
-        "eor w20, w4, w22, lsl #31",
-        "rmif x20, #31, #nzcV"
+        "sub w21, w20, #0x1 (1)",
+        "lsl w20, w23, w21",
+        "orr w21, w30, w20",
+        "eor w20, w21, w22, lsl #31",
+        "rmif x20, #31, #nzcV",
+        "mov x4, x21"
       ]
     },
     "rcl rax, cl": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 18,
       "Comment": "GROUP2 0xd3 /2",
       "ExpectedArm64ASM": [
-        "and x20, x5, #0x3f",
-        "cbz x20, #+0x34",
-        "lsl x20, x4, x5",
-        "cset w21, hs",
-        "neg x22, x5",
-        "lsr x23, x4, x22",
-        "orr x20, x20, x23, lsr #1",
-        "lsr x22, x4, x22",
+        "mov x20, x5",
+        "and x21, x20, #0x3f",
+        "cbz x21, #+0x40",
+        "mov x20, x5",
+        "mov x21, x4",
+        "lsl x22, x21, x20",
+        "cset w23, hs",
+        "neg x24, x20",
+        "lsr x25, x21, x24",
+        "orr x30, x22, x25, lsr #1",
+        "lsr x22, x21, x24",
         "rmif x22, #63, #nzCv",
-        "sub x23, x5, #0x1 (1)",
-        "lsl x21, x21, x23",
-        "orr x4, x20, x21",
-        "eor x20, x4, x22, lsl #63",
-        "rmif x20, #63, #nzcV"
+        "sub x21, x20, #0x1 (1)",
+        "lsl x20, x23, x21",
+        "orr x21, x30, x20",
+        "eor x20, x21, x22, lsl #63",
+        "rmif x20, #63, #nzcV",
+        "mov x4, x21"
       ]
     },
     "rcr ax, cl": {
-      "ExpectedInstructionCount": 16,
+      "ExpectedInstructionCount": 22,
       "Comment": "GROUP2 0xd3 /3",
       "ExpectedArm64ASM": [
-        "and w20, w5, #0x1f",
-        "cbz x20, #+0x3c",
-        "cset w20, hs",
-        "uxth w21, w4",
-        "mov x0, x21",
-        "bfi x0, x20, #16, #1",
-        "mov x20, x0",
-        "bfi x20, x20, #17, #17",
-        "bfi x20, x20, #34, #17",
-        "lsr w21, w20, w5",
-        "bfxil x4, x21, #0, #16",
-        "sub w22, w5, #0x1 (1)",
-        "lsr w20, w20, w22",
+        "mov x20, x5",
+        "and w21, w20, #0x1f",
+        "cbz x21, #+0x50",
+        "mov x20, x5",
+        "cset w21, hs",
+        "mov x22, x4",
+        "uxth w23, w22",
+        "mov x24, x23",
+        "bfi x24, x21, #16, #1",
+        "mov x21, x24",
+        "bfi x21, x24, #17, #17",
+        "mov x23, x21",
+        "bfi x23, x21, #34, #17",
+        "lsr w21, w23, w20",
+        "mov x24, x22",
+        "bfxil x24, x21, #0, #16",
+        "mov x4, x24",
+        "sub w22, w20, #0x1 (1)",
+        "lsr w20, w23, w22",
         "rmif x20, #63, #nzCv",
         "eor w20, w21, w21, lsr #1",
         "rmif x20, #14, #nzcV"
       ]
     },
     "rcr eax, cl": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 18,
       "Comment": "GROUP2 0xd3 /3",
       "ExpectedArm64ASM": [
-        "and w20, w5, #0x1f",
-        "cbz x20, #+0x34",
-        "lsr w20, w4, w5",
-        "cset w21, hs",
-        "neg w22, w5",
-        "lsl w23, w4, w22",
-        "orr w20, w20, w23, lsl #1",
-        "sub w23, w5, #0x1 (1)",
-        "lsr w23, w4, w23",
-        "rmif x23, #63, #nzCv",
-        "lsl w21, w21, w22",
-        "orr w4, w20, w21",
-        "eor w20, w4, w4, lsr #1",
-        "rmif x20, #30, #nzcV"
+        "mov x20, x5",
+        "and w21, w20, #0x1f",
+        "cbz x21, #+0x40",
+        "mov x20, x5",
+        "mov x21, x4",
+        "lsr w22, w21, w20",
+        "cset w23, hs",
+        "neg w24, w20",
+        "lsl w25, w21, w24",
+        "orr w30, w22, w25, lsl #1",
+        "sub w22, w20, #0x1 (1)",
+        "lsr w20, w21, w22",
+        "rmif x20, #63, #nzCv",
+        "lsl w20, w23, w24",
+        "orr w21, w30, w20",
+        "eor w20, w21, w21, lsr #1",
+        "rmif x20, #30, #nzcV",
+        "mov x4, x21"
       ]
     },
     "rcr rax, cl": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 18,
       "Comment": "GROUP2 0xd3 /3",
       "ExpectedArm64ASM": [
-        "and x20, x5, #0x3f",
-        "cbz x20, #+0x34",
-        "lsr x20, x4, x5",
-        "cset w21, hs",
-        "neg x22, x5",
-        "lsl x23, x4, x22",
-        "orr x20, x20, x23, lsl #1",
-        "sub x23, x5, #0x1 (1)",
-        "lsr x23, x4, x23",
-        "rmif x23, #63, #nzCv",
-        "lsl x21, x21, x22",
-        "orr x4, x20, x21",
-        "eor x20, x4, x4, lsr #1",
-        "rmif x20, #62, #nzcV"
+        "mov x20, x5",
+        "and x21, x20, #0x3f",
+        "cbz x21, #+0x40",
+        "mov x20, x5",
+        "mov x21, x4",
+        "lsr x22, x21, x20",
+        "cset w23, hs",
+        "neg x24, x20",
+        "lsl x25, x21, x24",
+        "orr x30, x22, x25, lsl #1",
+        "sub x22, x20, #0x1 (1)",
+        "lsr x20, x21, x22",
+        "rmif x20, #63, #nzCv",
+        "lsl x20, x23, x24",
+        "orr x21, x30, x20",
+        "eor x20, x21, x21, lsr #1",
+        "rmif x20, #62, #nzcV",
+        "mov x4, x21"
       ]
     },
     "shl ax, cl": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 20,
       "Comment": "GROUP2 0xd3 /4",
       "ExpectedArm64ASM": [
-        "uxth w20, w4",
-        "uxth w21, w5",
-        "lsl w22, w20, w21",
-        "bfxil x4, x22, #0, #16",
-        "cbz w21, #+0x24",
+        "mov x20, x4",
+        "uxth w21, w20",
+        "mov x22, x5",
+        "uxth w23, w22",
+        "lsl w22, w21, w23",
+        "mov x24, x20",
+        "bfxil x24, x22, #0, #16",
+        "mov x4, x24",
+        "mov x20, x26",
+        "mov x24, x20",
+        "cbz w23, #+0x24",
         "cmn wzr, w22, lsl #16",
-        "mov x26, x22",
+        "mov x24, x22",
         "mov w0, #0x10",
-        "sub w0, w0, w21",
-        "lsr w0, w20, w0",
-        "eor w2, w20, w22",
+        "sub w0, w0, w23",
+        "lsr w0, w21, w0",
+        "eor w2, w21, w22",
         "rmif x0, #63, #nzCv",
-        "rmif x2, #15, #nzcV"
+        "rmif x2, #15, #nzcV",
+        "mov x26, x24"
       ]
     },
     "shl eax, cl": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 16,
       "Comment": "GROUP2 0xd3 /4",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "mov w21, w5",
-        "lsl w4, w20, w21",
-        "cbz w21, #+0x1c",
-        "ands w26, w4, w4",
-        "neg w0, w21",
-        "lsr w0, w20, w0",
-        "eor w2, w20, w4",
+        "mov x20, x4",
+        "mov w21, w20",
+        "mov x20, x5",
+        "mov w22, w20",
+        "lsl w20, w21, w22",
+        "mov x4, x20",
+        "mov x23, x26",
+        "mov x24, x23",
+        "cbz w22, #+0x1c",
+        "ands w24, w20, w20",
+        "neg w0, w22",
+        "lsr w0, w21, w0",
+        "eor w2, w21, w20",
         "rmif x0, #63, #nzCv",
-        "rmif x2, #31, #nzcV"
+        "rmif x2, #31, #nzcV",
+        "mov x26, x24"
       ]
     },
     "shl rax, cl": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 14,
       "Comment": "GROUP2 0xd3 /4",
       "ExpectedArm64ASM": [
         "mov x20, x4",
-        "lsl x4, x20, x5",
-        "cbz x5, #+0x1c",
-        "ands x26, x4, x4",
-        "neg x0, x5",
+        "mov x21, x5",
+        "lsl x22, x20, x21",
+        "mov x4, x22",
+        "mov x23, x26",
+        "mov x24, x23",
+        "cbz x21, #+0x1c",
+        "ands x24, x22, x22",
+        "neg x0, x21",
         "lsr x0, x20, x0",
-        "eor x2, x20, x4",
+        "eor x2, x20, x22",
         "rmif x0, #63, #nzCv",
-        "rmif x2, #63, #nzcV"
+        "rmif x2, #63, #nzcV",
+        "mov x26, x24"
       ]
     },
     "shr ax, cl": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 19,
       "Comment": "GROUP2 0xd3 /5",
       "ExpectedArm64ASM": [
-        "uxth w20, w4",
-        "uxth w21, w5",
-        "lsr w22, w20, w21",
-        "bfxil x4, x22, #0, #16",
-        "cbz w21, #+0x20",
+        "mov x20, x4",
+        "uxth w21, w20",
+        "mov x22, x5",
+        "uxth w23, w22",
+        "lsr w22, w21, w23",
+        "mov x24, x20",
+        "bfxil x24, x22, #0, #16",
+        "mov x4, x24",
+        "mov x20, x26",
+        "mov x24, x20",
+        "cbz w23, #+0x20",
         "cmn wzr, w22, lsl #16",
-        "mov x26, x22",
-        "sub x0, x21, #0x1 (1)",
-        "lsr w0, w20, w0",
-        "eor w2, w20, w22",
+        "mov x24, x22",
+        "sub x0, x23, #0x1 (1)",
+        "lsr w0, w21, w0",
+        "eor w2, w21, w22",
         "rmif x0, #63, #nzCv",
-        "rmif x2, #15, #nzcV"
+        "rmif x2, #15, #nzcV",
+        "mov x26, x24"
       ]
     },
     "shr eax, cl": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 16,
       "Comment": "GROUP2 0xd3 /5",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "mov w21, w5",
-        "lsr w4, w20, w21",
-        "cbz w21, #+0x1c",
-        "ands w26, w4, w4",
-        "sub x0, x21, #0x1 (1)",
-        "lsr w0, w20, w0",
-        "eor w2, w20, w4",
+        "mov x20, x4",
+        "mov w21, w20",
+        "mov x20, x5",
+        "mov w22, w20",
+        "lsr w20, w21, w22",
+        "mov x4, x20",
+        "mov x23, x26",
+        "mov x24, x23",
+        "cbz w22, #+0x1c",
+        "ands w24, w20, w20",
+        "sub x0, x22, #0x1 (1)",
+        "lsr w0, w21, w0",
+        "eor w2, w21, w20",
         "rmif x0, #63, #nzCv",
-        "rmif x2, #31, #nzcV"
+        "rmif x2, #31, #nzcV",
+        "mov x26, x24"
       ]
     },
     "shr rax, cl": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 14,
       "Comment": "GROUP2 0xd3 /5",
       "ExpectedArm64ASM": [
         "mov x20, x4",
-        "lsr x4, x20, x5",
-        "cbz x5, #+0x1c",
-        "ands x26, x4, x4",
-        "sub x0, x5, #0x1 (1)",
+        "mov x21, x5",
+        "lsr x22, x20, x21",
+        "mov x4, x22",
+        "mov x23, x26",
+        "mov x24, x23",
+        "cbz x21, #+0x1c",
+        "ands x24, x22, x22",
+        "sub x0, x21, #0x1 (1)",
         "lsr x0, x20, x0",
-        "eor x2, x20, x4",
+        "eor x2, x20, x22",
         "rmif x0, #63, #nzCv",
-        "rmif x2, #63, #nzcV"
+        "rmif x2, #63, #nzcV",
+        "mov x26, x24"
       ]
     },
     "sar ax, cl": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 18,
       "Comment": "GROUP2 0xd3 /7",
       "ExpectedArm64ASM": [
-        "uxth w20, w4",
-        "uxth w21, w5",
-        "sxth x20, w20",
-        "asr w22, w20, w21",
-        "bfxil x4, x22, #0, #16",
-        "cbz w21, #+0x18",
-        "cmn wzr, w22, lsl #16",
-        "mov x26, x22",
-        "sub x0, x21, #0x1 (1)",
-        "lsr w0, w20, w0",
-        "rmif x0, #63, #nzCv"
+        "mov x20, x4",
+        "uxth w21, w20",
+        "mov x22, x5",
+        "uxth w23, w22",
+        "sxth x22, w21",
+        "asr w21, w22, w23",
+        "mov x24, x20",
+        "bfxil x24, x21, #0, #16",
+        "mov x4, x24",
+        "mov x20, x26",
+        "mov x24, x20",
+        "cbz w23, #+0x18",
+        "cmn wzr, w21, lsl #16",
+        "mov x24, x21",
+        "sub x0, x23, #0x1 (1)",
+        "lsr w0, w22, w0",
+        "rmif x0, #63, #nzCv",
+        "mov x26, x24"
       ]
     },
     "sar eax, cl": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 14,
       "Comment": "GROUP2 0xd3 /7",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "mov w21, w5",
-        "asr w4, w20, w21",
-        "cbz w21, #+0x14",
-        "ands w26, w4, w4",
-        "sub x0, x21, #0x1 (1)",
-        "lsr w0, w20, w0",
-        "rmif x0, #63, #nzCv"
+        "mov x20, x4",
+        "mov w21, w20",
+        "mov x20, x5",
+        "mov w22, w20",
+        "asr w20, w21, w22",
+        "mov x4, x20",
+        "mov x23, x26",
+        "mov x24, x23",
+        "cbz w22, #+0x14",
+        "ands w24, w20, w20",
+        "sub x0, x22, #0x1 (1)",
+        "lsr w0, w21, w0",
+        "rmif x0, #63, #nzCv",
+        "mov x26, x24"
       ]
     },
     "sar rax, cl": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 12,
       "Comment": "GROUP2 0xd3 /7",
       "ExpectedArm64ASM": [
         "mov x20, x4",
-        "asr x4, x20, x5",
-        "cbz x5, #+0x14",
-        "ands x26, x4, x4",
-        "sub x0, x5, #0x1 (1)",
+        "mov x21, x5",
+        "asr x22, x20, x21",
+        "mov x4, x22",
+        "mov x23, x26",
+        "mov x24, x23",
+        "cbz x21, #+0x14",
+        "ands x24, x22, x22",
+        "sub x0, x21, #0x1 (1)",
         "lsr x0, x20, x0",
-        "rmif x0, #63, #nzCv"
+        "rmif x0, #63, #nzCv",
+        "mov x26, x24"
       ]
     },
     "test bl, 1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP2 0xf6 /0",
       "ExpectedArm64ASM": [
-        "and w26, w7, #0x1",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x7",
+        "and w21, w20, #0x1",
+        "cmn wzr, w21, lsl #24",
+        "mov x26, x21"
       ]
     },
     "not bl": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP2 0xf6 /2",
       "ExpectedArm64ASM": [
-        "eor x7, x7, #0xff"
+        "mov x20, x7",
+        "eor x21, x20, #0xff",
+        "mov x7, x21"
       ]
     },
     "neg bl": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP2 0xf6 /3",
       "ExpectedArm64ASM": [
-        "mov x27, x7",
-        "cmp wzr, w27, lsl #24",
-        "neg w26, w27",
+        "mov x20, x7",
+        "mov x27, x20",
+        "cmp wzr, w20, lsl #24",
+        "neg w21, w20",
+        "mov x26, x21",
         "cfinv",
-        "mov x7, x27",
-        "bfxil x7, x26, #0, #8"
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x7, x22"
       ]
     },
     "mul bl": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP2 0xf6 /4",
       "ExpectedArm64ASM": [
-        "uxtb x20, w7",
-        "uxtb x21, w4",
-        "mul x20, x20, x21",
-        "bfxil x4, x20, #0, #16",
-        "ubfx x20, x20, #8, #8",
+        "mov x20, x7",
+        "mov x21, x4",
+        "uxtb x22, w20",
+        "uxtb x20, w21",
+        "mul x23, x22, x20",
+        "mov x20, x21",
+        "bfxil x20, x23, #0, #16",
+        "mov x4, x20",
+        "ubfx x20, x23, #8, #8",
         "cmp x20, #0x0 (0)",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "imul bl": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 12,
       "Comment": "GROUP2 0xf6 /5",
       "ExpectedArm64ASM": [
-        "sxtb x20, w7",
-        "sxtb x21, w4",
-        "mul x20, x20, x21",
-        "bfxil x4, x20, #0, #16",
-        "sbfx x21, x20, #8, #8",
-        "sbfx x20, x20, #7, #1",
-        "cmp x21, x20",
+        "mov x20, x7",
+        "mov x21, x4",
+        "sxtb x22, w20",
+        "sxtb x20, w21",
+        "mul x23, x22, x20",
+        "mov x20, x21",
+        "bfxil x20, x23, #0, #16",
+        "mov x4, x20",
+        "sbfx x20, x23, #8, #8",
+        "sbfx x21, x23, #7, #1",
+        "cmp x20, x21",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "div bl": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 16,
       "Comment": "GROUP2 0xf6 /6",
       "ExpectedArm64ASM": [
-        "uxtb w20, w7",
-        "uxth w21, w4",
-        "uxth w0, w21",
-        "uxth w1, w20",
-        "udiv w22, w0, w1",
-        "uxth w0, w21",
-        "uxth w1, w20",
+        "mov x20, x7",
+        "uxtb w21, w20",
+        "mov x20, x4",
+        "uxth w22, w20",
+        "uxth w0, w22",
+        "uxth w1, w21",
+        "udiv w23, w0, w1",
+        "uxth w0, w22",
+        "uxth w1, w21",
         "udiv w2, w0, w1",
-        "msub w20, w2, w1, w0",
-        "mov x0, x22",
-        "bfi x0, x20, #8, #8",
-        "mov x20, x0",
-        "bfxil x4, x20, #0, #16"
+        "msub w24, w2, w1, w0",
+        "mov x21, x23",
+        "bfi x21, x24, #8, #8",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22"
       ]
     },
     "idiv bl": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 14,
       "Comment": "GROUP2 0xf6 /7",
       "ExpectedArm64ASM": [
-        "uxtb w20, w7",
-        "uxth w21, w4",
-        "sxth x21, w21",
-        "sxtb x20, w20",
-        "sdiv x22, x21, x20",
-        "sdiv x0, x21, x20",
-        "msub x20, x0, x20, x21",
-        "mov x0, x22",
-        "bfi x0, x20, #8, #8",
-        "mov x20, x0",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x7",
+        "uxtb w21, w20",
+        "mov x20, x4",
+        "uxth w22, w20",
+        "sxth x23, w22",
+        "sxtb x22, w21",
+        "sdiv x21, x23, x22",
+        "sdiv x0, x23, x22",
+        "msub x24, x0, x22, x23",
+        "mov x22, x21",
+        "bfi x22, x24, #8, #8",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21"
       ]
     },
     "test bx, 1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP2 0xf7 /0",
       "ExpectedArm64ASM": [
-        "and w26, w7, #0x1",
-        "cmn wzr, w26, lsl #16"
+        "mov x20, x7",
+        "and w21, w20, #0x1",
+        "cmn wzr, w21, lsl #16",
+        "mov x26, x21"
       ]
     },
     "test ebx, 1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP2 0xf7 /0",
       "ExpectedArm64ASM": [
-        "ands w26, w7, #0x1"
+        "mov x20, x7",
+        "ands w21, w20, #0x1",
+        "mov x26, x21"
       ]
     },
     "test rbx, 1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP2 0xf7 /0",
       "ExpectedArm64ASM": [
-        "ands x26, x7, #0x1"
+        "mov x20, x7",
+        "ands x21, x20, #0x1",
+        "mov x26, x21"
       ]
     },
     "test bx, -1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP2 0xf7 /0",
       "ExpectedArm64ASM": [
-        "mov x26, x7",
-        "cmn wzr, w26, lsl #16"
+        "mov x20, x7",
+        "mov x21, x20",
+        "cmn wzr, w21, lsl #16",
+        "mov x26, x21"
       ]
     },
     "test ebx, -1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP2 0xf7 /0",
       "ExpectedArm64ASM": [
-        "ands w26, w7, w7"
+        "mov x20, x7",
+        "ands w21, w20, w20",
+        "mov x26, x21"
       ]
     },
     "test rbx, -1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP2 0xf7 /0",
       "ExpectedArm64ASM": [
-        "ands x26, x7, x7"
+        "mov x20, x7",
+        "ands x21, x20, x20",
+        "mov x26, x21"
       ]
     },
     "neg bx": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP2 0xf7 /2",
       "ExpectedArm64ASM": [
-        "mov x27, x7",
-        "cmp wzr, w27, lsl #16",
-        "neg w26, w27",
+        "mov x20, x7",
+        "mov x27, x20",
+        "cmp wzr, w20, lsl #16",
+        "neg w21, w20",
+        "mov x26, x21",
         "cfinv",
-        "mov x7, x27",
-        "bfxil x7, x26, #0, #16"
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x7, x22"
       ]
     },
     "neg ebx": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP2 0xf7 /2",
       "ExpectedArm64ASM": [
-        "mov x27, x7",
-        "negs w26, w27",
+        "mov x20, x7",
+        "mov x27, x20",
+        "negs w21, w20",
+        "mov x26, x21",
         "cfinv",
-        "mov x7, x26"
+        "mov x7, x21"
       ]
     },
     "neg rbx": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP2 0xf7 /2",
       "ExpectedArm64ASM": [
-        "mov x27, x7",
-        "negs x26, x27",
+        "mov x20, x7",
+        "mov x27, x20",
+        "negs x21, x20",
+        "mov x26, x21",
         "cfinv",
-        "mov x7, x26"
+        "mov x7, x21"
       ]
     },
     "mul bx": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 15,
       "Comment": "GROUP2 0xf7 /3",
       "ExpectedArm64ASM": [
-        "uxth x20, w7",
-        "uxth x21, w4",
-        "mul x20, x20, x21",
-        "bfxil x4, x20, #0, #16",
-        "ubfx x20, x20, #16, #16",
-        "bfxil x6, x20, #0, #16",
+        "mov x20, x7",
+        "mov x21, x4",
+        "uxth x22, w20",
+        "uxth x20, w21",
+        "mul x23, x22, x20",
+        "mov x20, x21",
+        "bfxil x20, x23, #0, #16",
+        "mov x4, x20",
+        "ubfx x20, x23, #16, #16",
+        "mov x21, x6",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x6, x22",
         "cmp x20, #0x0 (0)",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "mul ebx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP2 0xf7 /3",
       "ExpectedArm64ASM": [
-        "mov w20, w7",
-        "mov w21, w4",
-        "mul x20, x20, x21",
-        "mov w4, w20",
-        "lsr x6, x20, #32",
-        "cmp x6, #0x0 (0)",
+        "mov x20, x7",
+        "mov x21, x4",
+        "mov w22, w20",
+        "mov w20, w21",
+        "mul x21, x22, x20",
+        "mov w20, w21",
+        "lsr x22, x21, #32",
+        "mov x4, x20",
+        "mov x6, x22",
+        "cmp x22, #0x0 (0)",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "mul rbx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP2 0xf7 /3",
       "ExpectedArm64ASM": [
-        "mov x20, x4",
-        "mul x4, x7, x20",
-        "umulh x6, x7, x20",
-        "cmp x6, #0x0 (0)",
+        "mov x20, x7",
+        "mov x21, x4",
+        "mul x22, x20, x21",
+        "umulh x23, x20, x21",
+        "mov x4, x22",
+        "mov x6, x23",
+        "cmp x23, #0x0 (0)",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "imul bx": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 16,
       "Comment": "GROUP2 0xf7 /5",
       "ExpectedArm64ASM": [
-        "sxth x20, w7",
-        "sxth x21, w4",
-        "mul x20, x20, x21",
-        "bfxil x4, x20, #0, #16",
-        "sbfx x21, x20, #16, #16",
-        "bfxil x6, x21, #0, #16",
-        "sbfx x20, x20, #15, #1",
-        "cmp x21, x20",
+        "mov x20, x7",
+        "mov x21, x4",
+        "sxth x22, w20",
+        "sxth x20, w21",
+        "mul x23, x22, x20",
+        "mov x20, x21",
+        "bfxil x20, x23, #0, #16",
+        "mov x4, x20",
+        "sbfx x20, x23, #16, #16",
+        "mov x21, x6",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x6, x22",
+        "sbfx x21, x23, #15, #1",
+        "cmp x20, x21",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "imul ebx": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 14,
       "Comment": "GROUP2 0xf7 /5",
       "ExpectedArm64ASM": [
-        "sxtw x20, w7",
-        "sxtw x21, w4",
-        "mul x20, x20, x21",
-        "mov w4, w20",
-        "lsr x6, x20, #32",
-        "asr x21, x20, #32",
-        "sxtw x20, w20",
-        "sbfx x20, x20, #31, #1",
-        "cmp x21, x20",
+        "mov x20, x7",
+        "mov x21, x4",
+        "sxtw x22, w20",
+        "sxtw x20, w21",
+        "mul x21, x22, x20",
+        "mov w20, w21",
+        "lsr x22, x21, #32",
+        "asr x23, x21, #32",
+        "sxtw x24, w21",
+        "mov x4, x20",
+        "mov x6, x22",
+        "sbfx x20, x24, #31, #1",
+        "cmp x23, x20",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "imul rbx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP2 0xf7 /5",
       "ExpectedArm64ASM": [
-        "smulh x6, x7, x4",
-        "mul x4, x7, x4",
-        "asr x20, x4, #63",
-        "cmp x6, x20",
+        "mov x20, x7",
+        "mov x21, x4",
+        "smulh x22, x20, x21",
+        "mul x23, x20, x21",
+        "mov x4, x23",
+        "mov x6, x22",
+        "asr x20, x23, #63",
+        "cmp x22, x20",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "div bx": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 19,
       "Comment": "GROUP2 0xf7 /6",
       "ExpectedArm64ASM": [
-        "uxth w20, w7",
-        "uxth w21, w4",
-        "uxth w22, w6",
-        "uxth w0, w21",
-        "bfi w0, w22, #16, #16",
-        "udiv w23, w0, w20",
-        "uxth w0, w21",
-        "bfi w0, w22, #16, #16",
-        "udiv w1, w0, w20",
-        "msub w20, w1, w20, w0",
-        "bfxil x4, x23, #0, #16",
-        "bfxil x6, x20, #0, #16"
+        "mov x20, x7",
+        "uxth w21, w20",
+        "mov x20, x4",
+        "uxth w22, w20",
+        "mov x23, x6",
+        "uxth w24, w23",
+        "uxth w0, w22",
+        "bfi w0, w24, #16, #16",
+        "udiv w25, w0, w21",
+        "uxth w0, w22",
+        "bfi w0, w24, #16, #16",
+        "udiv w1, w0, w21",
+        "msub w30, w1, w21, w0",
+        "mov x21, x20",
+        "bfxil x21, x25, #0, #16",
+        "mov x4, x21",
+        "mov x20, x23",
+        "bfxil x20, x30, #0, #16",
+        "mov x6, x20"
       ]
     },
     "inc al": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP3 0xfe /0",
       "ExpectedArm64ASM": [
-        "uxtb w27, w4",
-        "add w26, w27, #0x1 (1)",
-        "setf8 w26",
-        "bic w20, w26, w27",
-        "rmif x20, #7, #nzcV",
-        "bfxil x4, x26, #0, #8"
+        "mov x20, x4",
+        "uxtb w21, w20",
+        "add w22, w21, #0x1 (1)",
+        "mov x26, x22",
+        "mov x27, x21",
+        "setf8 w22",
+        "bic w23, w22, w21",
+        "rmif x23, #7, #nzcV",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #8",
+        "mov x4, x21"
       ]
     },
     "dec al": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP3 0xfe /1",
       "ExpectedArm64ASM": [
-        "uxtb w27, w4",
-        "sub w26, w27, #0x1 (1)",
-        "setf8 w26",
-        "bic w20, w27, w26",
-        "rmif x20, #7, #nzcV",
-        "bfxil x4, x26, #0, #8"
+        "mov x20, x4",
+        "uxtb w21, w20",
+        "sub w22, w21, #0x1 (1)",
+        "mov x26, x22",
+        "mov x27, x21",
+        "setf8 w22",
+        "bic w23, w21, w22",
+        "rmif x23, #7, #nzcV",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #8",
+        "mov x4, x21"
       ]
     },
     "inc ax": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP4 0xfe /0",
       "ExpectedArm64ASM": [
-        "uxth w27, w4",
-        "add w26, w27, #0x1 (1)",
-        "setf16 w26",
-        "bic w20, w26, w27",
-        "rmif x20, #15, #nzcV",
-        "bfxil x4, x26, #0, #16"
+        "mov x20, x4",
+        "uxth w21, w20",
+        "add w22, w21, #0x1 (1)",
+        "mov x26, x22",
+        "mov x27, x21",
+        "setf16 w22",
+        "bic w23, w22, w21",
+        "rmif x23, #15, #nzcV",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21"
       ]
     },
     "inc eax": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP4 0xfe /0",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "cset w20, hs",
-        "adds w26, w27, #0x1 (1)",
-        "rmif x20, #63, #nzCv",
-        "mov x4, x26"
+        "mov x20, x4",
+        "cset w21, hs",
+        "mov x27, x20",
+        "adds w22, w20, #0x1 (1)",
+        "mov x26, x22",
+        "rmif x21, #63, #nzCv",
+        "mov x4, x22"
       ]
     },
     "inc rax": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP4 0xfe /0",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "cset w20, hs",
-        "adds x26, x27, #0x1 (1)",
-        "rmif x20, #63, #nzCv",
-        "mov x4, x26"
+        "mov x20, x4",
+        "cset w21, hs",
+        "mov x27, x20",
+        "adds x22, x20, #0x1 (1)",
+        "mov x26, x22",
+        "rmif x21, #63, #nzCv",
+        "mov x4, x22"
       ]
     },
     "dec ax": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP4 0xfe /1",
       "ExpectedArm64ASM": [
-        "uxth w27, w4",
-        "sub w26, w27, #0x1 (1)",
-        "setf16 w26",
-        "bic w20, w27, w26",
-        "rmif x20, #15, #nzcV",
-        "bfxil x4, x26, #0, #16"
+        "mov x20, x4",
+        "uxth w21, w20",
+        "sub w22, w21, #0x1 (1)",
+        "mov x26, x22",
+        "mov x27, x21",
+        "setf16 w22",
+        "bic w23, w21, w22",
+        "rmif x23, #15, #nzcV",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21"
       ]
     },
     "dec eax": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP4 0xfe /1",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "cset w20, hs",
-        "subs w26, w27, #0x1 (1)",
-        "rmif x20, #63, #nzCv",
-        "mov x4, x26"
+        "mov x20, x4",
+        "cset w21, hs",
+        "mov x27, x20",
+        "subs w22, w20, #0x1 (1)",
+        "mov x26, x22",
+        "rmif x21, #63, #nzCv",
+        "mov x4, x22"
       ]
     },
     "dec rax": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP4 0xfe /1",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "cset w20, hs",
-        "subs x26, x27, #0x1 (1)",
-        "rmif x20, #63, #nzCv",
-        "mov x4, x26"
+        "mov x20, x4",
+        "cset w21, hs",
+        "mov x27, x20",
+        "subs x22, x20, #0x1 (1)",
+        "mov x26, x22",
+        "rmif x21, #63, #nzCv",
+        "mov x4, x22"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/FlagM/Primary_32Bit.json b/unittests/InstructionCountCI/FlagM/Primary_32Bit.json
index 4c0f7dc6ce..0b718153e3 100644
--- a/unittests/InstructionCountCI/FlagM/Primary_32Bit.json
+++ b/unittests/InstructionCountCI/FlagM/Primary_32Bit.json
@@ -12,370 +12,503 @@
   },
   "Instructions": {
     "push es": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x06",
       "ExpectedArm64ASM": [
-        "ldrh w20, [x28, #136]",
-        "str w20, [x8, #-4]!"
+        "mov w20, w8",
+        "ldrh w21, [x28, #136]",
+        "mov w22, w20",
+        "str w21, [x22, #-4]!",
+        "mov w8, w22"
       ]
     },
     "pop es": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x07",
       "ExpectedArm64ASM": [
-        "ldr w20, [x8]",
-        "add x8, x8, #0x4 (4)",
-        "strh w20, [x28, #136]",
-        "ubfx w20, w20, #3, #13",
+        "mov w20, w8",
+        "ldr w21, [x20]",
+        "add x22, x20, #0x4 (4)",
+        "mov w8, w22",
+        "strh w21, [x28, #136]",
+        "ubfx w20, w21, #3, #13",
         "add x0, x28, x20, lsl #2",
-        "ldr w20, [x0, #896]",
-        "str w20, [x28, #152]"
+        "ldr w21, [x0, #896]",
+        "str w21, [x28, #152]"
       ]
     },
     "push cs": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0e",
       "ExpectedArm64ASM": [
-        "ldrh w20, [x28, #138]",
-        "str w20, [x8, #-4]!"
+        "mov w20, w8",
+        "ldrh w21, [x28, #138]",
+        "mov w22, w20",
+        "str w21, [x22, #-4]!",
+        "mov w8, w22"
       ]
     },
     "push ss": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x16",
       "ExpectedArm64ASM": [
-        "ldrh w20, [x28, #140]",
-        "str w20, [x8, #-4]!"
+        "mov w20, w8",
+        "ldrh w21, [x28, #140]",
+        "mov w22, w20",
+        "str w21, [x22, #-4]!",
+        "mov w8, w22"
       ]
     },
     "pop ss": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x17",
       "ExpectedArm64ASM": [
-        "ldr w20, [x8]",
-        "add x8, x8, #0x4 (4)",
-        "strh w20, [x28, #140]",
-        "ubfx w20, w20, #3, #13",
+        "mov w20, w8",
+        "ldr w21, [x20]",
+        "add x22, x20, #0x4 (4)",
+        "mov w8, w22",
+        "strh w21, [x28, #140]",
+        "ubfx w20, w21, #3, #13",
         "add x0, x28, x20, lsl #2",
-        "ldr w20, [x0, #896]",
-        "str w20, [x28, #160]"
+        "ldr w21, [x0, #896]",
+        "str w21, [x28, #160]"
       ]
     },
     "push ds": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x1e",
       "ExpectedArm64ASM": [
-        "ldrh w20, [x28, #142]",
-        "str w20, [x8, #-4]!"
+        "mov w20, w8",
+        "ldrh w21, [x28, #142]",
+        "mov w22, w20",
+        "str w21, [x22, #-4]!",
+        "mov w8, w22"
       ]
     },
     "pop ds": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x1f",
       "ExpectedArm64ASM": [
-        "ldr w20, [x8]",
-        "add x8, x8, #0x4 (4)",
-        "strh w20, [x28, #142]",
-        "ubfx w20, w20, #3, #13",
+        "mov w20, w8",
+        "ldr w21, [x20]",
+        "add x22, x20, #0x4 (4)",
+        "mov w8, w22",
+        "strh w21, [x28, #142]",
+        "ubfx w20, w21, #3, #13",
         "add x0, x28, x20, lsl #2",
-        "ldr w20, [x0, #896]",
-        "str w20, [x28, #164]"
+        "ldr w21, [x0, #896]",
+        "str w21, [x28, #164]"
       ]
     },
     "daa": {
-      "ExpectedInstructionCount": 21,
+      "ExpectedInstructionCount": 28,
       "Comment": "0x27",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "cset w21, hs",
-        "and x22, x20, #0xf",
-        "cmp x22, #0x9 (9)",
-        "cset x22, hi",
-        "eor w23, w27, w26",
-        "ubfx w23, w23, #4, #1",
-        "orr x22, x23, x22",
-        "cmp x20, #0x99 (153)",
+        "mov w20, w4",
+        "uxtb w21, w20",
+        "cset w22, hs",
+        "and x23, x21, #0xf",
+        "cmp x23, #0x9 (9)",
+        "cset x24, hi",
+        "mov w23, w27",
+        "mov w25, w26",
+        "eor w12, w23, w25",
+        "ubfx w23, w12, #4, #1",
+        "orr x25, x23, x24",
+        "cmp x21, #0x99 (153)",
         "cset x23, hi",
-        "orr x21, x21, x23",
-        "add x23, x20, #0x6 (6)",
-        "cmp x22, #0x0 (0)",
-        "csel x20, x23, x20, ne",
-        "add x23, x20, #0x60 (96)",
-        "cmp x21, #0x0 (0)",
-        "csel x26, x23, x20, ne",
-        "bfxil w4, w26, #0, #8",
-        "cmn wzr, w26, lsl #24",
-        "rmif x21, #63, #nzCv",
-        "eor w27, w26, w22, lsl #4"
+        "orr x24, x22, x23",
+        "add x22, x21, #0x6 (6)",
+        "cmp x25, #0x0 (0)",
+        "csel x23, x22, x21, ne",
+        "add x21, x23, #0x60 (96)",
+        "cmp x24, #0x0 (0)",
+        "csel x22, x21, x23, ne",
+        "mov w21, w20",
+        "bfxil w21, w22, #0, #8",
+        "mov w4, w21",
+        "cmn wzr, w22, lsl #24",
+        "rmif x24, #63, #nzCv",
+        "mov w26, w22",
+        "eor w20, w22, w25, lsl #4",
+        "mov w27, w20"
       ]
     },
     "das": {
-      "ExpectedInstructionCount": 24,
+      "ExpectedInstructionCount": 31,
       "Comment": "0x2f",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "cset w21, hs",
-        "and x22, x20, #0xf",
-        "cmp x22, #0x9 (9)",
-        "cset x22, hi",
-        "eor w23, w27, w26",
-        "ubfx w23, w23, #4, #1",
-        "orr x22, x23, x22",
-        "cmp x20, #0x99 (153)",
+        "mov w20, w4",
+        "uxtb w21, w20",
+        "cset w22, hs",
+        "and x23, x21, #0xf",
+        "cmp x23, #0x9 (9)",
+        "cset x24, hi",
+        "mov w23, w27",
+        "mov w25, w26",
+        "eor w12, w23, w25",
+        "ubfx w23, w12, #4, #1",
+        "orr x25, x23, x24",
+        "cmp x21, #0x99 (153)",
         "cset x23, hi",
-        "orr x21, x21, x23",
-        "cmp x20, #0x6 (6)",
-        "csel x23, x22, x21, lo",
-        "orr w23, w21, w23",
-        "sub x24, x20, #0x6 (6)",
-        "cmp x22, #0x0 (0)",
-        "csel x20, x24, x20, ne",
-        "sub x24, x20, #0x60 (96)",
-        "cmp x21, #0x0 (0)",
-        "csel x26, x24, x20, ne",
-        "bfxil w4, w26, #0, #8",
-        "cmn wzr, w26, lsl #24",
+        "orr x24, x22, x23",
+        "cmp x21, #0x6 (6)",
+        "csel x22, x25, x24, lo",
+        "orr w23, w24, w22",
+        "sub x22, x21, #0x6 (6)",
+        "cmp x25, #0x0 (0)",
+        "csel x12, x22, x21, ne",
+        "sub x21, x12, #0x60 (96)",
+        "cmp x24, #0x0 (0)",
+        "csel x22, x21, x12, ne",
+        "mov w21, w20",
+        "bfxil w21, w22, #0, #8",
+        "mov w4, w21",
+        "cmn wzr, w22, lsl #24",
         "rmif x23, #63, #nzCv",
-        "eor w27, w26, w22, lsl #4"
+        "mov w26, w22",
+        "eor w20, w22, w25, lsl #4",
+        "mov w27, w20"
       ]
     },
     "aaa": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 20,
       "Comment": "0x37",
       "ExpectedArm64ASM": [
-        "and x20, x4, #0xf",
-        "cmp x20, #0x9 (9)",
-        "cset x20, hi",
-        "eor w21, w27, w26",
-        "ubfx w21, w21, #4, #1",
-        "orr x20, x21, x20",
-        "lsl x21, x20, #29",
-        "eor w27, w26, w20, lsl #4",
+        "mov w20, w4",
+        "and x21, x20, #0xf",
+        "cmp x21, #0x9 (9)",
+        "cset x22, hi",
+        "mov w21, w27",
+        "mov w23, w26",
+        "eor w24, w21, w23",
+        "ubfx w21, w24, #4, #1",
+        "orr x24, x21, x22",
+        "lsl x21, x24, #29",
+        "eor w22, w23, w24, lsl #4",
+        "mov w27, w22",
         "msr nzcv, x21",
-        "add w20, w4, #0x106 (262)",
-        "csel w20, w20, w4, hs",
+        "add w21, w20, #0x106 (262)",
+        "csel w22, w21, w20, hs",
         "mov w21, #0xff0f",
-        "and w20, w20, w21",
-        "bfxil w4, w20, #0, #16"
+        "and w23, w22, w21",
+        "mov w21, w20",
+        "bfxil w21, w23, #0, #16",
+        "mov w4, w21"
       ]
     },
     "aas": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 20,
       "Comment": "0x3f",
       "ExpectedArm64ASM": [
-        "and x20, x4, #0xf",
-        "cmp x20, #0x9 (9)",
-        "cset x20, hi",
-        "eor w21, w27, w26",
-        "ubfx w21, w21, #4, #1",
-        "orr x20, x21, x20",
-        "lsl x21, x20, #29",
-        "eor w27, w26, w20, lsl #4",
+        "mov w20, w4",
+        "and x21, x20, #0xf",
+        "cmp x21, #0x9 (9)",
+        "cset x22, hi",
+        "mov w21, w27",
+        "mov w23, w26",
+        "eor w24, w21, w23",
+        "ubfx w21, w24, #4, #1",
+        "orr x24, x21, x22",
+        "lsl x21, x24, #29",
+        "eor w22, w23, w24, lsl #4",
+        "mov w27, w22",
         "msr nzcv, x21",
-        "sub w20, w4, #0x106 (262)",
-        "csel w20, w20, w4, hs",
+        "sub w21, w20, #0x106 (262)",
+        "csel w22, w21, w20, hs",
         "mov w21, #0xff0f",
-        "and w20, w20, w21",
-        "bfxil w4, w20, #0, #16"
+        "and w23, w22, w21",
+        "mov w21, w20",
+        "bfxil w21, w23, #0, #16",
+        "mov w4, w21"
       ]
     },
     "inc ax": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x40",
       "ExpectedArm64ASM": [
-        "uxth w27, w4",
-        "add w26, w27, #0x1 (1)",
-        "setf16 w26",
-        "bic w20, w26, w27",
-        "rmif x20, #15, #nzcV",
-        "bfxil w4, w26, #0, #16"
+        "mov w20, w4",
+        "uxth w21, w20",
+        "add w22, w21, #0x1 (1)",
+        "mov w26, w22",
+        "mov w27, w21",
+        "setf16 w22",
+        "bic w23, w22, w21",
+        "rmif x23, #15, #nzcV",
+        "mov w21, w20",
+        "bfxil w21, w22, #0, #16",
+        "mov w4, w21"
       ]
     },
     "inc eax": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x40",
       "ExpectedArm64ASM": [
-        "mov w27, w4",
-        "cset w20, hs",
-        "adds w26, w27, #0x1 (1)",
-        "rmif x20, #63, #nzCv",
-        "mov w4, w26"
+        "mov w20, w4",
+        "cset w21, hs",
+        "mov w27, w20",
+        "adds w22, w20, #0x1 (1)",
+        "mov w26, w22",
+        "rmif x21, #63, #nzCv",
+        "mov w4, w22"
       ]
     },
     "dec ax": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x48",
       "ExpectedArm64ASM": [
-        "uxth w27, w4",
-        "sub w26, w27, #0x1 (1)",
-        "setf16 w26",
-        "bic w20, w27, w26",
-        "rmif x20, #15, #nzcV",
-        "bfxil w4, w26, #0, #16"
+        "mov w20, w4",
+        "uxth w21, w20",
+        "sub w22, w21, #0x1 (1)",
+        "mov w26, w22",
+        "mov w27, w21",
+        "setf16 w22",
+        "bic w23, w21, w22",
+        "rmif x23, #15, #nzcV",
+        "mov w21, w20",
+        "bfxil w21, w22, #0, #16",
+        "mov w4, w21"
       ]
     },
     "push ax": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x50",
       "ExpectedArm64ASM": [
-        "strh w4, [x8, #-2]!"
+        "mov w20, w4",
+        "mov w21, w8",
+        "mov w22, w21",
+        "strh w20, [x22, #-2]!",
+        "mov w8, w22"
       ]
     },
     "push eax": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x50",
       "ExpectedArm64ASM": [
-        "str w4, [x8, #-4]!"
+        "mov w20, w4",
+        "mov w21, w8",
+        "mov w22, w21",
+        "str w20, [x22, #-4]!",
+        "mov w8, w22"
       ]
     },
     "dec eax": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x48",
       "ExpectedArm64ASM": [
-        "mov w27, w4",
-        "cset w20, hs",
-        "subs w26, w27, #0x1 (1)",
-        "rmif x20, #63, #nzCv",
-        "mov w4, w26"
+        "mov w20, w4",
+        "cset w21, hs",
+        "mov w27, w20",
+        "subs w22, w20, #0x1 (1)",
+        "mov w26, w22",
+        "rmif x21, #63, #nzCv",
+        "mov w4, w22"
       ]
     },
     "pusha": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 25,
       "Comment": "0x60",
       "ExpectedArm64ASM": [
         "mov w20, w8",
-        "str w4, [x20, #-4]!",
-        "str w5, [x20, #-4]!",
-        "str w6, [x20, #-4]!",
-        "str w7, [x20, #-4]!",
-        "str w8, [x20, #-4]!",
-        "str w9, [x20, #-4]!",
-        "str w10, [x20, #-4]!",
-        "mov w8, w20",
-        "str w11, [x8, #-4]!"
+        "mov w21, w4",
+        "mov w22, w20",
+        "str w21, [x22, #-4]!",
+        "mov w21, w5",
+        "mov w23, w22",
+        "str w21, [x23, #-4]!",
+        "mov w21, w6",
+        "mov w22, w23",
+        "str w21, [x22, #-4]!",
+        "mov w21, w7",
+        "mov w23, w22",
+        "str w21, [x23, #-4]!",
+        "mov w21, w23",
+        "str w20, [x21, #-4]!",
+        "mov w20, w9",
+        "mov w22, w21",
+        "str w20, [x22, #-4]!",
+        "mov w20, w10",
+        "mov w21, w22",
+        "str w20, [x21, #-4]!",
+        "mov w20, w11",
+        "mov w22, w21",
+        "str w20, [x22, #-4]!",
+        "mov w8, w22"
       ]
     },
     "pushad": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 25,
       "Comment": "0x60",
       "ExpectedArm64ASM": [
         "mov w20, w8",
-        "str w4, [x20, #-4]!",
-        "str w5, [x20, #-4]!",
-        "str w6, [x20, #-4]!",
-        "str w7, [x20, #-4]!",
-        "str w8, [x20, #-4]!",
-        "str w9, [x20, #-4]!",
-        "str w10, [x20, #-4]!",
-        "mov w8, w20",
-        "str w11, [x8, #-4]!"
+        "mov w21, w4",
+        "mov w22, w20",
+        "str w21, [x22, #-4]!",
+        "mov w21, w5",
+        "mov w23, w22",
+        "str w21, [x23, #-4]!",
+        "mov w21, w6",
+        "mov w22, w23",
+        "str w21, [x22, #-4]!",
+        "mov w21, w7",
+        "mov w23, w22",
+        "str w21, [x23, #-4]!",
+        "mov w21, w23",
+        "str w20, [x21, #-4]!",
+        "mov w20, w9",
+        "mov w22, w21",
+        "str w20, [x22, #-4]!",
+        "mov w20, w10",
+        "mov w21, w22",
+        "str w20, [x21, #-4]!",
+        "mov w20, w11",
+        "mov w22, w21",
+        "str w20, [x22, #-4]!",
+        "mov w8, w22"
       ]
     },
     "popa": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 23,
       "Comment": "0x61",
       "ExpectedArm64ASM": [
-        "ldr w11, [x8]",
-        "add x20, x8, #0x4 (4)",
-        "ldr w10, [x20]",
-        "add x20, x20, #0x4 (4)",
-        "ldr w9, [x20]",
-        "add x20, x20, #0x8 (8)",
-        "ldr w7, [x20]",
-        "add x20, x20, #0x4 (4)",
-        "ldr w6, [x20]",
-        "add x20, x20, #0x4 (4)",
-        "ldr w5, [x20]",
-        "add x20, x20, #0x4 (4)",
-        "ldr w4, [x20]",
-        "add x8, x20, #0x4 (4)"
+        "mov w20, w8",
+        "ldr w21, [x20]",
+        "mov w11, w21",
+        "add x21, x20, #0x4 (4)",
+        "ldr w20, [x21]",
+        "mov w10, w20",
+        "add x20, x21, #0x4 (4)",
+        "ldr w21, [x20]",
+        "mov w9, w21",
+        "add x21, x20, #0x8 (8)",
+        "ldr w20, [x21]",
+        "mov w7, w20",
+        "add x20, x21, #0x4 (4)",
+        "ldr w21, [x20]",
+        "mov w6, w21",
+        "add x21, x20, #0x4 (4)",
+        "ldr w20, [x21]",
+        "mov w5, w20",
+        "add x20, x21, #0x4 (4)",
+        "ldr w21, [x20]",
+        "mov w4, w21",
+        "add x21, x20, #0x4 (4)",
+        "mov w8, w21"
       ]
     },
     "popad": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 23,
       "Comment": "0x61",
       "ExpectedArm64ASM": [
-        "ldr w11, [x8]",
-        "add x20, x8, #0x4 (4)",
-        "ldr w10, [x20]",
-        "add x20, x20, #0x4 (4)",
-        "ldr w9, [x20]",
-        "add x20, x20, #0x8 (8)",
-        "ldr w7, [x20]",
-        "add x20, x20, #0x4 (4)",
-        "ldr w6, [x20]",
-        "add x20, x20, #0x4 (4)",
-        "ldr w5, [x20]",
-        "add x20, x20, #0x4 (4)",
-        "ldr w4, [x20]",
-        "add x8, x20, #0x4 (4)"
+        "mov w20, w8",
+        "ldr w21, [x20]",
+        "mov w11, w21",
+        "add x21, x20, #0x4 (4)",
+        "ldr w20, [x21]",
+        "mov w10, w20",
+        "add x20, x21, #0x4 (4)",
+        "ldr w21, [x20]",
+        "mov w9, w21",
+        "add x21, x20, #0x8 (8)",
+        "ldr w20, [x21]",
+        "mov w7, w20",
+        "add x20, x21, #0x4 (4)",
+        "ldr w21, [x20]",
+        "mov w6, w21",
+        "add x21, x20, #0x4 (4)",
+        "ldr w20, [x21]",
+        "mov w5, w20",
+        "add x20, x21, #0x4 (4)",
+        "ldr w21, [x20]",
+        "mov w4, w21",
+        "add x21, x20, #0x4 (4)",
+        "mov w8, w21"
       ]
     },
     "aam": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 12,
       "Comment": "0xd4",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "mov w21, #0xa",
-        "udiv x22, x20, x21",
-        "udiv x2, x20, x21",
-        "msub x20, x2, x21, x20",
-        "add x26, x20, x22, lsl #8",
-        "bfxil w4, w26, #0, #16",
-        "cmn wzr, w26, lsl #24"
+        "mov w20, w4",
+        "uxtb w21, w20",
+        "mov w22, #0xa",
+        "udiv x23, x21, x22",
+        "udiv x2, x21, x22",
+        "msub x24, x2, x22, x21",
+        "add x21, x24, x23, lsl #8",
+        "mov w22, w20",
+        "bfxil w22, w21, #0, #16",
+        "mov w4, w22",
+        "cmn wzr, w21, lsl #24",
+        "mov w26, w21"
       ]
     },
     "aad": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": "0xd5",
       "ExpectedArm64ASM": [
-        "lsr w20, w4, #8",
-        "mov w21, #0xa",
-        "mul x20, x20, x21",
-        "add x20, x4, x20",
-        "and x26, x20, #0xff",
-        "bfxil w4, w26, #0, #16",
-        "cmn wzr, w26, lsl #24"
+        "mov w20, w4",
+        "lsr w21, w20, #8",
+        "mov w22, #0xa",
+        "mul x23, x21, x22",
+        "add x21, x20, x23",
+        "and x22, x21, #0xff",
+        "mov w21, w20",
+        "bfxil w21, w22, #0, #16",
+        "mov w4, w21",
+        "cmn wzr, w22, lsl #24",
+        "mov w26, w22"
       ]
     },
     "db 0xd4, 0x40": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 12,
       "Comment": [
         "aam with a different immediate byte base",
         "0xd4"
       ],
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "mov w21, #0x40",
-        "udiv x22, x20, x21",
-        "udiv x2, x20, x21",
-        "msub x20, x2, x21, x20",
-        "add x26, x20, x22, lsl #8",
-        "bfxil w4, w26, #0, #16",
-        "cmn wzr, w26, lsl #24"
+        "mov w20, w4",
+        "uxtb w21, w20",
+        "mov w22, #0x40",
+        "udiv x23, x21, x22",
+        "udiv x2, x21, x22",
+        "msub x24, x2, x22, x21",
+        "add x21, x24, x23, lsl #8",
+        "mov w22, w20",
+        "bfxil w22, w21, #0, #16",
+        "mov w4, w22",
+        "cmn wzr, w21, lsl #24",
+        "mov w26, w21"
       ]
     },
     "db 0xd5, 0x40": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "aad with a different immediate byte base",
         "0xd5"
       ],
       "ExpectedArm64ASM": [
-        "lsr w20, w4, #8",
-        "lsl x20, x20, #6",
-        "add x20, x4, x20",
-        "and x26, x20, #0xff",
-        "bfxil w4, w26, #0, #16",
-        "cmn wzr, w26, lsl #24"
+        "mov w20, w4",
+        "lsr w21, w20, #8",
+        "lsl x22, x21, #6",
+        "add x21, x20, x22",
+        "and x22, x21, #0xff",
+        "mov w21, w20",
+        "bfxil w21, w22, #0, #16",
+        "mov w4, w21",
+        "cmn wzr, w22, lsl #24",
+        "mov w26, w22"
       ]
     },
     "salc": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0xd6",
       "ExpectedArm64ASM": [
         "csetm w20, hs",
-        "bfxil w4, w20, #0, #8"
+        "mov w21, w4",
+        "mov w22, w21",
+        "bfxil w22, w20, #0, #8",
+        "mov w4, w22"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/FlagM/Secondary.json b/unittests/InstructionCountCI/FlagM/Secondary.json
index 4084ceb643..289a4a496e 100644
--- a/unittests/InstructionCountCI/FlagM/Secondary.json
+++ b/unittests/InstructionCountCI/FlagM/Secondary.json
@@ -14,1623 +14,2133 @@
   },
   "Instructions": {
     "ucomiss xmm0, xmm1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x0f 0x2e",
       "ExpectedArm64ASM": [
-        "fcmp s16, s17",
-        "mov w27, #0x0",
-        "cset w26, vc",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "fcmp s2, s3",
+        "mov w20, #0x0",
+        "cset w21, vc",
+        "mov x26, x21",
         "axflag",
-        "cfinv"
+        "cfinv",
+        "mov x27, x20"
       ]
     },
     "comiss xmm0, xmm1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x0f 0x2f",
       "ExpectedArm64ASM": [
-        "fcmp s16, s17",
-        "mov w27, #0x0",
-        "cset w26, vc",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "fcmp s2, s3",
+        "mov w20, #0x0",
+        "cset w21, vc",
+        "mov x26, x21",
         "axflag",
-        "cfinv"
+        "cfinv",
+        "mov x27, x20"
       ]
     },
     "cmovo ax, bx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0x40",
       "ExpectedArm64ASM": [
-        "csel w20, w7, w4, vs",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, vs",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21"
       ]
     },
     "cmovo eax, ebx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x40",
       "ExpectedArm64ASM": [
-        "csel w4, w7, w4, vs"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, vs",
+        "mov x4, x22"
       ]
     },
     "cmovo rax, rbx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x40",
       "ExpectedArm64ASM": [
-        "csel x4, x7, x4, vs"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel x22, x21, x20, vs",
+        "mov x4, x22"
       ]
     },
     "cmovno ax, bx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0x41",
       "ExpectedArm64ASM": [
-        "csel w20, w7, w4, vc",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, vc",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21"
       ]
     },
     "cmovno eax, ebx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x41",
       "ExpectedArm64ASM": [
-        "csel w4, w7, w4, vc"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, vc",
+        "mov x4, x22"
       ]
     },
     "cmovno rax, rbx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x41",
       "ExpectedArm64ASM": [
-        "csel x4, x7, x4, vc"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel x22, x21, x20, vc",
+        "mov x4, x22"
       ]
     },
     "cmovb ax, bx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0x42",
       "ExpectedArm64ASM": [
-        "csel w20, w7, w4, hs",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, hs",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21"
       ]
     },
     "cmovb eax, ebx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x42",
       "ExpectedArm64ASM": [
-        "csel w4, w7, w4, hs"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, hs",
+        "mov x4, x22"
       ]
     },
     "cmovb rax, rbx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x42",
       "ExpectedArm64ASM": [
-        "csel x4, x7, x4, hs"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel x22, x21, x20, hs",
+        "mov x4, x22"
       ]
     },
     "cmovnb ax, bx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0x43",
       "ExpectedArm64ASM": [
-        "csel w20, w7, w4, lo",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, lo",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21"
       ]
     },
     "cmovnb eax, ebx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x43",
       "ExpectedArm64ASM": [
-        "csel w4, w7, w4, lo"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, lo",
+        "mov x4, x22"
       ]
     },
     "cmovnb rax, rbx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x43",
       "ExpectedArm64ASM": [
-        "csel x4, x7, x4, lo"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel x22, x21, x20, lo",
+        "mov x4, x22"
       ]
     },
     "cmovz ax, bx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0x44",
       "ExpectedArm64ASM": [
-        "csel w20, w7, w4, eq",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, eq",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21"
       ]
     },
     "cmovz eax, ebx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x44",
       "ExpectedArm64ASM": [
-        "csel w4, w7, w4, eq"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, eq",
+        "mov x4, x22"
       ]
     },
     "cmovz rax, rbx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x44",
       "ExpectedArm64ASM": [
-        "csel x4, x7, x4, eq"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel x22, x21, x20, eq",
+        "mov x4, x22"
       ]
     },
     "cmovnz ax, bx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0x45",
       "ExpectedArm64ASM": [
-        "csel w20, w7, w4, ne",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, ne",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21"
       ]
     },
     "cmovnz eax, ebx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x45",
       "ExpectedArm64ASM": [
-        "csel w4, w7, w4, ne"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, ne",
+        "mov x4, x22"
       ]
     },
     "cmovnz rax, rbx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x45",
       "ExpectedArm64ASM": [
-        "csel x4, x7, x4, ne"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel x22, x21, x20, ne",
+        "mov x4, x22"
       ]
     },
     "cmovbe ax, bx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x0f 0x46",
       "ExpectedArm64ASM": [
-        "csel w20, w7, w4, hs",
-        "csel w20, w7, w20, eq",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, hs",
+        "csel w23, w21, w22, eq",
+        "mov x21, x20",
+        "bfxil x21, x23, #0, #16",
+        "mov x4, x21"
       ]
     },
     "cmovbe eax, ebx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x46",
       "ExpectedArm64ASM": [
-        "csel w20, w7, w4, hs",
-        "csel w4, w7, w20, eq"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, hs",
+        "csel w20, w21, w22, eq",
+        "mov x4, x20"
       ]
     },
     "cmovbe rax, rbx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x46",
       "ExpectedArm64ASM": [
-        "csel x20, x7, x4, hs",
-        "csel x4, x7, x20, eq"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel x22, x21, x20, hs",
+        "csel x20, x21, x22, eq",
+        "mov x4, x20"
       ]
     },
     "cmovnbe ax, bx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x0f 0x47",
       "ExpectedArm64ASM": [
-        "csel w20, w7, w4, lo",
-        "csel w20, w20, w4, ne",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, lo",
+        "csel w21, w22, w20, ne",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22"
       ]
     },
     "cmovnbe eax, ebx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x47",
       "ExpectedArm64ASM": [
-        "csel w20, w7, w4, lo",
-        "csel w4, w20, w4, ne"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, lo",
+        "csel w21, w22, w20, ne",
+        "mov x4, x21"
       ]
     },
     "cmovnbe rax, rbx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x47",
       "ExpectedArm64ASM": [
-        "csel x20, x7, x4, lo",
-        "csel x4, x20, x4, ne"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel x22, x21, x20, lo",
+        "csel x21, x22, x20, ne",
+        "mov x4, x21"
       ]
     },
     "cmovs ax, bx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0x48",
       "ExpectedArm64ASM": [
-        "csel w20, w7, w4, mi",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, mi",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21"
       ]
     },
     "cmovs eax, ebx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x48",
       "ExpectedArm64ASM": [
-        "csel w4, w7, w4, mi"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, mi",
+        "mov x4, x22"
       ]
     },
     "cmovs rax, rbx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x48",
       "ExpectedArm64ASM": [
-        "csel x4, x7, x4, mi"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel x22, x21, x20, mi",
+        "mov x4, x22"
       ]
     },
     "cmovns ax, bx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0x49",
       "ExpectedArm64ASM": [
-        "csel w20, w7, w4, pl",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, pl",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21"
       ]
     },
     "cmovns eax, ebx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x49",
       "ExpectedArm64ASM": [
-        "csel w4, w7, w4, pl"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, pl",
+        "mov x4, x22"
       ]
     },
     "cmovns rax, rbx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x49",
       "ExpectedArm64ASM": [
-        "csel x4, x7, x4, pl"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel x22, x21, x20, pl",
+        "mov x4, x22"
       ]
     },
     "cmovpe ax, bx": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x0f 0x4a",
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csel w20, w7, w4, ne",
-        "bfxil x4, x20, #0, #16",
-        "msr nzcv, x21"
+        "mov x20, x4",
+        "mov x21, x7",
+        "mov x22, x26",
+        "eor w23, w22, w22, lsr #4",
+        "eor w22, w23, w23, lsr #2",
+        "eon w23, w22, w22, lsr #1",
+        "mrs x22, nzcv",
+        "tst w23, #0x1",
+        "csel w23, w21, w20, ne",
+        "mov x21, x20",
+        "bfxil x21, x23, #0, #16",
+        "mov x4, x21",
+        "msr nzcv, x22"
       ]
     },
     "cmovpe eax, ebx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x0f 0x4a",
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csel w4, w7, w4, ne",
-        "msr nzcv, x21"
+        "mov x20, x4",
+        "mov x21, x7",
+        "mov x22, x26",
+        "eor w23, w22, w22, lsr #4",
+        "eor w22, w23, w23, lsr #2",
+        "eon w23, w22, w22, lsr #1",
+        "mrs x22, nzcv",
+        "tst w23, #0x1",
+        "csel w23, w21, w20, ne",
+        "mov x4, x23",
+        "msr nzcv, x22"
       ]
     },
     "cmovpe rax, rbx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x0f 0x4a",
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csel x4, x7, x4, ne",
-        "msr nzcv, x21"
+        "mov x20, x4",
+        "mov x21, x7",
+        "mov x22, x26",
+        "eor w23, w22, w22, lsr #4",
+        "eor w22, w23, w23, lsr #2",
+        "eon w23, w22, w22, lsr #1",
+        "mrs x22, nzcv",
+        "tst w23, #0x1",
+        "csel x23, x21, x20, ne",
+        "mov x4, x23",
+        "msr nzcv, x22"
       ]
     },
     "cmovnp ax, bx": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x0f 0x4b",
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csel w20, w7, w4, ne",
-        "bfxil x4, x20, #0, #16",
-        "msr nzcv, x21"
+        "mov x20, x4",
+        "mov x21, x7",
+        "mov x22, x26",
+        "eor w23, w22, w22, lsr #4",
+        "eor w22, w23, w23, lsr #2",
+        "eor w23, w22, w22, lsr #1",
+        "mrs x22, nzcv",
+        "tst w23, #0x1",
+        "csel w23, w21, w20, ne",
+        "mov x21, x20",
+        "bfxil x21, x23, #0, #16",
+        "mov x4, x21",
+        "msr nzcv, x22"
       ]
     },
     "cmovnp eax, ebx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x0f 0x4b",
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csel w4, w7, w4, ne",
-        "msr nzcv, x21"
+        "mov x20, x4",
+        "mov x21, x7",
+        "mov x22, x26",
+        "eor w23, w22, w22, lsr #4",
+        "eor w22, w23, w23, lsr #2",
+        "eor w23, w22, w22, lsr #1",
+        "mrs x22, nzcv",
+        "tst w23, #0x1",
+        "csel w23, w21, w20, ne",
+        "mov x4, x23",
+        "msr nzcv, x22"
       ]
     },
     "cmovnp rax, rbx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x0f 0x4b",
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csel x4, x7, x4, ne",
-        "msr nzcv, x21"
+        "mov x20, x4",
+        "mov x21, x7",
+        "mov x22, x26",
+        "eor w23, w22, w22, lsr #4",
+        "eor w22, w23, w23, lsr #2",
+        "eor w23, w22, w22, lsr #1",
+        "mrs x22, nzcv",
+        "tst w23, #0x1",
+        "csel x23, x21, x20, ne",
+        "mov x4, x23",
+        "msr nzcv, x22"
       ]
     },
     "cmovl ax, bx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0x4c",
       "ExpectedArm64ASM": [
-        "csel w20, w7, w4, lt",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, lt",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21"
       ]
     },
     "cmovl eax, ebx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x4c",
       "ExpectedArm64ASM": [
-        "csel w4, w7, w4, lt"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, lt",
+        "mov x4, x22"
       ]
     },
     "cmovl rax, rbx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x4c",
       "ExpectedArm64ASM": [
-        "csel x4, x7, x4, lt"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel x22, x21, x20, lt",
+        "mov x4, x22"
       ]
     },
     "cmovnl ax, bx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0x4d",
       "ExpectedArm64ASM": [
-        "csel w20, w7, w4, ge",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, ge",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21"
       ]
     },
     "cmovnl eax, ebx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x4d",
       "ExpectedArm64ASM": [
-        "csel w4, w7, w4, ge"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, ge",
+        "mov x4, x22"
       ]
     },
     "cmovnl rax, rbx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x4d",
       "ExpectedArm64ASM": [
-        "csel x4, x7, x4, ge"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel x22, x21, x20, ge",
+        "mov x4, x22"
       ]
     },
     "cmovle ax, bx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0x4e",
       "ExpectedArm64ASM": [
-        "csel w20, w7, w4, le",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, le",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21"
       ]
     },
     "cmovle eax, ebx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x4e",
       "ExpectedArm64ASM": [
-        "csel w4, w7, w4, le"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, le",
+        "mov x4, x22"
       ]
     },
     "cmovle rax, rbx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x4e",
       "ExpectedArm64ASM": [
-        "csel x4, x7, x4, le"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel x22, x21, x20, le",
+        "mov x4, x22"
       ]
     },
     "cmovnle ax, bx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0x4f",
       "ExpectedArm64ASM": [
-        "csel w20, w7, w4, gt",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, gt",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21"
       ]
     },
     "cmovnle eax, ebx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x4f",
       "ExpectedArm64ASM": [
-        "csel w4, w7, w4, gt"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, gt",
+        "mov x4, x22"
       ]
     },
     "cmovnle rax, rbx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x4f",
       "ExpectedArm64ASM": [
-        "csel x4, x7, x4, gt"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel x22, x21, x20, gt",
+        "mov x4, x22"
       ]
     },
     "seto al": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x90",
       "ExpectedArm64ASM": [
         "cset x20, vs",
-        "bfxil x4, x20, #0, #8"
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "setno al": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x91",
       "ExpectedArm64ASM": [
         "cset x20, vc",
-        "bfxil x4, x20, #0, #8"
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "setb al": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x92",
       "ExpectedArm64ASM": [
         "cset x20, hs",
-        "bfxil x4, x20, #0, #8"
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "setnb al": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x93",
       "ExpectedArm64ASM": [
         "cset x20, lo",
-        "bfxil x4, x20, #0, #8"
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "setz al": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x94",
       "ExpectedArm64ASM": [
         "cset x20, eq",
-        "bfxil x4, x20, #0, #8"
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "setnz al": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x95",
       "ExpectedArm64ASM": [
         "cset x20, ne",
-        "bfxil x4, x20, #0, #8"
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "setbe al": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x0f 0x96",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
         "cset x21, hs",
-        "csel x20, x20, x21, eq",
-        "bfxil x4, x20, #0, #8"
+        "csel x22, x20, x21, eq",
+        "mov x20, x4",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #8",
+        "mov x4, x21"
       ]
     },
     "setnbe al": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0x97",
       "ExpectedArm64ASM": [
         "cset x20, lo",
-        "csel x20, x20, xzr, ne",
-        "bfxil x4, x20, #0, #8"
+        "csel x21, x20, xzr, ne",
+        "mov x20, x4",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22"
       ]
     },
     "sets al": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x98",
       "ExpectedArm64ASM": [
         "cset x20, mi",
-        "bfxil x4, x20, #0, #8"
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "setns al": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x99",
       "ExpectedArm64ASM": [
         "cset x20, pl",
-        "bfxil x4, x20, #0, #8"
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "setpe al": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x0f 0x9a",
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "and x20, x20, #0x1",
-        "bfxil x4, x20, #0, #8"
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "and x20, x21, #0x1",
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "setnp al": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x0f 0x9b",
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "and x20, x20, #0x1",
-        "bfxil x4, x20, #0, #8"
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "and x20, x21, #0x1",
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "setl al": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x9c",
       "ExpectedArm64ASM": [
         "cset x20, lt",
-        "bfxil x4, x20, #0, #8"
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "setnl al": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x9d",
       "ExpectedArm64ASM": [
         "cset x20, ge",
-        "bfxil x4, x20, #0, #8"
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "setle al": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x9e",
       "ExpectedArm64ASM": [
         "cset x20, le",
-        "bfxil x4, x20, #0, #8"
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "setnle al": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x9f",
       "ExpectedArm64ASM": [
         "cset x20, gt",
-        "bfxil x4, x20, #0, #8"
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "bt ax, bx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0xa3",
       "ExpectedArm64ASM": [
-        "and x20, x7, #0xf",
-        "lsr w20, w4, w20",
+        "mov x20, x7",
+        "mov x21, x4",
+        "and x22, x20, #0xf",
+        "lsr w20, w21, w22",
         "rmif x20, #63, #nzCv"
       ]
     },
     "bt [rax], bx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x0f 0xa3",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "sbfx x21, x7, #3, #13",
-        "ldrb w21, [x4, x21, sxtx]",
-        "lsr w20, w21, w20",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "sbfx x23, x20, #3, #13",
+        "ldrb w20, [x21, x23, sxtx]",
+        "lsr w21, w20, w22",
+        "rmif x21, #63, #nzCv"
       ]
     },
     "bt eax, ebx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0xa3",
       "ExpectedArm64ASM": [
-        "lsr w20, w4, w7",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x7",
+        "mov x21, x4",
+        "lsr w22, w21, w20",
+        "rmif x22, #63, #nzCv"
       ]
     },
     "bt [rax], ebx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x0f 0xa3",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "sbfx x21, x7, #3, #29",
-        "ldrb w21, [x4, x21, sxtx]",
-        "lsr w20, w21, w20",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "sbfx x23, x20, #3, #29",
+        "ldrb w20, [x21, x23, sxtx]",
+        "lsr w21, w20, w22",
+        "rmif x21, #63, #nzCv"
       ]
     },
     "bt rax, rbx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0xa3",
       "ExpectedArm64ASM": [
-        "lsr x20, x4, x7",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x7",
+        "mov x21, x4",
+        "lsr x22, x21, x20",
+        "rmif x22, #63, #nzCv"
       ]
     },
     "bt [rax], rbx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x0f 0xa3",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "asr x21, x7, #3",
-        "ldrb w21, [x4, x21, sxtx]",
-        "lsr w20, w21, w20",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "asr x23, x20, #3",
+        "ldrb w20, [x21, x23, sxtx]",
+        "lsr w21, w20, w22",
+        "rmif x21, #63, #nzCv"
       ]
     },
     "shld ax, bx, 1": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 15,
       "Comment": "0x0f 0xac",
       "ExpectedArm64ASM": [
-        "uxth w20, w7",
-        "uxth w21, w4",
-        "lsl x22, x21, #1",
-        "lsr w20, w20, #15",
-        "orr x26, x22, x20",
-        "bfxil x4, x26, #0, #16",
-        "cmn wzr, w26, lsl #16",
-        "rmif x21, #14, #nzCv",
-        "eor w20, w26, w21",
+        "mov x20, x7",
+        "uxth w21, w20",
+        "mov x20, x4",
+        "uxth w22, w20",
+        "lsl x23, x22, #1",
+        "lsr w24, w21, #15",
+        "orr x21, x23, x24",
+        "mov x23, x20",
+        "bfxil x23, x21, #0, #16",
+        "mov x4, x23",
+        "cmn wzr, w21, lsl #16",
+        "rmif x22, #14, #nzCv",
+        "mov x26, x21",
+        "eor w20, w21, w22",
         "rmif x20, #15, #nzcV"
       ]
     },
     "shld ax, bx, 15": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x0f 0xac",
       "ExpectedArm64ASM": [
-        "uxth w20, w7",
-        "uxth w21, w4",
-        "lsl x22, x21, #15",
-        "lsr w20, w20, #1",
-        "orr x26, x22, x20",
-        "bfxil x4, x26, #0, #16",
-        "cmn wzr, w26, lsl #16",
-        "rmif x21, #0, #nzCv"
+        "mov x20, x7",
+        "uxth w21, w20",
+        "mov x20, x4",
+        "uxth w22, w20",
+        "lsl x23, x22, #15",
+        "lsr w24, w21, #1",
+        "orr x21, x23, x24",
+        "mov x23, x20",
+        "bfxil x23, x21, #0, #16",
+        "mov x4, x23",
+        "cmn wzr, w21, lsl #16",
+        "rmif x22, #0, #nzCv",
+        "mov x26, x21"
       ]
     },
     "shld ax, bx, 16": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 12,
       "Comment": "0x0f 0xac",
       "ExpectedArm64ASM": [
-        "uxth w20, w7",
-        "uxth w21, w4",
-        "lsl x22, x21, #16",
-        "orr x26, x22, x20",
-        "bfxil x4, x26, #0, #16",
-        "cmn wzr, w26, lsl #16",
-        "rmif x21, #63, #nzCv"
+        "mov x20, x7",
+        "uxth w21, w20",
+        "mov x20, x4",
+        "uxth w22, w20",
+        "lsl x23, x22, #16",
+        "orr x24, x23, x21",
+        "mov x21, x20",
+        "bfxil x21, x24, #0, #16",
+        "mov x4, x21",
+        "cmn wzr, w24, lsl #16",
+        "rmif x22, #63, #nzCv",
+        "mov x26, x24"
       ]
     },
     "shld ax, bx, 31": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x0f 0xac",
       "ExpectedArm64ASM": [
-        "uxth w20, w7",
-        "uxth w21, w4",
-        "lsl x22, x21, #31",
-        "lsr w20, w20, #17",
-        "orr x26, x22, x20",
-        "bfxil x4, x26, #0, #16",
-        "cmn wzr, w26, lsl #16",
-        "rmif x21, #0, #nzCv"
+        "mov x20, x7",
+        "uxth w21, w20",
+        "mov x20, x4",
+        "uxth w22, w20",
+        "lsl x23, x22, #31",
+        "lsr w24, w21, #17",
+        "orr x21, x23, x24",
+        "mov x23, x20",
+        "bfxil x23, x21, #0, #16",
+        "mov x4, x23",
+        "cmn wzr, w21, lsl #16",
+        "rmif x22, #0, #nzCv",
+        "mov x26, x21"
       ]
     },
     "shld eax, ebx, 1": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x0f 0xac",
       "ExpectedArm64ASM": [
-        "mov w20, w7",
-        "mov w21, w4",
-        "extr w4, w21, w20, #31",
-        "tst w4, w4",
-        "rmif x21, #30, #nzCv",
-        "mov x26, x4",
-        "eor w20, w4, w21",
-        "rmif x20, #31, #nzcV"
+        "mov x20, x7",
+        "mov w21, w20",
+        "mov x20, x4",
+        "mov w22, w20",
+        "extr w20, w22, w21, #31",
+        "mov x4, x20",
+        "tst w20, w20",
+        "rmif x22, #30, #nzCv",
+        "mov x26, x20",
+        "eor w21, w20, w22",
+        "rmif x21, #31, #nzcV"
       ]
     },
     "shld eax, ebx, 15": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x0f 0xac",
       "ExpectedArm64ASM": [
-        "mov w20, w7",
-        "mov w21, w4",
-        "extr w4, w21, w20, #17",
-        "tst w4, w4",
-        "rmif x21, #16, #nzCv",
-        "mov x26, x4"
+        "mov x20, x7",
+        "mov w21, w20",
+        "mov x20, x4",
+        "mov w22, w20",
+        "extr w20, w22, w21, #17",
+        "mov x4, x20",
+        "tst w20, w20",
+        "rmif x22, #16, #nzCv",
+        "mov x26, x20"
       ]
     },
     "shld eax, ebx, 16": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x0f 0xac",
       "ExpectedArm64ASM": [
-        "mov w20, w7",
-        "mov w21, w4",
-        "extr w4, w21, w20, #16",
-        "tst w4, w4",
-        "rmif x21, #15, #nzCv",
-        "mov x26, x4"
+        "mov x20, x7",
+        "mov w21, w20",
+        "mov x20, x4",
+        "mov w22, w20",
+        "extr w20, w22, w21, #16",
+        "mov x4, x20",
+        "tst w20, w20",
+        "rmif x22, #15, #nzCv",
+        "mov x26, x20"
       ]
     },
     "shld eax, ebx, 31": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x0f 0xac",
       "ExpectedArm64ASM": [
-        "mov w20, w7",
-        "mov w21, w4",
-        "extr w4, w21, w20, #1",
-        "tst w4, w4",
-        "rmif x21, #0, #nzCv",
-        "mov x26, x4"
+        "mov x20, x7",
+        "mov w21, w20",
+        "mov x20, x4",
+        "mov w22, w20",
+        "extr w20, w22, w21, #1",
+        "mov x4, x20",
+        "tst w20, w20",
+        "rmif x22, #0, #nzCv",
+        "mov x26, x20"
       ]
     },
     "shld rax, rbx, 1": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x0f 0xac",
       "ExpectedArm64ASM": [
-        "mov x20, x4",
-        "extr x4, x20, x7, #63",
-        "tst x4, x4",
-        "rmif x20, #62, #nzCv",
-        "mov x26, x4",
-        "eor x20, x4, x20",
+        "mov x20, x7",
+        "mov x21, x4",
+        "extr x22, x21, x20, #63",
+        "mov x4, x22",
+        "tst x22, x22",
+        "rmif x21, #62, #nzCv",
+        "mov x26, x22",
+        "eor x20, x22, x21",
         "rmif x20, #63, #nzcV"
       ]
     },
     "shld rax, rbx, 15": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x0f 0xac",
       "ExpectedArm64ASM": [
-        "mov x20, x4",
-        "extr x4, x20, x7, #49",
-        "tst x4, x4",
-        "rmif x20, #48, #nzCv",
-        "mov x26, x4"
+        "mov x20, x7",
+        "mov x21, x4",
+        "extr x22, x21, x20, #49",
+        "mov x4, x22",
+        "tst x22, x22",
+        "rmif x21, #48, #nzCv",
+        "mov x26, x22"
       ]
     },
     "shld rax, rbx, 32": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x0f 0xac",
       "ExpectedArm64ASM": [
-        "mov x20, x4",
-        "extr x4, x20, x7, #32",
-        "tst x4, x4",
-        "rmif x20, #31, #nzCv",
-        "mov x26, x4"
+        "mov x20, x7",
+        "mov x21, x4",
+        "extr x22, x21, x20, #32",
+        "mov x4, x22",
+        "tst x22, x22",
+        "rmif x21, #31, #nzCv",
+        "mov x26, x22"
       ]
     },
     "shld rax, rbx, 63": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x0f 0xac",
       "ExpectedArm64ASM": [
-        "mov x20, x4",
-        "extr x4, x20, x7, #1",
-        "tst x4, x4",
-        "rmif x20, #0, #nzCv",
-        "mov x26, x4"
+        "mov x20, x7",
+        "mov x21, x4",
+        "extr x22, x21, x20, #1",
+        "mov x4, x22",
+        "tst x22, x22",
+        "rmif x21, #0, #nzCv",
+        "mov x26, x22"
       ]
     },
     "shld ax, bx, cl": {
-      "ExpectedInstructionCount": 22,
+      "ExpectedInstructionCount": 30,
       "Comment": "0x0f 0xad",
       "ExpectedArm64ASM": [
-        "uxth w20, w7",
-        "uxth w21, w4",
-        "and x22, x5, #0x1f",
+        "mov x20, x7",
+        "uxth w21, w20",
+        "mov x20, x4",
+        "uxth w22, w20",
+        "mov x23, x5",
+        "and x24, x23, #0x1f",
         "mov w23, #0x10",
-        "sub x23, x23, x22",
-        "lsl x24, x21, x22",
-        "lsr w20, w20, w23",
-        "orr x20, x24, x20",
+        "sub x25, x23, x24",
+        "lsl x23, x22, x24",
+        "lsr w30, w21, w25",
+        "orr x21, x23, x30",
         "mrs x23, nzcv",
-        "cmp x22, #0x0 (0)",
-        "csel x20, x21, x20, eq",
-        "bfxil x4, x20, #0, #16",
+        "cmp x24, #0x0 (0)",
+        "csel x25, x22, x21, eq",
+        "mov x21, x20",
+        "bfxil x21, x25, #0, #16",
+        "mov x4, x21",
+        "mov x20, x26",
         "msr nzcv, x23",
-        "cbz w22, #+0x24",
-        "cmn wzr, w20, lsl #16",
-        "mov x26, x20",
+        "mov x21, x20",
+        "cbz w24, #+0x24",
+        "cmn wzr, w25, lsl #16",
+        "mov x21, x25",
         "mov w0, #0x10",
-        "sub w0, w0, w22",
-        "lsr w0, w21, w0",
-        "eor w2, w21, w20",
+        "sub w0, w0, w24",
+        "lsr w0, w22, w0",
+        "eor w2, w22, w25",
         "rmif x0, #63, #nzCv",
-        "rmif x2, #15, #nzcV"
+        "rmif x2, #15, #nzcV",
+        "mov x26, x21"
       ]
     },
     "shld eax, ebx, cl": {
-      "ExpectedInstructionCount": 19,
+      "ExpectedInstructionCount": 26,
       "Comment": "0x0f 0xad",
       "ExpectedArm64ASM": [
-        "mov w20, w7",
-        "mov w21, w4",
-        "and x22, x5, #0x1f",
-        "neg x23, x22",
-        "lsl x24, x21, x22",
-        "lsr w20, w20, w23",
-        "orr x20, x24, x20",
-        "mrs x23, nzcv",
-        "cmp x22, #0x0 (0)",
-        "csel x20, x21, x20, eq",
-        "mov w4, w20",
-        "msr nzcv, x23",
-        "cbz w22, #+0x1c",
-        "ands w26, w20, w20",
-        "neg w0, w22",
-        "lsr w0, w21, w0",
-        "eor w2, w21, w20",
+        "mov x20, x7",
+        "mov w21, w20",
+        "mov x20, x4",
+        "mov w22, w20",
+        "mov x20, x5",
+        "and x23, x20, #0x1f",
+        "neg x20, x23",
+        "lsl x24, x22, x23",
+        "lsr w25, w21, w20",
+        "orr x20, x24, x25",
+        "mrs x21, nzcv",
+        "cmp x23, #0x0 (0)",
+        "csel x24, x22, x20, eq",
+        "mov w20, w24",
+        "mov x4, x20",
+        "mov x20, x26",
+        "msr nzcv, x21",
+        "mov x21, x20",
+        "cbz w23, #+0x1c",
+        "ands w21, w24, w24",
+        "neg w0, w23",
+        "lsr w0, w22, w0",
+        "eor w2, w22, w24",
         "rmif x0, #63, #nzCv",
-        "rmif x2, #31, #nzcV"
+        "rmif x2, #31, #nzcV",
+        "mov x26, x21"
       ]
     },
     "shld rax, rbx, cl": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 23,
       "Comment": "0x0f 0xad",
       "ExpectedArm64ASM": [
-        "mov x20, x4",
-        "and x21, x5, #0x3f",
-        "neg x22, x21",
-        "lsl x23, x20, x21",
-        "lsr x22, x7, x22",
-        "orr x22, x23, x22",
-        "mrs x23, nzcv",
-        "cmp x21, #0x0 (0)",
-        "csel x4, x20, x22, eq",
-        "msr nzcv, x23",
-        "cbz x21, #+0x1c",
-        "ands x26, x4, x4",
-        "neg x0, x21",
-        "lsr x0, x20, x0",
-        "eor x2, x20, x4",
+        "mov x20, x7",
+        "mov x21, x4",
+        "mov x22, x5",
+        "and x23, x22, #0x3f",
+        "neg x22, x23",
+        "lsl x24, x21, x23",
+        "lsr x25, x20, x22",
+        "orr x20, x24, x25",
+        "mrs x22, nzcv",
+        "cmp x23, #0x0 (0)",
+        "csel x24, x21, x20, eq",
+        "mov x4, x24",
+        "mov x20, x26",
+        "msr nzcv, x22",
+        "mov x22, x20",
+        "cbz x23, #+0x1c",
+        "ands x22, x24, x24",
+        "neg x0, x23",
+        "lsr x0, x21, x0",
+        "eor x2, x21, x24",
         "rmif x0, #63, #nzCv",
-        "rmif x2, #63, #nzcV"
+        "rmif x2, #63, #nzcV",
+        "mov x26, x22"
       ]
     },
     "bts ax, bx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x0f 0xab",
       "ExpectedArm64ASM": [
-        "and x20, x7, #0xf",
-        "lsr w21, w4, w20",
-        "rmif x21, #63, #nzCv",
-        "mov w21, #0x1",
-        "lsl w20, w21, w20",
-        "orr w20, w4, w20",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x7",
+        "mov x21, x4",
+        "and x22, x20, #0xf",
+        "lsr w20, w21, w22",
+        "rmif x20, #63, #nzCv",
+        "mov w20, #0x1",
+        "lsl w23, w20, w22",
+        "orr w20, w21, w23",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x4, x22"
       ]
     },
     "bts [rax], bx": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x0f 0xab",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "sbfx x21, x7, #3, #13",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldrb w23, [x4, x21, sxtx]",
-        "orr x22, x23, x22",
-        "strb w22, [x4, x21, sxtx]",
-        "lsr w20, w23, w20",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "sbfx x23, x20, #3, #13",
+        "mov w20, #0x1",
+        "lsl x24, x20, x22",
+        "ldrb w20, [x21, x23, sxtx]",
+        "orr x25, x20, x24",
+        "strb w25, [x21, x23, sxtx]",
+        "lsr w21, w20, w22",
+        "rmif x21, #63, #nzCv"
       ]
     },
     "bts eax, ebx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x0f 0xab",
       "ExpectedArm64ASM": [
-        "lsr w20, w4, w7",
-        "rmif x20, #63, #nzCv",
-        "mov w20, #0x1",
-        "lsl w20, w20, w7",
-        "orr w4, w4, w20"
+        "mov x20, x7",
+        "mov x21, x4",
+        "lsr w22, w21, w20",
+        "rmif x22, #63, #nzCv",
+        "mov w22, #0x1",
+        "lsl w23, w22, w20",
+        "orr w20, w21, w23",
+        "mov x4, x20"
       ]
     },
     "bts [rax], ebx": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x0f 0xab",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "sbfx x21, x7, #3, #29",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldrb w23, [x4, x21, sxtx]",
-        "orr x22, x23, x22",
-        "strb w22, [x4, x21, sxtx]",
-        "lsr w20, w23, w20",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "sbfx x23, x20, #3, #29",
+        "mov w20, #0x1",
+        "lsl x24, x20, x22",
+        "ldrb w20, [x21, x23, sxtx]",
+        "orr x25, x20, x24",
+        "strb w25, [x21, x23, sxtx]",
+        "lsr w21, w20, w22",
+        "rmif x21, #63, #nzCv"
       ]
     },
     "bts rax, rbx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x0f 0xab",
       "ExpectedArm64ASM": [
-        "lsr x20, x4, x7",
-        "rmif x20, #63, #nzCv",
-        "mov w20, #0x1",
-        "lsl x20, x20, x7",
-        "orr x4, x4, x20"
+        "mov x20, x7",
+        "mov x21, x4",
+        "lsr x22, x21, x20",
+        "rmif x22, #63, #nzCv",
+        "mov w22, #0x1",
+        "lsl x23, x22, x20",
+        "orr x20, x21, x23",
+        "mov x4, x20"
       ]
     },
     "bts [rax], rbx": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x0f 0xab",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "asr x21, x7, #3",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldrb w23, [x4, x21, sxtx]",
-        "orr x22, x23, x22",
-        "strb w22, [x4, x21, sxtx]",
-        "lsr w20, w23, w20",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "asr x23, x20, #3",
+        "mov w20, #0x1",
+        "lsl x24, x20, x22",
+        "ldrb w20, [x21, x23, sxtx]",
+        "orr x25, x20, x24",
+        "strb w25, [x21, x23, sxtx]",
+        "lsr w21, w20, w22",
+        "rmif x21, #63, #nzCv"
       ]
     },
     "lock bts [rax], bx": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 10,
       "Comment": "0x0f 0xb3",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "sbfx x21, x7, #3, #13",
-        "add x21, x4, x21",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldsetalb w22, w21, [x21]",
-        "lsr w20, w21, w20",
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "sbfx x23, x20, #3, #13",
+        "add x20, x21, x23",
+        "mov w21, #0x1",
+        "lsl x23, x21, x22",
+        "ldsetalb w23, w21, [x20]",
+        "lsr w20, w21, w22",
         "rmif x20, #63, #nzCv"
       ]
     },
     "lock bts [rax], ebx": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 10,
       "Comment": "0x0f 0xb3",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "sbfx x21, x7, #3, #29",
-        "add x21, x4, x21",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldsetalb w22, w21, [x21]",
-        "lsr w20, w21, w20",
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "sbfx x23, x20, #3, #29",
+        "add x20, x21, x23",
+        "mov w21, #0x1",
+        "lsl x23, x21, x22",
+        "ldsetalb w23, w21, [x20]",
+        "lsr w20, w21, w22",
         "rmif x20, #63, #nzCv"
       ]
     },
     "lock bts [rax], rbx": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 10,
       "Comment": "0x0f 0xb3",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "asr x21, x7, #3",
-        "add x21, x4, x21",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldsetalb w22, w21, [x21]",
-        "lsr w20, w21, w20",
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "asr x23, x20, #3",
+        "add x20, x21, x23",
+        "mov w21, #0x1",
+        "lsl x23, x21, x22",
+        "ldsetalb w23, w21, [x20]",
+        "lsr w20, w21, w22",
         "rmif x20, #63, #nzCv"
       ]
     },
     "imul ax, bx": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 12,
       "Comment": "0x0f 0xaf",
       "ExpectedArm64ASM": [
-        "sxth x20, w4",
-        "sxth x21, w7",
-        "mul x20, x20, x21",
-        "sbfx x21, x20, #16, #16",
-        "bfxil x4, x20, #0, #16",
-        "sbfx x20, x20, #15, #1",
-        "cmp x21, x20",
+        "mov x20, x4",
+        "mov x21, x7",
+        "sxth x22, w20",
+        "sxth x23, w21",
+        "mul x21, x22, x23",
+        "sbfx x22, x21, #16, #16",
+        "mov x23, x20",
+        "bfxil x23, x21, #0, #16",
+        "mov x4, x23",
+        "sbfx x20, x21, #15, #1",
+        "cmp x22, x20",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "imul eax, ebx": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x0f 0xaf",
       "ExpectedArm64ASM": [
-        "smull x20, w4, w7",
-        "asr x20, x20, #32",
-        "mul w4, w4, w7",
-        "sbfx x21, x4, #31, #1",
-        "cmp x20, x21",
+        "mov x20, x4",
+        "mov x21, x7",
+        "smull x22, w20, w21",
+        "asr x23, x22, #32",
+        "mul w22, w20, w21",
+        "mov x4, x22",
+        "sbfx x20, x22, #31, #1",
+        "cmp x23, x20",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "imul rax, rbx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x0f 0xaf",
       "ExpectedArm64ASM": [
-        "smulh x20, x4, x7",
-        "mul x4, x4, x7",
-        "asr x21, x4, #63",
-        "cmp x20, x21",
+        "mov x20, x4",
+        "mov x21, x7",
+        "smulh x22, x20, x21",
+        "mul x23, x20, x21",
+        "mov x4, x23",
+        "asr x20, x23, #63",
+        "cmp x22, x20",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "cmpxchg cl, bl": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 20,
       "ExpectedArm64ASM": [
-        "uxtb w20, w7",
-        "uxtb w21, w5",
-        "uxtb x22, w4",
-        "eor w27, w22, w21",
-        "lsl w0, w22, #24",
-        "cmp w0, w21, lsl #24",
-        "sub w26, w22, w21",
+        "mov x20, x7",
+        "uxtb w21, w20",
+        "mov x20, x5",
+        "uxtb w22, w20",
+        "mov x23, x4",
+        "uxtb x24, w23",
+        "eor w25, w24, w22",
+        "mov x27, x25",
+        "lsl w0, w24, #24",
+        "cmp w0, w22, lsl #24",
+        "sub w25, w24, w22",
+        "mov x26, x25",
         "cfinv",
-        "bfxil x4, x21, #0, #8",
-        "csel x20, x20, x21, eq",
-        "bfxil x5, x20, #0, #8"
+        "mov x24, x23",
+        "bfxil x24, x22, #0, #8",
+        "mov x4, x24",
+        "csel x23, x21, x22, eq",
+        "mov x21, x20",
+        "bfxil x21, x23, #0, #8",
+        "mov x5, x21"
       ]
     },
     "cmpxchg cx, bx": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 20,
       "ExpectedArm64ASM": [
-        "uxth w20, w7",
-        "uxth w21, w5",
-        "uxth x22, w4",
-        "eor w27, w22, w21",
-        "lsl w0, w22, #16",
-        "cmp w0, w21, lsl #16",
-        "sub w26, w22, w21",
+        "mov x20, x7",
+        "uxth w21, w20",
+        "mov x20, x5",
+        "uxth w22, w20",
+        "mov x23, x4",
+        "uxth x24, w23",
+        "eor w25, w24, w22",
+        "mov x27, x25",
+        "lsl w0, w24, #16",
+        "cmp w0, w22, lsl #16",
+        "sub w25, w24, w22",
+        "mov x26, x25",
         "cfinv",
-        "bfxil x4, x21, #0, #16",
-        "csel x20, x20, x21, eq",
-        "bfxil x5, x20, #0, #16"
+        "mov x24, x23",
+        "bfxil x24, x22, #0, #16",
+        "mov x4, x24",
+        "csel x23, x21, x22, eq",
+        "mov x21, x20",
+        "bfxil x21, x23, #0, #16",
+        "mov x5, x21"
       ]
     },
     "cmpxchg ecx, ebx": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 15,
       "ExpectedArm64ASM": [
-        "mov w20, w7",
-        "mov w21, w5",
-        "mov w22, w4",
-        "eor w27, w22, w21",
-        "subs w26, w22, w21",
+        "mov x20, x7",
+        "mov w21, w20",
+        "mov x20, x5",
+        "mov x22, x4",
+        "mov w23, w20",
+        "mov w24, w22",
+        "eor w25, w24, w23",
+        "mov x27, x25",
+        "subs w25, w24, w23",
+        "mov x26, x25",
         "cfinv",
-        "csel x4, x4, x21, eq",
-        "csel x5, x20, x5, eq"
+        "csel x24, x22, x23, eq",
+        "mov x4, x24",
+        "csel x22, x21, x20, eq",
+        "mov x5, x22"
       ]
     },
     "cmpxchg rcx, rbx": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 11,
       "ExpectedArm64ASM": [
-        "mov x20, x5",
-        "eor w27, w4, w20",
-        "subs x26, x4, x20",
+        "mov x20, x7",
+        "mov x21, x5",
+        "mov x22, x4",
+        "eor w23, w22, w21",
+        "mov x27, x23",
+        "subs x23, x22, x21",
+        "mov x26, x23",
         "cfinv",
-        "mov x4, x20",
-        "csel x5, x7, x20, eq"
+        "mov x4, x21",
+        "csel x22, x20, x21, eq",
+        "mov x5, x22"
       ]
     },
     "cmpxchg [rax], rbx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x0f 0xb1",
       "ExpectedArm64ASM": [
-        "mov x20, x4",
-        "mov x1, x20",
-        "casal x1, x7, [x20]",
-        "mov x4, x1",
-        "eor w27, w20, w4",
-        "subs x26, x20, x4",
+        "mov x20, x7",
+        "mov x21, x4",
+        "mov x1, x21",
+        "casal x1, x20, [x21]",
+        "mov x22, x1",
+        "mov x4, x22",
+        "eor w20, w21, w22",
+        "mov x27, x20",
+        "subs x20, x21, x22",
+        "mov x26, x20",
         "cfinv"
       ]
     },
     "cmpxchg al, bl": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 15,
       "Comment": "0x0f 0xb0",
       "ExpectedArm64ASM": [
-        "uxtb w20, w7",
-        "uxtb w21, w4",
-        "uxtb x22, w4",
-        "eor w27, w22, w21",
-        "lsl w0, w22, #24",
-        "cmp w0, w21, lsl #24",
-        "sub w26, w22, w21",
+        "mov x20, x7",
+        "uxtb w21, w20",
+        "mov x20, x4",
+        "uxtb w22, w20",
+        "uxtb x23, w20",
+        "eor w24, w23, w22",
+        "mov x27, x24",
+        "lsl w0, w23, #24",
+        "cmp w0, w22, lsl #24",
+        "sub w24, w23, w22",
+        "mov x26, x24",
         "cfinv",
-        "bfxil x4, x20, #0, #8"
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22"
       ]
     },
     "cmpxchg [rax], bl": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 17,
       "Comment": "0x0f 0xb0",
       "ExpectedArm64ASM": [
-        "uxtb w20, w7",
-        "uxtb w21, w4",
-        "mov w1, w21",
-        "casalb w1, w20, [x4]",
-        "mov w20, w1",
-        "bfxil x4, x20, #0, #8",
-        "eor w27, w21, w20",
-        "lsl w0, w21, #24",
-        "cmp w0, w20, lsl #24",
-        "sub w26, w21, w20",
+        "mov x20, x7",
+        "uxtb w21, w20",
+        "mov x20, x4",
+        "uxtb w22, w20",
+        "mov w1, w22",
+        "casalb w1, w21, [x20]",
+        "mov w23, w1",
+        "mov x21, x20",
+        "bfxil x21, x23, #0, #8",
+        "mov x4, x21",
+        "eor w20, w22, w23",
+        "mov x27, x20",
+        "lsl w0, w22, #24",
+        "cmp w0, w23, lsl #24",
+        "sub w20, w22, w23",
+        "mov x26, x20",
         "cfinv"
       ]
     },
     "cmpxchg ax, bx": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 15,
       "Comment": "0x0f 0xb1",
       "ExpectedArm64ASM": [
-        "uxth w20, w7",
-        "uxth w21, w4",
-        "uxth x22, w4",
-        "eor w27, w22, w21",
-        "lsl w0, w22, #16",
-        "cmp w0, w21, lsl #16",
-        "sub w26, w22, w21",
+        "mov x20, x7",
+        "uxth w21, w20",
+        "mov x20, x4",
+        "uxth w22, w20",
+        "uxth x23, w20",
+        "eor w24, w23, w22",
+        "mov x27, x24",
+        "lsl w0, w23, #16",
+        "cmp w0, w22, lsl #16",
+        "sub w24, w23, w22",
+        "mov x26, x24",
         "cfinv",
-        "bfxil x4, x20, #0, #16"
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22"
       ]
     },
     "cmpxchg [rax], bx": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 17,
       "Comment": "0x0f 0xb1",
       "ExpectedArm64ASM": [
-        "uxth w20, w7",
-        "uxth w21, w4",
-        "mov w1, w21",
-        "casalh w1, w20, [x4]",
-        "mov w20, w1",
-        "bfxil x4, x20, #0, #16",
-        "eor w27, w21, w20",
-        "lsl w0, w21, #16",
-        "cmp w0, w20, lsl #16",
-        "sub w26, w21, w20",
+        "mov x20, x7",
+        "uxth w21, w20",
+        "mov x20, x4",
+        "uxth w22, w20",
+        "mov w1, w22",
+        "casalh w1, w21, [x20]",
+        "mov w23, w1",
+        "mov x21, x20",
+        "bfxil x21, x23, #0, #16",
+        "mov x4, x21",
+        "eor w20, w22, w23",
+        "mov x27, x20",
+        "lsl w0, w22, #16",
+        "cmp w0, w23, lsl #16",
+        "sub w20, w22, w23",
+        "mov x26, x20",
         "cfinv"
       ]
     },
     "cmpxchg eax, ebx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x0f 0xb1",
       "ExpectedArm64ASM": [
-        "mov w20, w7",
-        "mov w21, w4",
-        "mov w22, w4",
-        "eor w27, w22, w21",
-        "subs w26, w22, w21",
+        "mov x20, x7",
+        "mov w21, w20",
+        "mov x20, x4",
+        "mov w22, w20",
+        "mov w23, w20",
+        "eor w20, w23, w22",
+        "mov x27, x20",
+        "subs w20, w23, w22",
+        "mov x26, x20",
         "cfinv",
-        "mov x4, x20"
+        "mov x4, x21"
       ]
     },
     "cmpxchg [rax], ebx": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 15,
       "Comment": "0x0f 0xb1",
       "ExpectedArm64ASM": [
-        "mov w20, w7",
-        "mov w21, w4",
-        "mov w1, w21",
-        "casal w1, w20, [x4]",
-        "mov w20, w1",
-        "cmp w20, w21",
-        "csel x4, x4, x20, eq",
-        "eor w27, w21, w20",
-        "subs w26, w21, w20",
+        "mov x20, x7",
+        "mov w21, w20",
+        "mov x20, x4",
+        "mov w22, w20",
+        "mov w1, w22",
+        "casal w1, w21, [x20]",
+        "mov w23, w1",
+        "cmp w23, w22",
+        "csel x21, x20, x23, eq",
+        "mov x4, x21",
+        "eor w20, w22, w23",
+        "mov x27, x20",
+        "subs w20, w22, w23",
+        "mov x26, x20",
         "cfinv"
       ]
     },
     "cmpxchg rax, rbx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x0f 0xb1",
       "ExpectedArm64ASM": [
         "mov x20, x7",
-        "mov w27, #0x0",
-        "subs x26, x4, x4",
+        "mov x21, x4",
+        "mov w22, #0x0",
+        "mov x27, x22",
+        "subs x22, x21, x21",
+        "mov x26, x22",
         "cfinv",
         "mov x4, x20"
       ]
     },
     "btr ax, bx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x0f 0xb3",
       "ExpectedArm64ASM": [
-        "and x20, x7, #0xf",
-        "lsr w21, w4, w20",
-        "rmif x21, #63, #nzCv",
-        "mov w21, #0x1",
-        "lsl w20, w21, w20",
-        "bic w20, w4, w20",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x7",
+        "mov x21, x4",
+        "and x22, x20, #0xf",
+        "lsr w20, w21, w22",
+        "rmif x20, #63, #nzCv",
+        "mov w20, #0x1",
+        "lsl w23, w20, w22",
+        "bic w20, w21, w23",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x4, x22"
       ]
     },
     "btr [rax], bx": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x0f 0xb3",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "sbfx x21, x7, #3, #13",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldrb w23, [x4, x21, sxtx]",
-        "bic x22, x23, x22",
-        "strb w22, [x4, x21, sxtx]",
-        "lsr w20, w23, w20",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "sbfx x23, x20, #3, #13",
+        "mov w20, #0x1",
+        "lsl x24, x20, x22",
+        "ldrb w20, [x21, x23, sxtx]",
+        "bic x25, x20, x24",
+        "strb w25, [x21, x23, sxtx]",
+        "lsr w21, w20, w22",
+        "rmif x21, #63, #nzCv"
       ]
     },
     "btr eax, ebx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x0f 0xb3",
       "ExpectedArm64ASM": [
-        "lsr w20, w4, w7",
-        "rmif x20, #63, #nzCv",
-        "mov w20, #0x1",
-        "lsl w20, w20, w7",
-        "bic w4, w4, w20"
+        "mov x20, x7",
+        "mov x21, x4",
+        "lsr w22, w21, w20",
+        "rmif x22, #63, #nzCv",
+        "mov w22, #0x1",
+        "lsl w23, w22, w20",
+        "bic w20, w21, w23",
+        "mov x4, x20"
       ]
     },
     "btr [rax], ebx": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x0f 0xb3",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "sbfx x21, x7, #3, #29",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldrb w23, [x4, x21, sxtx]",
-        "bic x22, x23, x22",
-        "strb w22, [x4, x21, sxtx]",
-        "lsr w20, w23, w20",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "sbfx x23, x20, #3, #29",
+        "mov w20, #0x1",
+        "lsl x24, x20, x22",
+        "ldrb w20, [x21, x23, sxtx]",
+        "bic x25, x20, x24",
+        "strb w25, [x21, x23, sxtx]",
+        "lsr w21, w20, w22",
+        "rmif x21, #63, #nzCv"
       ]
     },
     "btr rax, rbx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x0f 0xb3",
       "ExpectedArm64ASM": [
-        "lsr x20, x4, x7",
-        "rmif x20, #63, #nzCv",
-        "mov w20, #0x1",
-        "lsl x20, x20, x7",
-        "bic x4, x4, x20"
+        "mov x20, x7",
+        "mov x21, x4",
+        "lsr x22, x21, x20",
+        "rmif x22, #63, #nzCv",
+        "mov w22, #0x1",
+        "lsl x23, x22, x20",
+        "bic x20, x21, x23",
+        "mov x4, x20"
       ]
     },
     "btr [rax], rbx": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x0f 0xb3",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "asr x21, x7, #3",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldrb w23, [x4, x21, sxtx]",
-        "bic x22, x23, x22",
-        "strb w22, [x4, x21, sxtx]",
-        "lsr w20, w23, w20",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "asr x23, x20, #3",
+        "mov w20, #0x1",
+        "lsl x24, x20, x22",
+        "ldrb w20, [x21, x23, sxtx]",
+        "bic x25, x20, x24",
+        "strb w25, [x21, x23, sxtx]",
+        "lsr w21, w20, w22",
+        "rmif x21, #63, #nzCv"
       ]
     },
     "lock btr [rax], bx": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 10,
       "Comment": "0x0f 0xb3",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "sbfx x21, x7, #3, #13",
-        "add x21, x4, x21",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldclralb w22, w21, [x21]",
-        "lsr w20, w21, w20",
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "sbfx x23, x20, #3, #13",
+        "add x20, x21, x23",
+        "mov w21, #0x1",
+        "lsl x23, x21, x22",
+        "ldclralb w23, w21, [x20]",
+        "lsr w20, w21, w22",
         "rmif x20, #63, #nzCv"
       ]
     },
     "lock btr [rax], ebx": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 10,
       "Comment": "0x0f 0xb3",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "sbfx x21, x7, #3, #29",
-        "add x21, x4, x21",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldclralb w22, w21, [x21]",
-        "lsr w20, w21, w20",
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "sbfx x23, x20, #3, #29",
+        "add x20, x21, x23",
+        "mov w21, #0x1",
+        "lsl x23, x21, x22",
+        "ldclralb w23, w21, [x20]",
+        "lsr w20, w21, w22",
         "rmif x20, #63, #nzCv"
       ]
     },
     "lock btr [rax], rbx": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 10,
       "Comment": "0x0f 0xb3",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "asr x21, x7, #3",
-        "add x21, x4, x21",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldclralb w22, w21, [x21]",
-        "lsr w20, w21, w20",
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "asr x23, x20, #3",
+        "add x20, x21, x23",
+        "mov w21, #0x1",
+        "lsl x23, x21, x22",
+        "ldclralb w23, w21, [x20]",
+        "lsr w20, w21, w22",
         "rmif x20, #63, #nzCv"
       ]
     },
     "btc ax, bx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x0f 0xbb",
       "ExpectedArm64ASM": [
-        "and x20, x7, #0xf",
-        "lsr w21, w4, w20",
-        "rmif x21, #63, #nzCv",
-        "mov w21, #0x1",
-        "lsl w20, w21, w20",
-        "eor w20, w4, w20",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x7",
+        "mov x21, x4",
+        "and x22, x20, #0xf",
+        "lsr w20, w21, w22",
+        "rmif x20, #63, #nzCv",
+        "mov w20, #0x1",
+        "lsl w23, w20, w22",
+        "eor w20, w21, w23",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x4, x22"
       ]
     },
     "btc [rax], bx": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x0f 0xbb",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "sbfx x21, x7, #3, #13",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldrb w23, [x4, x21, sxtx]",
-        "eor x22, x23, x22",
-        "strb w22, [x4, x21, sxtx]",
-        "lsr w20, w23, w20",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "sbfx x23, x20, #3, #13",
+        "mov w20, #0x1",
+        "lsl x24, x20, x22",
+        "ldrb w20, [x21, x23, sxtx]",
+        "eor x25, x20, x24",
+        "strb w25, [x21, x23, sxtx]",
+        "lsr w21, w20, w22",
+        "rmif x21, #63, #nzCv"
       ]
     },
     "btc eax, ebx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x0f 0xbb",
       "ExpectedArm64ASM": [
-        "lsr w20, w4, w7",
-        "rmif x20, #63, #nzCv",
-        "mov w20, #0x1",
-        "lsl w20, w20, w7",
-        "eor w4, w4, w20"
+        "mov x20, x7",
+        "mov x21, x4",
+        "lsr w22, w21, w20",
+        "rmif x22, #63, #nzCv",
+        "mov w22, #0x1",
+        "lsl w23, w22, w20",
+        "eor w20, w21, w23",
+        "mov x4, x20"
       ]
     },
     "btc [rax], ebx": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x0f 0xbb",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "sbfx x21, x7, #3, #29",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldrb w23, [x4, x21, sxtx]",
-        "eor x22, x23, x22",
-        "strb w22, [x4, x21, sxtx]",
-        "lsr w20, w23, w20",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "sbfx x23, x20, #3, #29",
+        "mov w20, #0x1",
+        "lsl x24, x20, x22",
+        "ldrb w20, [x21, x23, sxtx]",
+        "eor x25, x20, x24",
+        "strb w25, [x21, x23, sxtx]",
+        "lsr w21, w20, w22",
+        "rmif x21, #63, #nzCv"
       ]
     },
     "btc rax, rbx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x0f 0xbb",
       "ExpectedArm64ASM": [
-        "lsr x20, x4, x7",
-        "rmif x20, #63, #nzCv",
-        "mov w20, #0x1",
-        "lsl x20, x20, x7",
-        "eor x4, x4, x20"
+        "mov x20, x7",
+        "mov x21, x4",
+        "lsr x22, x21, x20",
+        "rmif x22, #63, #nzCv",
+        "mov w22, #0x1",
+        "lsl x23, x22, x20",
+        "eor x20, x21, x23",
+        "mov x4, x20"
       ]
     },
     "btc [rax], rbx": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x0f 0xbb",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "asr x21, x7, #3",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldrb w23, [x4, x21, sxtx]",
-        "eor x22, x23, x22",
-        "strb w22, [x4, x21, sxtx]",
-        "lsr w20, w23, w20",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "asr x23, x20, #3",
+        "mov w20, #0x1",
+        "lsl x24, x20, x22",
+        "ldrb w20, [x21, x23, sxtx]",
+        "eor x25, x20, x24",
+        "strb w25, [x21, x23, sxtx]",
+        "lsr w21, w20, w22",
+        "rmif x21, #63, #nzCv"
       ]
     },
     "lock btc [rax], bx": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 10,
       "Comment": "0x0f 0xb3",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "sbfx x21, x7, #3, #13",
-        "add x21, x4, x21",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldeoralb w22, w21, [x21]",
-        "lsr w20, w21, w20",
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "sbfx x23, x20, #3, #13",
+        "add x20, x21, x23",
+        "mov w21, #0x1",
+        "lsl x23, x21, x22",
+        "ldeoralb w23, w21, [x20]",
+        "lsr w20, w21, w22",
         "rmif x20, #63, #nzCv"
       ]
     },
     "lock btc [rax], ebx": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 10,
       "Comment": "0x0f 0xb3",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "sbfx x21, x7, #3, #29",
-        "add x21, x4, x21",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldeoralb w22, w21, [x21]",
-        "lsr w20, w21, w20",
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "sbfx x23, x20, #3, #29",
+        "add x20, x21, x23",
+        "mov w21, #0x1",
+        "lsl x23, x21, x22",
+        "ldeoralb w23, w21, [x20]",
+        "lsr w20, w21, w22",
         "rmif x20, #63, #nzCv"
       ]
     },
     "lock btc [rax], rbx": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 10,
       "Comment": "0x0f 0xb3",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "asr x21, x7, #3",
-        "add x21, x4, x21",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldeoralb w22, w21, [x21]",
-        "lsr w20, w21, w20",
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "asr x23, x20, #3",
+        "add x20, x21, x23",
+        "mov w21, #0x1",
+        "lsl x23, x21, x22",
+        "ldeoralb w23, w21, [x20]",
+        "lsr w20, w21, w22",
         "rmif x20, #63, #nzCv"
       ]
     },
     "bsf ax, bx": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 14,
       "Comment": "0x0f 0xbc",
       "ExpectedArm64ASM": [
-        "uxth w20, w4",
-        "uxth w21, w7",
-        "uxth w0, w21",
+        "mov x20, x4",
+        "uxth w21, w20",
+        "mov x22, x7",
+        "uxth w23, w22",
+        "uxth w0, w23",
         "cmp w0, #0x0 (0)",
         "rbit w0, w0",
         "clz w22, w0",
         "csinv w22, w22, wzr, ne",
-        "cmn wzr, w21, lsl #16",
-        "csel x20, x20, x22, eq",
-        "bfxil x4, x20, #0, #16"
+        "cmn wzr, w23, lsl #16",
+        "csel x23, x21, x22, eq",
+        "mov x21, x20",
+        "bfxil x21, x23, #0, #16",
+        "mov x4, x21"
       ]
     },
     "bsf eax, ebx": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x0f 0xbc",
       "ExpectedArm64ASM": [
-        "mov w20, w7",
-        "lsr w0, w20, #0",
+        "mov x20, x4",
+        "mov x21, x7",
+        "mov w22, w21",
+        "lsr w0, w22, #0",
         "cmp w0, #0x0 (0)",
         "rbit w0, w0",
         "clz w21, w0",
         "csinv w21, w21, wzr, ne",
-        "tst w20, w20",
-        "csel x4, x4, x21, eq"
+        "tst w22, w22",
+        "csel x22, x20, x21, eq",
+        "mov x4, x22"
       ]
     },
     "bsf rax, rbx": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x0f 0xbc",
       "ExpectedArm64ASM": [
-        "rbit x0, x7",
-        "cmp x7, #0x0 (0)",
-        "clz x20, x0",
-        "csinv x20, x20, xzr, ne",
-        "tst x7, x7",
-        "csel x4, x4, x20, eq"
+        "mov x20, x4",
+        "mov x21, x7",
+        "rbit x0, x21",
+        "cmp x21, #0x0 (0)",
+        "clz x22, x0",
+        "csinv x22, x22, xzr, ne",
+        "tst x21, x21",
+        "csel x21, x20, x22, eq",
+        "mov x4, x21"
       ]
     },
     "bsr ax, bx": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 14,
       "Comment": "0x0f 0xbd",
       "ExpectedArm64ASM": [
-        "uxth w20, w4",
-        "uxth w21, w7",
+        "mov x20, x4",
+        "uxth w21, w20",
+        "mov x22, x7",
+        "uxth w23, w22",
         "mov x0, #0xf",
-        "lsl w22, w21, #16",
+        "lsl w22, w23, #16",
         "orr w22, w22, #0x8000",
         "clz w22, w22",
         "sub x22, x0, x22",
-        "cmn wzr, w21, lsl #16",
-        "csel x20, x20, x22, eq",
-        "bfxil x4, x20, #0, #16"
+        "cmn wzr, w23, lsl #16",
+        "csel x23, x21, x22, eq",
+        "mov x21, x20",
+        "bfxil x21, x23, #0, #16",
+        "mov x4, x21"
       ]
     },
     "bsr eax, ebx": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x0f 0xbd",
       "ExpectedArm64ASM": [
-        "mov w20, w7",
+        "mov x20, x4",
+        "mov x21, x7",
+        "mov w22, w21",
         "mov x0, #0x1f",
-        "clz w21, w20",
+        "clz w21, w22",
         "sub x21, x0, x21",
-        "tst w20, w20",
-        "csel x4, x4, x21, eq"
+        "tst w22, w22",
+        "csel x22, x20, x21, eq",
+        "mov x4, x22"
       ]
     },
     "bsr rax, rbx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x0f 0xbd",
       "ExpectedArm64ASM": [
+        "mov x20, x4",
+        "mov x21, x7",
         "mov x0, #0x3f",
-        "clz x20, x7",
-        "sub x20, x0, x20",
-        "tst x7, x7",
-        "csel x4, x4, x20, eq"
+        "clz x22, x21",
+        "sub x22, x0, x22",
+        "tst x21, x21",
+        "csel x21, x20, x22, eq",
+        "mov x4, x21"
       ]
     },
     "xadd al, bl": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 16,
       "Comment": "0x0f 0xc0",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "uxtb w21, w7",
-        "eor w27, w20, w21",
-        "lsl w0, w20, #24",
-        "cmn w0, w21, lsl #24",
-        "add w26, w20, w21",
-        "bfxil x7, x20, #0, #8",
-        "bfxil x4, x26, #0, #8"
+        "mov x20, x4",
+        "uxtb w21, w20",
+        "mov x22, x7",
+        "uxtb w23, w22",
+        "eor w24, w21, w23",
+        "mov x27, x24",
+        "lsl w0, w21, #24",
+        "cmn w0, w23, lsl #24",
+        "add w24, w21, w23",
+        "mov x26, x24",
+        "mov x23, x22",
+        "bfxil x23, x21, #0, #8",
+        "mov x7, x23",
+        "mov x21, x20",
+        "bfxil x21, x24, #0, #8",
+        "mov x4, x21"
       ]
     },
     "xadd [rax], bl": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x0f 0xc0",
       "ExpectedArm64ASM": [
-        "uxtb w20, w7",
-        "ldaddalb w20, w21, [x4]",
-        "bfxil x7, x21, #0, #8",
-        "eor w27, w21, w20",
-        "lsl w0, w21, #24",
-        "cmn w0, w20, lsl #24",
-        "add w26, w21, w20"
+        "mov x20, x4",
+        "mov x21, x7",
+        "uxtb w22, w21",
+        "ldaddalb w22, w23, [x20]",
+        "mov x20, x21",
+        "bfxil x20, x23, #0, #8",
+        "mov x7, x20",
+        "eor w20, w23, w22",
+        "mov x27, x20",
+        "lsl w0, w23, #24",
+        "cmn w0, w22, lsl #24",
+        "add w20, w23, w22",
+        "mov x26, x20"
       ]
     },
     "xadd ax, bx": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 16,
       "Comment": "0x0f 0xc1",
       "ExpectedArm64ASM": [
-        "uxth w20, w4",
-        "uxth w21, w7",
-        "eor w27, w20, w21",
-        "lsl w0, w20, #16",
-        "cmn w0, w21, lsl #16",
-        "add w26, w20, w21",
-        "bfxil x7, x20, #0, #16",
-        "bfxil x4, x26, #0, #16"
+        "mov x20, x4",
+        "uxth w21, w20",
+        "mov x22, x7",
+        "uxth w23, w22",
+        "eor w24, w21, w23",
+        "mov x27, x24",
+        "lsl w0, w21, #16",
+        "cmn w0, w23, lsl #16",
+        "add w24, w21, w23",
+        "mov x26, x24",
+        "mov x23, x22",
+        "bfxil x23, x21, #0, #16",
+        "mov x7, x23",
+        "mov x21, x20",
+        "bfxil x21, x24, #0, #16",
+        "mov x4, x21"
       ]
     },
     "xadd [rax], bx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x0f 0xc1",
       "ExpectedArm64ASM": [
-        "uxth w20, w7",
-        "ldaddalh w20, w21, [x4]",
-        "bfxil x7, x21, #0, #16",
-        "eor w27, w21, w20",
-        "lsl w0, w21, #16",
-        "cmn w0, w20, lsl #16",
-        "add w26, w21, w20"
+        "mov x20, x4",
+        "mov x21, x7",
+        "uxth w22, w21",
+        "ldaddalh w22, w23, [x20]",
+        "mov x20, x21",
+        "bfxil x20, x23, #0, #16",
+        "mov x7, x20",
+        "eor w20, w23, w22",
+        "mov x27, x20",
+        "lsl w0, w23, #16",
+        "cmn w0, w22, lsl #16",
+        "add w20, w23, w22",
+        "mov x26, x20"
       ]
     },
     "xadd eax, ebx": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": "0x0f 0xc1",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "mov w21, w7",
-        "eor w27, w20, w21",
-        "adds w26, w20, w21",
-        "mov x7, x20",
-        "mov x4, x26"
+        "mov x20, x4",
+        "mov w21, w20",
+        "mov x20, x7",
+        "mov w22, w20",
+        "eor w20, w21, w22",
+        "mov x27, x20",
+        "adds w20, w21, w22",
+        "mov x26, x20",
+        "mov x7, x21",
+        "mov x4, x20"
       ]
     },
     "xadd [rax], ebx": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x0f 0xc1",
       "ExpectedArm64ASM": [
-        "mov w20, w7",
-        "ldaddal w20, w7, [x4]",
-        "eor w27, w7, w20",
-        "adds w26, w7, w20"
+        "mov x20, x4",
+        "mov x21, x7",
+        "mov w22, w21",
+        "ldaddal w22, w21, [x20]",
+        "mov x7, x21",
+        "eor w20, w21, w22",
+        "mov x27, x20",
+        "adds w20, w21, w22",
+        "mov x26, x20"
       ]
     },
     "xadd rax, rbx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x0f 0xc1",
       "ExpectedArm64ASM": [
         "mov x20, x4",
-        "eor w27, w20, w7",
-        "adds x26, x20, x7",
+        "mov x21, x7",
+        "eor w22, w20, w21",
+        "mov x27, x22",
+        "adds x22, x20, x21",
+        "mov x26, x22",
         "mov x7, x20",
-        "mov x4, x26"
+        "mov x4, x22"
       ]
     },
     "xadd [rax], rbx": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x0f 0xc1",
       "ExpectedArm64ASM": [
-        "mov x20, x7",
-        "ldaddal x20, x7, [x4]",
-        "eor w27, w7, w20",
-        "adds x26, x7, x20"
+        "mov x20, x4",
+        "mov x21, x7",
+        "ldaddal x21, x22, [x20]",
+        "mov x7, x22",
+        "eor w20, w22, w21",
+        "mov x27, x20",
+        "adds x20, x22, x21",
+        "mov x26, x20"
       ]
     },
     "pmovmskb eax, mm0": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x0f 0xd7",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
         "ldr d3, [x28, #2272]",
-        "cmlt v2.16b, v2.16b, #0",
-        "and v2.16b, v2.16b, v3.16b",
-        "addp v2.16b, v2.16b, v2.16b",
-        "addp v2.8b, v2.8b, v2.8b",
-        "addp v2.8b, v2.8b, v2.8b",
-        "umov w4, v2.h[0]"
+        "cmlt v4.16b, v2.16b, #0",
+        "and v2.16b, v4.16b, v3.16b",
+        "addp v3.16b, v2.16b, v2.16b",
+        "addp v2.8b, v3.8b, v3.8b",
+        "addp v3.8b, v2.8b, v2.8b",
+        "umov w20, v3.h[0]",
+        "mov x4, x20"
       ]
     },
     "maskmovq mm0, mm1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x0f 0xf7",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
-        "cmlt v2.16b, v2.16b, #0",
-        "ldr d3, [x28, #768]",
-        "ldr d4, [x11]",
-        "bsl v2.8b, v3.8b, v4.8b",
-        "str d2, [x11]"
+        "cmlt v3.16b, v2.16b, #0",
+        "ldr d2, [x28, #768]",
+        "mov x20, x11",
+        "ldr d4, [x20]",
+        "mov v5.8b, v3.8b",
+        "bsl v5.8b, v2.8b, v4.8b",
+        "str d5, [x20]"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/FlagM/SecondaryGroup.json b/unittests/InstructionCountCI/FlagM/SecondaryGroup.json
index 1a465a88f6..5e8938ca0e 100644
--- a/unittests/InstructionCountCI/FlagM/SecondaryGroup.json
+++ b/unittests/InstructionCountCI/FlagM/SecondaryGroup.json
@@ -13,773 +13,916 @@
   },
   "Instructions": {
     "sgdt [rax]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP7 0x0F 0x1 /0",
       "ExpectedArm64ASM": [
-        "mov w20, #0x0",
-        "strh w20, [x4]",
-        "mov x20, #0xfffffffffffe0000",
-        "stur x20, [x4, #2]"
+        "mov x20, x4",
+        "mov w21, #0x0",
+        "strh w21, [x20]",
+        "mov x21, #0xfffffffffffe0000",
+        "stur x21, [x20, #2]"
       ]
     },
     "bt ax, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "GROUP8 0x0F 0xBA /4",
       "ExpectedArm64ASM": [
-        "rmif x4, #63, #nzCv"
+        "mov x20, x4",
+        "rmif x20, #63, #nzCv"
       ]
     },
     "bt eax, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "GROUP8 0x0F 0xBA /4",
       "ExpectedArm64ASM": [
-        "rmif x4, #63, #nzCv"
+        "mov x20, x4",
+        "rmif x20, #63, #nzCv"
       ]
     },
     "bt rax, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "GROUP8 0x0F 0xBA /4",
       "ExpectedArm64ASM": [
-        "rmif x4, #63, #nzCv"
+        "mov x20, x4",
+        "rmif x20, #63, #nzCv"
       ]
     },
     "bt ax, 15": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "GROUP8 0x0F 0xBA /4",
       "ExpectedArm64ASM": [
-        "rmif x4, #14, #nzCv"
+        "mov x20, x4",
+        "rmif x20, #14, #nzCv"
       ]
     },
     "bt eax, 31": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "GROUP8 0x0F 0xBA /4",
       "ExpectedArm64ASM": [
-        "rmif x4, #30, #nzCv"
+        "mov x20, x4",
+        "rmif x20, #30, #nzCv"
       ]
     },
     "bt rax, 63": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "GROUP8 0x0F 0xBA /4",
       "ExpectedArm64ASM": [
-        "rmif x4, #62, #nzCv"
+        "mov x20, x4",
+        "rmif x20, #62, #nzCv"
       ]
     },
     "bt word [rax], 0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4]",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x4",
+        "ldrb w21, [x20]",
+        "rmif x21, #63, #nzCv"
       ]
     },
     "bt dword [rax], 0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4]",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x4",
+        "ldrb w21, [x20]",
+        "rmif x21, #63, #nzCv"
       ]
     },
     "bt qword [rax], 0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4]",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x4",
+        "ldrb w21, [x20]",
+        "rmif x21, #63, #nzCv"
       ]
     },
     "bt word [rax], 15": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4, #1]",
-        "lsr w20, w20, #7",
+        "mov x20, x4",
+        "ldrb w21, [x20, #1]",
+        "lsr w20, w21, #7",
         "rmif x20, #63, #nzCv"
       ]
     },
     "bt dword [rax], 31": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4, #3]",
-        "lsr w20, w20, #7",
+        "mov x20, x4",
+        "ldrb w21, [x20, #3]",
+        "lsr w20, w21, #7",
         "rmif x20, #63, #nzCv"
       ]
     },
     "bt qword [rax], 63": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4, #7]",
-        "lsr w20, w20, #7",
+        "mov x20, x4",
+        "ldrb w21, [x20, #7]",
+        "lsr w20, w21, #7",
         "rmif x20, #63, #nzCv"
       ]
     },
     "bts ax, 0": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /5",
       "ExpectedArm64ASM": [
-        "rmif x4, #63, #nzCv",
-        "orr w20, w4, #0x1",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "rmif x20, #63, #nzCv",
+        "orr w21, w20, #0x1",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22"
       ]
     },
     "bts eax, 0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP8 0x0F 0xBA /5",
       "ExpectedArm64ASM": [
-        "rmif x4, #63, #nzCv",
-        "orr w4, w4, #0x1"
+        "mov x20, x4",
+        "rmif x20, #63, #nzCv",
+        "orr w21, w20, #0x1",
+        "mov x4, x21"
       ]
     },
     "bts rax, 0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP8 0x0F 0xBA /5",
       "ExpectedArm64ASM": [
-        "rmif x4, #63, #nzCv",
-        "orr x4, x4, #0x1"
+        "mov x20, x4",
+        "rmif x20, #63, #nzCv",
+        "orr x21, x20, #0x1",
+        "mov x4, x21"
       ]
     },
     "bts ax, 15": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /5",
       "ExpectedArm64ASM": [
-        "rmif x4, #14, #nzCv",
-        "orr w20, w4, #0x8000",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "rmif x20, #14, #nzCv",
+        "orr w21, w20, #0x8000",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22"
       ]
     },
     "bts eax, 31": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP8 0x0F 0xBA /5",
       "ExpectedArm64ASM": [
-        "rmif x4, #30, #nzCv",
-        "orr w4, w4, #0x80000000"
+        "mov x20, x4",
+        "rmif x20, #30, #nzCv",
+        "orr w21, w20, #0x80000000",
+        "mov x4, x21"
       ]
     },
     "bts rax, 63": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP8 0x0F 0xBA /5",
       "ExpectedArm64ASM": [
-        "rmif x4, #62, #nzCv",
-        "orr x4, x4, #0x8000000000000000"
+        "mov x20, x4",
+        "rmif x20, #62, #nzCv",
+        "orr x21, x20, #0x8000000000000000",
+        "mov x4, x21"
       ]
     },
     "bts word [rax], 0": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4]",
-        "orr x21, x20, #0x1",
-        "strb w21, [x4]",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x4",
+        "ldrb w21, [x20]",
+        "orr x22, x21, #0x1",
+        "strb w22, [x20]",
+        "rmif x21, #63, #nzCv"
       ]
     },
     "bts dword [rax], 0": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4]",
-        "orr x21, x20, #0x1",
-        "strb w21, [x4]",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x4",
+        "ldrb w21, [x20]",
+        "orr x22, x21, #0x1",
+        "strb w22, [x20]",
+        "rmif x21, #63, #nzCv"
       ]
     },
     "bts qword [rax], 0": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4]",
-        "orr x21, x20, #0x1",
-        "strb w21, [x4]",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x4",
+        "ldrb w21, [x20]",
+        "orr x22, x21, #0x1",
+        "strb w22, [x20]",
+        "rmif x21, #63, #nzCv"
       ]
     },
     "bts word [rax], 15": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4, #1]",
-        "orr x21, x20, #0x80",
-        "strb w21, [x4, #1]",
-        "lsr w20, w20, #7",
+        "mov x20, x4",
+        "ldrb w21, [x20, #1]",
+        "orr x22, x21, #0x80",
+        "strb w22, [x20, #1]",
+        "lsr w20, w21, #7",
         "rmif x20, #63, #nzCv"
       ]
     },
     "bts dword [rax], 31": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4, #3]",
-        "orr x21, x20, #0x80",
-        "strb w21, [x4, #3]",
-        "lsr w20, w20, #7",
+        "mov x20, x4",
+        "ldrb w21, [x20, #3]",
+        "orr x22, x21, #0x80",
+        "strb w22, [x20, #3]",
+        "lsr w20, w21, #7",
         "rmif x20, #63, #nzCv"
       ]
     },
     "bts qword [rax], 63": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4, #7]",
-        "orr x21, x20, #0x80",
-        "strb w21, [x4, #7]",
-        "lsr w20, w20, #7",
+        "mov x20, x4",
+        "ldrb w21, [x20, #7]",
+        "orr x22, x21, #0x80",
+        "strb w22, [x20, #7]",
+        "lsr w20, w21, #7",
         "rmif x20, #63, #nzCv"
       ]
     },
     "lock bts word [rax], 0": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x0 (0)",
-        "mov w21, #0x1",
-        "ldsetalb w21, w20, [x20]",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x4",
+        "add x21, x20, #0x0 (0)",
+        "mov w20, #0x1",
+        "ldsetalb w20, w22, [x21]",
+        "rmif x22, #63, #nzCv"
       ]
     },
     "lock bts dword [rax], 0": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x0 (0)",
-        "mov w21, #0x1",
-        "ldsetalb w21, w20, [x20]",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x4",
+        "add x21, x20, #0x0 (0)",
+        "mov w20, #0x1",
+        "ldsetalb w20, w22, [x21]",
+        "rmif x22, #63, #nzCv"
       ]
     },
     "lock bts qword [rax], 0": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x0 (0)",
-        "mov w21, #0x1",
-        "ldsetalb w21, w20, [x20]",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x4",
+        "add x21, x20, #0x0 (0)",
+        "mov w20, #0x1",
+        "ldsetalb w20, w22, [x21]",
+        "rmif x22, #63, #nzCv"
       ]
     },
     "lock bts word [rax], 15": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x1 (1)",
-        "mov w21, #0x80",
-        "ldsetalb w21, w20, [x20]",
-        "lsr w20, w20, #7",
+        "mov x20, x4",
+        "add x21, x20, #0x1 (1)",
+        "mov w20, #0x80",
+        "ldsetalb w20, w22, [x21]",
+        "lsr w20, w22, #7",
         "rmif x20, #63, #nzCv"
       ]
     },
     "lock bts dword [rax], 31": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x3 (3)",
-        "mov w21, #0x80",
-        "ldsetalb w21, w20, [x20]",
-        "lsr w20, w20, #7",
+        "mov x20, x4",
+        "add x21, x20, #0x3 (3)",
+        "mov w20, #0x80",
+        "ldsetalb w20, w22, [x21]",
+        "lsr w20, w22, #7",
         "rmif x20, #63, #nzCv"
       ]
     },
     "lock bts qword [rax], 63": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x7 (7)",
-        "mov w21, #0x80",
-        "ldsetalb w21, w20, [x20]",
-        "lsr w20, w20, #7",
+        "mov x20, x4",
+        "add x21, x20, #0x7 (7)",
+        "mov w20, #0x80",
+        "ldsetalb w20, w22, [x21]",
+        "lsr w20, w22, #7",
         "rmif x20, #63, #nzCv"
       ]
     },
     "btr ax, 0": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "rmif x4, #63, #nzCv",
-        "and w20, w4, #0xfffffffe",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "rmif x20, #63, #nzCv",
+        "and w21, w20, #0xfffffffe",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22"
       ]
     },
     "btr eax, 0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "rmif x4, #63, #nzCv",
-        "and w4, w4, #0xfffffffe"
+        "mov x20, x4",
+        "rmif x20, #63, #nzCv",
+        "and w21, w20, #0xfffffffe",
+        "mov x4, x21"
       ]
     },
     "btr rax, 0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "rmif x4, #63, #nzCv",
-        "and x4, x4, #0xfffffffffffffffe"
+        "mov x20, x4",
+        "rmif x20, #63, #nzCv",
+        "and x21, x20, #0xfffffffffffffffe",
+        "mov x4, x21"
       ]
     },
     "btr ax, 15": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "rmif x4, #14, #nzCv",
-        "and w20, w4, #0xffff7fff",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "rmif x20, #14, #nzCv",
+        "and w21, w20, #0xffff7fff",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22"
       ]
     },
     "btr eax, 31": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "rmif x4, #30, #nzCv",
-        "and w4, w4, #0x7fffffff"
+        "mov x20, x4",
+        "rmif x20, #30, #nzCv",
+        "and w21, w20, #0x7fffffff",
+        "mov x4, x21"
       ]
     },
     "btr rax, 63": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "rmif x4, #62, #nzCv",
-        "and x4, x4, #0x7fffffffffffffff"
+        "mov x20, x4",
+        "rmif x20, #62, #nzCv",
+        "and x21, x20, #0x7fffffffffffffff",
+        "mov x4, x21"
       ]
     },
     "btr word [rax], 0": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4]",
-        "and x21, x20, #0xfffffffffffffffe",
-        "strb w21, [x4]",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x4",
+        "ldrb w21, [x20]",
+        "and x22, x21, #0xfffffffffffffffe",
+        "strb w22, [x20]",
+        "rmif x21, #63, #nzCv"
       ]
     },
     "btr dword [rax], 0": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4]",
-        "and x21, x20, #0xfffffffffffffffe",
-        "strb w21, [x4]",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x4",
+        "ldrb w21, [x20]",
+        "and x22, x21, #0xfffffffffffffffe",
+        "strb w22, [x20]",
+        "rmif x21, #63, #nzCv"
       ]
     },
     "btr qword [rax], 0": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4]",
-        "and x21, x20, #0xfffffffffffffffe",
-        "strb w21, [x4]",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x4",
+        "ldrb w21, [x20]",
+        "and x22, x21, #0xfffffffffffffffe",
+        "strb w22, [x20]",
+        "rmif x21, #63, #nzCv"
       ]
     },
     "btr word [rax], 15": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4, #1]",
-        "and x21, x20, #0xffffffffffffff7f",
-        "strb w21, [x4, #1]",
-        "lsr w20, w20, #7",
+        "mov x20, x4",
+        "ldrb w21, [x20, #1]",
+        "and x22, x21, #0xffffffffffffff7f",
+        "strb w22, [x20, #1]",
+        "lsr w20, w21, #7",
         "rmif x20, #63, #nzCv"
       ]
     },
     "btr dword [rax], 31": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4, #3]",
-        "and x21, x20, #0xffffffffffffff7f",
-        "strb w21, [x4, #3]",
-        "lsr w20, w20, #7",
+        "mov x20, x4",
+        "ldrb w21, [x20, #3]",
+        "and x22, x21, #0xffffffffffffff7f",
+        "strb w22, [x20, #3]",
+        "lsr w20, w21, #7",
         "rmif x20, #63, #nzCv"
       ]
     },
     "btr qword [rax], 63": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4, #7]",
-        "and x21, x20, #0xffffffffffffff7f",
-        "strb w21, [x4, #7]",
-        "lsr w20, w20, #7",
+        "mov x20, x4",
+        "ldrb w21, [x20, #7]",
+        "and x22, x21, #0xffffffffffffff7f",
+        "strb w22, [x20, #7]",
+        "lsr w20, w21, #7",
         "rmif x20, #63, #nzCv"
       ]
     },
     "lock btr word [rax], 0": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x0 (0)",
-        "mov w21, #0x1",
-        "ldclralb w21, w20, [x20]",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x4",
+        "add x21, x20, #0x0 (0)",
+        "mov w20, #0x1",
+        "ldclralb w20, w22, [x21]",
+        "rmif x22, #63, #nzCv"
       ]
     },
     "lock btr dword [rax], 0": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x0 (0)",
-        "mov w21, #0x1",
-        "ldclralb w21, w20, [x20]",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x4",
+        "add x21, x20, #0x0 (0)",
+        "mov w20, #0x1",
+        "ldclralb w20, w22, [x21]",
+        "rmif x22, #63, #nzCv"
       ]
     },
     "lock btr qword [rax], 0": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x0 (0)",
-        "mov w21, #0x1",
-        "ldclralb w21, w20, [x20]",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x4",
+        "add x21, x20, #0x0 (0)",
+        "mov w20, #0x1",
+        "ldclralb w20, w22, [x21]",
+        "rmif x22, #63, #nzCv"
       ]
     },
     "lock btr word [rax], 15": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x1 (1)",
-        "mov w21, #0x80",
-        "ldclralb w21, w20, [x20]",
-        "lsr w20, w20, #7",
+        "mov x20, x4",
+        "add x21, x20, #0x1 (1)",
+        "mov w20, #0x80",
+        "ldclralb w20, w22, [x21]",
+        "lsr w20, w22, #7",
         "rmif x20, #63, #nzCv"
       ]
     },
     "lock btr dword [rax], 31": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x3 (3)",
-        "mov w21, #0x80",
-        "ldclralb w21, w20, [x20]",
-        "lsr w20, w20, #7",
+        "mov x20, x4",
+        "add x21, x20, #0x3 (3)",
+        "mov w20, #0x80",
+        "ldclralb w20, w22, [x21]",
+        "lsr w20, w22, #7",
         "rmif x20, #63, #nzCv"
       ]
     },
     "lock btr qword [rax], 63": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x7 (7)",
-        "mov w21, #0x80",
-        "ldclralb w21, w20, [x20]",
-        "lsr w20, w20, #7",
+        "mov x20, x4",
+        "add x21, x20, #0x7 (7)",
+        "mov w20, #0x80",
+        "ldclralb w20, w22, [x21]",
+        "lsr w20, w22, #7",
         "rmif x20, #63, #nzCv"
       ]
     },
     "btc ax, 0": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /7",
       "ExpectedArm64ASM": [
-        "rmif x4, #63, #nzCv",
-        "eor w20, w4, #0x1",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "rmif x20, #63, #nzCv",
+        "eor w21, w20, #0x1",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22"
       ]
     },
     "btc eax, 0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP8 0x0F 0xBA /7",
       "ExpectedArm64ASM": [
-        "rmif x4, #63, #nzCv",
-        "eor w4, w4, #0x1"
+        "mov x20, x4",
+        "rmif x20, #63, #nzCv",
+        "eor w21, w20, #0x1",
+        "mov x4, x21"
       ]
     },
     "btc rax, 0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP8 0x0F 0xBA /7",
       "ExpectedArm64ASM": [
-        "rmif x4, #63, #nzCv",
-        "eor x4, x4, #0x1"
+        "mov x20, x4",
+        "rmif x20, #63, #nzCv",
+        "eor x21, x20, #0x1",
+        "mov x4, x21"
       ]
     },
     "btc ax, 15": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /7",
       "ExpectedArm64ASM": [
-        "rmif x4, #14, #nzCv",
-        "eor w20, w4, #0x8000",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "rmif x20, #14, #nzCv",
+        "eor w21, w20, #0x8000",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22"
       ]
     },
     "btc eax, 31": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP8 0x0F 0xBA /7",
       "ExpectedArm64ASM": [
-        "rmif x4, #30, #nzCv",
-        "eor w4, w4, #0x80000000"
+        "mov x20, x4",
+        "rmif x20, #30, #nzCv",
+        "eor w21, w20, #0x80000000",
+        "mov x4, x21"
       ]
     },
     "btc rax, 63": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP8 0x0F 0xBA /7",
       "ExpectedArm64ASM": [
-        "rmif x4, #62, #nzCv",
-        "eor x4, x4, #0x8000000000000000"
+        "mov x20, x4",
+        "rmif x20, #62, #nzCv",
+        "eor x21, x20, #0x8000000000000000",
+        "mov x4, x21"
       ]
     },
     "btc word [rax], 0": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4]",
-        "eor x21, x20, #0x1",
-        "strb w21, [x4]",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x4",
+        "ldrb w21, [x20]",
+        "eor x22, x21, #0x1",
+        "strb w22, [x20]",
+        "rmif x21, #63, #nzCv"
       ]
     },
     "btc dword [rax], 0": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4]",
-        "eor x21, x20, #0x1",
-        "strb w21, [x4]",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x4",
+        "ldrb w21, [x20]",
+        "eor x22, x21, #0x1",
+        "strb w22, [x20]",
+        "rmif x21, #63, #nzCv"
       ]
     },
     "btc qword [rax], 0": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4]",
-        "eor x21, x20, #0x1",
-        "strb w21, [x4]",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x4",
+        "ldrb w21, [x20]",
+        "eor x22, x21, #0x1",
+        "strb w22, [x20]",
+        "rmif x21, #63, #nzCv"
       ]
     },
     "btc word [rax], 15": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4, #1]",
-        "eor x21, x20, #0x80",
-        "strb w21, [x4, #1]",
-        "lsr w20, w20, #7",
+        "mov x20, x4",
+        "ldrb w21, [x20, #1]",
+        "eor x22, x21, #0x80",
+        "strb w22, [x20, #1]",
+        "lsr w20, w21, #7",
         "rmif x20, #63, #nzCv"
       ]
     },
     "btc dword [rax], 31": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4, #3]",
-        "eor x21, x20, #0x80",
-        "strb w21, [x4, #3]",
-        "lsr w20, w20, #7",
+        "mov x20, x4",
+        "ldrb w21, [x20, #3]",
+        "eor x22, x21, #0x80",
+        "strb w22, [x20, #3]",
+        "lsr w20, w21, #7",
         "rmif x20, #63, #nzCv"
       ]
     },
     "btc qword [rax], 63": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4, #7]",
-        "eor x21, x20, #0x80",
-        "strb w21, [x4, #7]",
-        "lsr w20, w20, #7",
+        "mov x20, x4",
+        "ldrb w21, [x20, #7]",
+        "eor x22, x21, #0x80",
+        "strb w22, [x20, #7]",
+        "lsr w20, w21, #7",
         "rmif x20, #63, #nzCv"
       ]
     },
     "lock btc word [rax], 0": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x0 (0)",
-        "mov w21, #0x1",
-        "ldeoralb w21, w20, [x20]",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x4",
+        "add x21, x20, #0x0 (0)",
+        "mov w20, #0x1",
+        "ldeoralb w20, w22, [x21]",
+        "rmif x22, #63, #nzCv"
       ]
     },
     "lock btc dword [rax], 0": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x0 (0)",
-        "mov w21, #0x1",
-        "ldeoralb w21, w20, [x20]",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x4",
+        "add x21, x20, #0x0 (0)",
+        "mov w20, #0x1",
+        "ldeoralb w20, w22, [x21]",
+        "rmif x22, #63, #nzCv"
       ]
     },
     "lock btc qword [rax], 0": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x0 (0)",
-        "mov w21, #0x1",
-        "ldeoralb w21, w20, [x20]",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x4",
+        "add x21, x20, #0x0 (0)",
+        "mov w20, #0x1",
+        "ldeoralb w20, w22, [x21]",
+        "rmif x22, #63, #nzCv"
       ]
     },
     "lock btc word [rax], 15": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x1 (1)",
-        "mov w21, #0x80",
-        "ldeoralb w21, w20, [x20]",
-        "lsr w20, w20, #7",
+        "mov x20, x4",
+        "add x21, x20, #0x1 (1)",
+        "mov w20, #0x80",
+        "ldeoralb w20, w22, [x21]",
+        "lsr w20, w22, #7",
         "rmif x20, #63, #nzCv"
       ]
     },
     "lock btc dword [rax], 31": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x3 (3)",
-        "mov w21, #0x80",
-        "ldeoralb w21, w20, [x20]",
-        "lsr w20, w20, #7",
+        "mov x20, x4",
+        "add x21, x20, #0x3 (3)",
+        "mov w20, #0x80",
+        "ldeoralb w20, w22, [x21]",
+        "lsr w20, w22, #7",
         "rmif x20, #63, #nzCv"
       ]
     },
     "lock btc qword [rax], 63": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x7 (7)",
-        "mov w21, #0x80",
-        "ldeoralb w21, w20, [x20]",
-        "lsr w20, w20, #7",
+        "mov x20, x4",
+        "add x21, x20, #0x7 (7)",
+        "mov w20, #0x80",
+        "ldeoralb w20, w22, [x21]",
+        "lsr w20, w22, #7",
         "rmif x20, #63, #nzCv"
       ]
     },
     "cmpxchg8b [rbp]": {
-      "ExpectedInstructionCount": 22,
+      "ExpectedInstructionCount": 37,
       "Comment": "GROUP9 0x0F 0xC7 /1",
       "ExpectedArm64ASM": [
-        "add x20, x9, #0x0 (0)",
-        "mov w21, w4",
-        "mov w22, w6",
-        "mov w23, w22",
-        "mov w22, w21",
-        "mov w21, w7",
-        "mov w24, w5",
-        "mov w25, w24",
-        "mov w24, w21",
-        "mov w2, w22",
-        "mov w3, w23",
-        "caspal w2, w3, w24, w25, [x20]",
+        "sub sp, sp, #0x40 (64)",
+        "mov x20, x9",
+        "add x21, x20, #0x0 (0)",
+        "mov x20, x4",
+        "mov w22, w20",
+        "mov x23, x6",
+        "mov w24, w23",
+        "mov x30, x24",
+        "mov w24, w22",
+        "mov w25, w30",
+        "mov x22, x7",
+        "mov w30, w22",
+        "mov x22, x5",
+        "mov w18, w22",
+        "str x23, [sp]",
+        "mov w22, w30",
+        "mov w23, w18",
+        "str x20, [sp, #32]",
+        "mov x30, x21",
+        "mov w2, w24",
+        "mov w3, w25",
+        "caspal w2, w3, w22, w23, [x30]",
         "mov w20, w2",
         "mov w21, w3",
-        "mov w24, w20",
-        "mov w25, w21",
+        "mov w22, w20",
+        "mov w23, w21",
         "mrs x0, nzcv",
-        "cmp w20, w22",
-        "ccmp w21, w23, #nzcv, eq",
+        "cmp w20, w24",
+        "ccmp w21, w25, #nzcv, eq",
         "rmif x0, #0, #NzCV",
-        "csel x4, x24, x4, ne",
-        "csel x6, x25, x6, ne"
+        "ldr x20, [sp, #32]",
+        "csel x21, x22, x20, ne",
+        "mov x4, x21",
+        "ldr x20, [sp]",
+        "csel x21, x23, x20, ne",
+        "mov x6, x21",
+        "add sp, sp, #0x40 (64)"
       ]
     },
     "cmpxchg16b [rbp]": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 33,
       "Comment": "GROUP9 0x0F 0xC7 /1",
       "ExpectedArm64ASM": [
-        "add x20, x9, #0x0 (0)",
-        "mov x22, x4",
-        "mov x23, x6",
-        "mov x24, x7",
-        "mov x25, x5",
-        "mov x2, x22",
-        "mov x3, x23",
-        "caspal x2, x3, x24, x25, [x20]",
+        "sub sp, sp, #0x40 (64)",
+        "mov x20, x9",
+        "add x21, x20, #0x0 (0)",
+        "mov x20, x4",
+        "mov x22, x6",
+        "mov x24, x20",
+        "mov x25, x22",
+        "mov x23, x7",
+        "mov x30, x5",
+        "str x22, [sp]",
+        "mov x18, x23",
+        "mov x22, x18",
+        "mov x23, x30",
+        "str x20, [sp, #32]",
+        "mov x30, x21",
+        "mov x2, x24",
+        "mov x3, x25",
+        "caspal x2, x3, x22, x23, [x30]",
         "mov x20, x2",
         "mov x21, x3",
-        "mov x24, x20",
-        "mov x25, x21",
+        "mov x22, x20",
+        "mov x23, x21",
         "mrs x0, nzcv",
-        "cmp w20, w22",
-        "ccmp w21, w23, #nzcv, eq",
+        "cmp w20, w24",
+        "ccmp w21, w25, #nzcv, eq",
         "rmif x0, #0, #NzCV",
-        "csel x4, x24, x4, ne",
-        "csel x6, x25, x6, ne"
+        "ldr x20, [sp, #32]",
+        "csel x21, x22, x20, ne",
+        "mov x4, x21",
+        "ldr x20, [sp]",
+        "csel x21, x23, x20, ne",
+        "mov x6, x21",
+        "add sp, sp, #0x40 (64)"
       ]
     },
     "rdrand ax": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 14,
       "Comment": "GROUP9 0x0F 0xC7 /6",
       "ExpectedArm64ASM": [
         "mrs x20, rndr",
         "cset x21, ne",
         "mov x22, x20",
-        "mov x20, x21",
-        "bfxil x4, x22, #0, #16",
-        "mov w26, #0x1",
-        "mov w27, #0x0",
-        "lsl x20, x20, #29",
+        "mov x23, x21",
+        "mov x20, x4",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21",
+        "mov w20, #0x1",
+        "mov x26, x20",
+        "mov w20, #0x0",
+        "mov x27, x20",
+        "lsl x20, x23, #29",
         "msr nzcv, x20"
       ]
     },
     "rdrand eax": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 12,
       "Comment": "GROUP9 0x0F 0xC7 /6",
       "ExpectedArm64ASM": [
         "mrs x20, rndr",
         "cset x21, ne",
         "mov x22, x20",
-        "mov x20, x21",
-        "mov w4, w22",
-        "mov w26, #0x1",
-        "mov w27, #0x0",
-        "lsl x20, x20, #29",
+        "mov x23, x21",
+        "mov w20, w22",
+        "mov x4, x20",
+        "mov w20, #0x1",
+        "mov x26, x20",
+        "mov w20, #0x0",
+        "mov x27, x20",
+        "lsl x20, x23, #29",
         "msr nzcv, x20"
       ]
     },
     "rdrand rax": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP9 0x0F 0xC7 /6",
       "ExpectedArm64ASM": [
         "mrs x20, rndr",
         "cset x21, ne",
-        "mov x4, x20",
-        "mov x20, x21",
-        "mov w26, #0x1",
-        "mov w27, #0x0",
-        "lsl x20, x20, #29",
+        "mov x22, x20",
+        "mov x23, x21",
+        "mov x4, x22",
+        "mov w20, #0x1",
+        "mov x26, x20",
+        "mov w20, #0x0",
+        "mov x27, x20",
+        "lsl x20, x23, #29",
         "msr nzcv, x20"
       ]
     },
     "rdseed ax": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 14,
       "Comment": "GROUP9 0x0F 0xC7 /7",
       "ExpectedArm64ASM": [
         "mrs x20, rndrrs",
         "cset x21, ne",
         "mov x22, x20",
-        "mov x20, x21",
-        "bfxil x4, x22, #0, #16",
-        "mov w26, #0x1",
-        "mov w27, #0x0",
-        "lsl x20, x20, #29",
+        "mov x23, x21",
+        "mov x20, x4",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21",
+        "mov w20, #0x1",
+        "mov x26, x20",
+        "mov w20, #0x0",
+        "mov x27, x20",
+        "lsl x20, x23, #29",
         "msr nzcv, x20"
       ]
     },
     "rdseed eax": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 12,
       "Comment": "GROUP9 0x0F 0xC7 /7",
       "ExpectedArm64ASM": [
         "mrs x20, rndrrs",
         "cset x21, ne",
         "mov x22, x20",
-        "mov x20, x21",
-        "mov w4, w22",
-        "mov w26, #0x1",
-        "mov w27, #0x0",
-        "lsl x20, x20, #29",
+        "mov x23, x21",
+        "mov w20, w22",
+        "mov x4, x20",
+        "mov w20, #0x1",
+        "mov x26, x20",
+        "mov w20, #0x0",
+        "mov x27, x20",
+        "lsl x20, x23, #29",
         "msr nzcv, x20"
       ]
     },
     "rdseed rax": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP9 0x0F 0xC7 /7",
       "ExpectedArm64ASM": [
         "mrs x20, rndrrs",
         "cset x21, ne",
-        "mov x4, x20",
-        "mov x20, x21",
-        "mov w26, #0x1",
-        "mov w27, #0x0",
-        "lsl x20, x20, #29",
+        "mov x22, x20",
+        "mov x23, x21",
+        "mov x4, x22",
+        "mov w20, #0x1",
+        "mov x26, x20",
+        "mov w20, #0x0",
+        "mov x27, x20",
+        "lsl x20, x23, #29",
         "msr nzcv, x20"
       ]
     },
@@ -795,8 +938,8 @@
       "Comment": "GROUP12 0x0F 0xC7 /2",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "ushr v2.8h, v2.8h, #15",
-        "str d2, [x28, #768]"
+        "ushr v3.8h, v2.8h, #15",
+        "str d3, [x28, #768]"
       ]
     },
     "psrlw mm0, 16": {
@@ -805,8 +948,8 @@
       "Comment": "GROUP12 0x0F 0xC7 /2",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "movi v2.2d, #0x0",
-        "str d2, [x28, #768]"
+        "movi v3.2d, #0x0",
+        "str d3, [x28, #768]"
       ]
     },
     "psrlw xmm0, 0": {
@@ -816,19 +959,23 @@
       "ExpectedArm64ASM": []
     },
     "psrlw xmm0, 15": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Type": "SSE",
       "Comment": "GROUP12 0x0F 0xC7 /2",
       "ExpectedArm64ASM": [
-        "ushr v16.8h, v16.8h, #15"
+        "mov v2.16b, v16.16b",
+        "ushr v3.8h, v2.8h, #15",
+        "mov v16.16b, v3.16b"
       ]
     },
     "psrlw xmm0, 16": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Type": "SSE",
       "Comment": "GROUP12 0x0F 0xC7 /2",
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "mov v2.16b, v16.16b",
+        "movi v3.2d, #0x0",
+        "mov v16.16b, v3.16b"
       ]
     },
     "psraw mm0, 0": {
@@ -843,8 +990,8 @@
       "Comment": "GROUP12 0x0F 0xC7 /3",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "sshr v2.8h, v2.8h, #15",
-        "str d2, [x28, #768]"
+        "sshr v3.8h, v2.8h, #15",
+        "str d3, [x28, #768]"
       ]
     },
     "psraw mm0, 16": {
@@ -853,8 +1000,8 @@
       "Comment": "GROUP12 0x0F 0xC7 /3",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "sshr v2.8h, v2.8h, #15",
-        "str d2, [x28, #768]"
+        "sshr v3.8h, v2.8h, #15",
+        "str d3, [x28, #768]"
       ]
     },
     "psraw xmm0, 0": {
@@ -864,19 +1011,23 @@
       "ExpectedArm64ASM": []
     },
     "psraw xmm0, 15": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Type": "SSE",
       "Comment": "GROUP12 0x0F 0xC7 /3",
       "ExpectedArm64ASM": [
-        "sshr v16.8h, v16.8h, #15"
+        "mov v2.16b, v16.16b",
+        "sshr v3.8h, v2.8h, #15",
+        "mov v16.16b, v3.16b"
       ]
     },
     "psraw xmm0, 16": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Type": "SSE",
       "Comment": "GROUP12 0x0F 0xC7 /3",
       "ExpectedArm64ASM": [
-        "sshr v16.8h, v16.8h, #15"
+        "mov v2.16b, v16.16b",
+        "sshr v3.8h, v2.8h, #15",
+        "mov v16.16b, v3.16b"
       ]
     },
     "psllw mm0, 0": {
@@ -891,8 +1042,8 @@
       "Comment": "GROUP12 0x0F 0xC7 /6",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "shl v2.8h, v2.8h, #15",
-        "str d2, [x28, #768]"
+        "shl v3.8h, v2.8h, #15",
+        "str d3, [x28, #768]"
       ]
     },
     "psllw mm0, 16": {
@@ -901,8 +1052,8 @@
       "Comment": "GROUP12 0x0F 0xC7 /6",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "movi v2.2d, #0x0",
-        "str d2, [x28, #768]"
+        "movi v3.2d, #0x0",
+        "str d3, [x28, #768]"
       ]
     },
     "psllw xmm0, 0": {
@@ -912,19 +1063,23 @@
       "ExpectedArm64ASM": []
     },
     "psllw xmm0, 15": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Type": "SSE",
       "Comment": "GROUP12 0x0F 0xC7 /6",
       "ExpectedArm64ASM": [
-        "shl v16.8h, v16.8h, #15"
+        "mov v2.16b, v16.16b",
+        "shl v3.8h, v2.8h, #15",
+        "mov v16.16b, v3.16b"
       ]
     },
     "psllw xmm0, 16": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Type": "SSE",
       "Comment": "GROUP12 0x0F 0xC7 /6",
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "mov v2.16b, v16.16b",
+        "movi v3.2d, #0x0",
+        "mov v16.16b, v3.16b"
       ]
     },
     "psrld mm0, 0": {
@@ -939,8 +1094,8 @@
       "Comment": "GROUP13 0x0F 0xC7 /2",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "ushr v2.4s, v2.4s, #31",
-        "str d2, [x28, #768]"
+        "ushr v3.4s, v2.4s, #31",
+        "str d3, [x28, #768]"
       ]
     },
     "psrld mm0, 32": {
@@ -949,8 +1104,8 @@
       "Comment": "GROUP13 0x0F 0xC7 /2",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "movi v2.2d, #0x0",
-        "str d2, [x28, #768]"
+        "movi v3.2d, #0x0",
+        "str d3, [x28, #768]"
       ]
     },
     "psrld xmm0, 0": {
@@ -960,19 +1115,23 @@
       "ExpectedArm64ASM": []
     },
     "psrld xmm0, 31": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Type": "SSE",
       "Comment": "GROUP13 0x0F 0xC7 /2",
       "ExpectedArm64ASM": [
-        "ushr v16.4s, v16.4s, #31"
+        "mov v2.16b, v16.16b",
+        "ushr v3.4s, v2.4s, #31",
+        "mov v16.16b, v3.16b"
       ]
     },
     "psrld xmm0, 32": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Type": "SSE",
       "Comment": "GROUP13 0x0F 0xC7 /2",
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "mov v2.16b, v16.16b",
+        "movi v3.2d, #0x0",
+        "mov v16.16b, v3.16b"
       ]
     },
     "psrad mm0, 0": {
@@ -987,8 +1146,8 @@
       "Comment": "GROUP13 0x0F 0xC7 /3",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "sshr v2.4s, v2.4s, #31",
-        "str d2, [x28, #768]"
+        "sshr v3.4s, v2.4s, #31",
+        "str d3, [x28, #768]"
       ]
     },
     "psrad mm0, 32": {
@@ -997,8 +1156,8 @@
       "Comment": "GROUP13 0x0F 0xC7 /3",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "sshr v2.4s, v2.4s, #31",
-        "str d2, [x28, #768]"
+        "sshr v3.4s, v2.4s, #31",
+        "str d3, [x28, #768]"
       ]
     },
     "psrad xmm0, 0": {
@@ -1008,19 +1167,23 @@
       "ExpectedArm64ASM": []
     },
     "psrad xmm0, 31": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Type": "SSE",
       "Comment": "GROUP13 0x0F 0xC7 /3",
       "ExpectedArm64ASM": [
-        "sshr v16.4s, v16.4s, #31"
+        "mov v2.16b, v16.16b",
+        "sshr v3.4s, v2.4s, #31",
+        "mov v16.16b, v3.16b"
       ]
     },
     "psrad xmm0, 32": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Type": "SSE",
       "Comment": "GROUP13 0x0F 0xC7 /3",
       "ExpectedArm64ASM": [
-        "sshr v16.4s, v16.4s, #31"
+        "mov v2.16b, v16.16b",
+        "sshr v3.4s, v2.4s, #31",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pslld mm0, 0": {
@@ -1035,8 +1198,8 @@
       "Comment": "GROUP13 0x0F 0xC7 /6",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "shl v2.4s, v2.4s, #31",
-        "str d2, [x28, #768]"
+        "shl v3.4s, v2.4s, #31",
+        "str d3, [x28, #768]"
       ]
     },
     "pslld mm0, 32": {
@@ -1045,8 +1208,8 @@
       "Comment": "GROUP13 0x0F 0xC7 /6",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "movi v2.2d, #0x0",
-        "str d2, [x28, #768]"
+        "movi v3.2d, #0x0",
+        "str d3, [x28, #768]"
       ]
     },
     "pslld xmm0, 0": {
@@ -1056,19 +1219,23 @@
       "ExpectedArm64ASM": []
     },
     "pslld xmm0, 31": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Type": "SSE",
       "Comment": "GROUP13 0x0F 0xC7 /6",
       "ExpectedArm64ASM": [
-        "shl v16.4s, v16.4s, #31"
+        "mov v2.16b, v16.16b",
+        "shl v3.4s, v2.4s, #31",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pslld xmm0, 32": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Type": "SSE",
       "Comment": "GROUP13 0x0F 0xC7 /6",
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "mov v2.16b, v16.16b",
+        "movi v3.2d, #0x0",
+        "mov v16.16b, v3.16b"
       ]
     },
     "psrlq mm0, 0": {
@@ -1083,8 +1250,8 @@
       "Comment": "GROUP14 0x0F 0xC7 /2",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "ushr v2.2d, v2.2d, #63",
-        "str d2, [x28, #768]"
+        "ushr v3.2d, v2.2d, #63",
+        "str d3, [x28, #768]"
       ]
     },
     "psrlq mm0, 64": {
@@ -1093,8 +1260,8 @@
       "Comment": "GROUP14 0x0F 0xC7 /2",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "movi v2.2d, #0x0",
-        "str d2, [x28, #768]"
+        "movi v3.2d, #0x0",
+        "str d3, [x28, #768]"
       ]
     },
     "psrlq xmm0, 0": {
@@ -1104,19 +1271,23 @@
       "ExpectedArm64ASM": []
     },
     "psrlq xmm0, 63": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Type": "SSE",
       "Comment": "GROUP14 0x0F 0xC7 /2",
       "ExpectedArm64ASM": [
-        "ushr v16.2d, v16.2d, #63"
+        "mov v2.16b, v16.16b",
+        "ushr v3.2d, v2.2d, #63",
+        "mov v16.16b, v3.16b"
       ]
     },
     "psrlq xmm0, 64": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Type": "SSE",
       "Comment": "GROUP14 0x0F 0xC7 /2",
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "mov v2.16b, v16.16b",
+        "movi v3.2d, #0x0",
+        "mov v16.16b, v3.16b"
       ]
     },
     "psrldq xmm0, 0": {
@@ -1126,20 +1297,23 @@
       "ExpectedArm64ASM": []
     },
     "psrldq xmm0, 15": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Type": "SSE",
       "Comment": "GROUP14 0x0F 0xC7 /3",
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "ext v16.16b, v16.16b, v2.16b, #15"
+        "mov v2.16b, v16.16b",
+        "movi v3.2d, #0x0",
+        "ext v4.16b, v2.16b, v3.16b, #15",
+        "mov v16.16b, v4.16b"
       ]
     },
     "psrldq xmm0, 16": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Type": "SSE",
       "Comment": "GROUP14 0x0F 0xC7 /3",
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "movi v2.2d, #0x0",
+        "mov v16.16b, v2.16b"
       ]
     },
     "psllq mm0, 0": {
@@ -1154,8 +1328,8 @@
       "Comment": "GROUP14 0x0F 0xC7 /6",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "shl v2.2d, v2.2d, #63",
-        "str d2, [x28, #768]"
+        "shl v3.2d, v2.2d, #63",
+        "str d3, [x28, #768]"
       ]
     },
     "psllq mm0, 64": {
@@ -1164,8 +1338,8 @@
       "Comment": "GROUP14 0x0F 0xC7 /6",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "movi v2.2d, #0x0",
-        "str d2, [x28, #768]"
+        "movi v3.2d, #0x0",
+        "str d3, [x28, #768]"
       ]
     },
     "psllq xmm0, 0": {
@@ -1175,152 +1349,194 @@
       "ExpectedArm64ASM": []
     },
     "psllq xmm0, 63": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Type": "SSE",
       "Comment": "GROUP14 0x0F 0xC7 /6",
       "ExpectedArm64ASM": [
-        "shl v16.2d, v16.2d, #63"
+        "mov v2.16b, v16.16b",
+        "shl v3.2d, v2.2d, #63",
+        "mov v16.16b, v3.16b"
       ]
     },
     "psllq xmm0, 64": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Type": "SSE",
       "Comment": "GROUP14 0x0F 0xC7 /6",
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "mov v2.16b, v16.16b",
+        "movi v3.2d, #0x0",
+        "mov v16.16b, v3.16b"
       ]
     },
     "fxsave [rax]": {
-      "ExpectedInstructionCount": 58,
+      "ExpectedInstructionCount": 77,
       "Comment": "GROUP15 0x0F 0xAE /0",
       "ExpectedArm64ASM": [
-        "ldrh w20, [x28, #1024]",
-        "strh w20, [x4]",
-        "mov w20, #0x0",
-        "ldrb w21, [x28, #747]",
-        "bfi x20, x21, #11, #3",
+        "mov x20, x4",
+        "ldrh w21, [x28, #1024]",
+        "strh w21, [x20]",
+        "mov w21, #0x0",
+        "ldrb w22, [x28, #747]",
+        "mov x23, x21",
+        "bfi x23, x22, #11, #3",
         "ldrb w21, [x28, #744]",
         "ldrb w22, [x28, #745]",
-        "ldrb w23, [x28, #746]",
-        "ldrb w24, [x28, #750]",
-        "orr x20, x20, x21, lsl #8",
-        "orr x20, x20, x22, lsl #9",
-        "orr x20, x20, x23, lsl #10",
-        "orr x20, x20, x24, lsl #14",
-        "strh w20, [x4, #2]",
-        "ldrb w20, [x28, #1026]",
-        "strb w20, [x4, #4]",
+        "ldrb w24, [x28, #746]",
+        "ldrb w25, [x28, #750]",
+        "orr x30, x23, x21, lsl #8",
+        "orr x21, x30, x22, lsl #9",
+        "orr x22, x21, x24, lsl #10",
+        "orr x21, x22, x25, lsl #14",
+        "strh w21, [x20, #2]",
+        "ldrb w21, [x28, #1026]",
+        "strb w21, [x20, #4]",
         "ldr q2, [x28, #768]",
-        "str q2, [x4, #32]",
+        "str q2, [x20, #32]",
         "ldr q2, [x28, #784]",
-        "str q2, [x4, #48]",
+        "str q2, [x20, #48]",
         "ldr q2, [x28, #800]",
-        "str q2, [x4, #64]",
+        "str q2, [x20, #64]",
         "ldr q2, [x28, #816]",
-        "str q2, [x4, #80]",
+        "str q2, [x20, #80]",
         "ldr q2, [x28, #832]",
-        "str q2, [x4, #96]",
+        "str q2, [x20, #96]",
         "ldr q2, [x28, #848]",
-        "str q2, [x4, #112]",
+        "str q2, [x20, #112]",
         "ldr q2, [x28, #864]",
-        "str q2, [x4, #128]",
+        "str q2, [x20, #128]",
         "ldr q2, [x28, #880]",
-        "str q2, [x4, #144]",
-        "str q16, [x4, #160]",
-        "str q17, [x4, #176]",
-        "str q18, [x4, #192]",
-        "str q19, [x4, #208]",
-        "str q20, [x4, #224]",
-        "str q21, [x4, #240]",
-        "str q22, [x4, #256]",
-        "str q23, [x4, #272]",
-        "str q24, [x4, #288]",
-        "str q25, [x4, #304]",
-        "str q26, [x4, #320]",
-        "str q27, [x4, #336]",
-        "str q28, [x4, #352]",
-        "str q29, [x4, #368]",
-        "str q30, [x4, #384]",
-        "str q31, [x4, #400]",
-        "mov w20, #0x1f80",
-        "mrs x21, fpcr",
-        "ubfx x21, x21, #22, #3",
-        "rbit w0, w21",
-        "bfi x21, x0, #30, #2",
-        "bfi w20, w21, #13, #3",
-        "add x21, x4, #0x18 (24)",
-        "str w20, [x4, #24]",
+        "str q2, [x20, #144]",
+        "mov v2.16b, v16.16b",
+        "str q2, [x20, #160]",
+        "mov v2.16b, v17.16b",
+        "str q2, [x20, #176]",
+        "mov v2.16b, v18.16b",
+        "str q2, [x20, #192]",
+        "mov v2.16b, v19.16b",
+        "str q2, [x20, #208]",
+        "mov v2.16b, v20.16b",
+        "str q2, [x20, #224]",
+        "mov v2.16b, v21.16b",
+        "str q2, [x20, #240]",
+        "mov v2.16b, v22.16b",
+        "str q2, [x20, #256]",
+        "mov v2.16b, v23.16b",
+        "str q2, [x20, #272]",
+        "mov v2.16b, v24.16b",
+        "str q2, [x20, #288]",
+        "mov v2.16b, v25.16b",
+        "str q2, [x20, #304]",
+        "mov v2.16b, v26.16b",
+        "str q2, [x20, #320]",
+        "mov v2.16b, v27.16b",
+        "str q2, [x20, #336]",
+        "mov v2.16b, v28.16b",
+        "str q2, [x20, #352]",
+        "mov v2.16b, v29.16b",
+        "str q2, [x20, #368]",
+        "mov v2.16b, v30.16b",
+        "str q2, [x20, #384]",
+        "mov v2.16b, v31.16b",
+        "str q2, [x20, #400]",
+        "mov w21, #0x1f80",
+        "mrs x22, fpcr",
+        "ubfx x22, x22, #22, #3",
+        "rbit w0, w22",
+        "bfi x22, x0, #30, #2",
+        "mov w23, w21",
+        "bfi w23, w22, #13, #3",
+        "add x21, x20, #0x18 (24)",
+        "str w23, [x20, #24]",
         "mov w20, #0xffff",
         "str w20, [x21, #4]"
       ]
     },
     "rdfsbase eax": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "GROUP15 0x0F 0xAE /0",
       "ExpectedArm64ASM": [
-        "ldr w4, [x28, #176]"
+        "ldr w20, [x28, #176]",
+        "mov x4, x20"
       ]
     },
     "rdfsbase rax": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "GROUP15 0x0F 0xAE /0",
       "ExpectedArm64ASM": [
-        "ldr x4, [x28, #176]"
+        "ldr x20, [x28, #176]",
+        "mov x4, x20"
       ]
     },
     "fxrstor [rax]": {
-      "ExpectedInstructionCount": 56,
+      "ExpectedInstructionCount": 73,
       "Comment": "GROUP15 0x0F 0xAE /1",
       "ExpectedArm64ASM": [
-        "ldrh w20, [x4]",
-        "strh w20, [x28, #1024]",
-        "ldrh w20, [x4, #2]",
-        "ubfx w21, w20, #11, #3",
-        "strb w21, [x28, #747]",
-        "ubfx w21, w20, #8, #1",
-        "ubfx w22, w20, #9, #1",
-        "ubfx w23, w20, #10, #1",
-        "ubfx w20, w20, #14, #1",
-        "strb w21, [x28, #744]",
-        "strb w22, [x28, #745]",
-        "strb w23, [x28, #746]",
-        "strb w20, [x28, #750]",
-        "ldrb w20, [x4, #4]",
-        "strb w20, [x28, #1026]",
-        "ldr q2, [x4, #32]",
+        "mov x20, x4",
+        "ldrh w21, [x20]",
+        "strh w21, [x28, #1024]",
+        "ldrh w21, [x20, #2]",
+        "ubfx w22, w21, #11, #3",
+        "strb w22, [x28, #747]",
+        "ubfx w22, w21, #8, #1",
+        "ubfx w23, w21, #9, #1",
+        "ubfx w24, w21, #10, #1",
+        "ubfx w25, w21, #14, #1",
+        "strb w22, [x28, #744]",
+        "strb w23, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w25, [x28, #750]",
+        "ldrb w21, [x20, #4]",
+        "strb w21, [x28, #1026]",
+        "ldr q2, [x20, #32]",
         "str q2, [x28, #768]",
-        "ldr q2, [x4, #48]",
+        "ldr q2, [x20, #48]",
         "str q2, [x28, #784]",
-        "ldr q2, [x4, #64]",
+        "ldr q2, [x20, #64]",
         "str q2, [x28, #800]",
-        "ldr q2, [x4, #80]",
+        "ldr q2, [x20, #80]",
         "str q2, [x28, #816]",
-        "ldr q2, [x4, #96]",
+        "ldr q2, [x20, #96]",
         "str q2, [x28, #832]",
-        "ldr q2, [x4, #112]",
+        "ldr q2, [x20, #112]",
         "str q2, [x28, #848]",
-        "ldr q2, [x4, #128]",
+        "ldr q2, [x20, #128]",
         "str q2, [x28, #864]",
-        "ldr q2, [x4, #144]",
+        "ldr q2, [x20, #144]",
         "str q2, [x28, #880]",
-        "ldr q16, [x4, #160]",
-        "ldr q17, [x4, #176]",
-        "ldr q18, [x4, #192]",
-        "ldr q19, [x4, #208]",
-        "ldr q20, [x4, #224]",
-        "ldr q21, [x4, #240]",
-        "ldr q22, [x4, #256]",
-        "ldr q23, [x4, #272]",
-        "ldr q24, [x4, #288]",
-        "ldr q25, [x4, #304]",
-        "ldr q26, [x4, #320]",
-        "ldr q27, [x4, #336]",
-        "ldr q28, [x4, #352]",
-        "ldr q29, [x4, #368]",
-        "ldr q30, [x4, #384]",
-        "ldr q31, [x4, #400]",
-        "ldr w20, [x4, #24]",
-        "ubfx w20, w20, #13, #3",
+        "ldr q2, [x20, #160]",
+        "mov v16.16b, v2.16b",
+        "ldr q2, [x20, #176]",
+        "mov v17.16b, v2.16b",
+        "ldr q2, [x20, #192]",
+        "mov v18.16b, v2.16b",
+        "ldr q2, [x20, #208]",
+        "mov v19.16b, v2.16b",
+        "ldr q2, [x20, #224]",
+        "mov v20.16b, v2.16b",
+        "ldr q2, [x20, #240]",
+        "mov v21.16b, v2.16b",
+        "ldr q2, [x20, #256]",
+        "mov v22.16b, v2.16b",
+        "ldr q2, [x20, #272]",
+        "mov v23.16b, v2.16b",
+        "ldr q2, [x20, #288]",
+        "mov v24.16b, v2.16b",
+        "ldr q2, [x20, #304]",
+        "mov v25.16b, v2.16b",
+        "ldr q2, [x20, #320]",
+        "mov v26.16b, v2.16b",
+        "ldr q2, [x20, #336]",
+        "mov v27.16b, v2.16b",
+        "ldr q2, [x20, #352]",
+        "mov v28.16b, v2.16b",
+        "ldr q2, [x20, #368]",
+        "mov v29.16b, v2.16b",
+        "ldr q2, [x20, #384]",
+        "mov v30.16b, v2.16b",
+        "ldr q2, [x20, #400]",
+        "mov v31.16b, v2.16b",
+        "ldr w21, [x20, #24]",
+        "ubfx w20, w21, #13, #3",
         "rbit w1, w20",
         "lsr w1, w1, #30",
         "mrs x0, fpcr",
@@ -1331,25 +1547,28 @@
       ]
     },
     "rdgsbase eax": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "GROUP15 0x0F 0xAE /1",
       "ExpectedArm64ASM": [
-        "ldr w4, [x28, #168]"
+        "ldr w20, [x28, #168]",
+        "mov x4, x20"
       ]
     },
     "rdgsbase rax": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "GROUP15 0x0F 0xAE /1",
       "ExpectedArm64ASM": [
-        "ldr x4, [x28, #168]"
+        "ldr x20, [x28, #168]",
+        "mov x4, x20"
       ]
     },
     "ldmxcsr [rax]": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP15 0x0F 0xAE /2",
       "ExpectedArm64ASM": [
-        "ldr w20, [x4]",
-        "ubfx w20, w20, #13, #3",
+        "mov x20, x4",
+        "ldr w21, [x20]",
+        "ubfx w20, w21, #13, #3",
         "rbit w1, w20",
         "lsr w1, w1, #30",
         "mrs x0, fpcr",
@@ -1360,22 +1579,24 @@
       ]
     },
     "wrfsbase eax": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP15 0x0F 0xAE /2",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "str x20, [x28, #176]"
+        "mov x20, x4",
+        "mov w21, w20",
+        "str x21, [x28, #176]"
       ]
     },
     "wrfsbase rax": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "GROUP15 0x0F 0xAE /2",
       "ExpectedArm64ASM": [
-        "str x4, [x28, #176]"
+        "mov x20, x4",
+        "str x20, [x28, #176]"
       ]
     },
     "stmxcsr [rax]": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP15 0x0F 0xAE /3",
       "ExpectedArm64ASM": [
         "mov w20, #0x1f80",
@@ -1383,98 +1604,127 @@
         "ubfx x21, x21, #22, #3",
         "rbit w0, w21",
         "bfi x21, x0, #30, #2",
-        "bfi w20, w21, #13, #3",
-        "str w20, [x4]"
+        "mov w22, w20",
+        "bfi w22, w21, #13, #3",
+        "mov x20, x4",
+        "str w22, [x20]"
       ]
     },
     "wrgsbase eax": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP15 0x0F 0xAE /3",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "str x20, [x28, #168]"
+        "mov x20, x4",
+        "mov w21, w20",
+        "str x21, [x28, #168]"
       ]
     },
     "wrgsbase rax": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "GROUP15 0x0F 0xAE /3",
       "ExpectedArm64ASM": [
-        "str x4, [x28, #168]"
+        "mov x20, x4",
+        "str x20, [x28, #168]"
       ]
     },
     "xsave [rax]": {
-      "ExpectedInstructionCount": 69,
+      "ExpectedInstructionCount": 94,
       "Comment": "GROUP15 0x0F 0xAE /4",
       "ExpectedArm64ASM": [
-        "ubfx x20, x4, #0, #1",
-        "cbnz x20, #+0x8",
-        "b #+0x84",
-        "ldrh w20, [x28, #1024]",
-        "strh w20, [x4]",
-        "mov w20, #0x0",
-        "ldrb w21, [x28, #747]",
-        "bfi x20, x21, #11, #3",
+        "mov x20, x4",
+        "ubfx x21, x20, #0, #1",
+        "cbnz x21, #+0x8",
+        "b #+0x8c",
+        "mov x20, x4",
+        "ldrh w21, [x28, #1024]",
+        "strh w21, [x20]",
+        "mov w21, #0x0",
+        "ldrb w22, [x28, #747]",
+        "mov x23, x21",
+        "bfi x23, x22, #11, #3",
         "ldrb w21, [x28, #744]",
         "ldrb w22, [x28, #745]",
-        "ldrb w23, [x28, #746]",
-        "ldrb w24, [x28, #750]",
-        "orr x20, x20, x21, lsl #8",
-        "orr x20, x20, x22, lsl #9",
-        "orr x20, x20, x23, lsl #10",
-        "orr x20, x20, x24, lsl #14",
-        "strh w20, [x4, #2]",
-        "ldrb w20, [x28, #1026]",
-        "strb w20, [x4, #4]",
+        "ldrb w24, [x28, #746]",
+        "ldrb w25, [x28, #750]",
+        "orr x30, x23, x21, lsl #8",
+        "orr x21, x30, x22, lsl #9",
+        "orr x22, x21, x24, lsl #10",
+        "orr x21, x22, x25, lsl #14",
+        "strh w21, [x20, #2]",
+        "ldrb w21, [x28, #1026]",
+        "strb w21, [x20, #4]",
         "ldr q2, [x28, #768]",
-        "str q2, [x4, #32]",
+        "str q2, [x20, #32]",
         "ldr q2, [x28, #784]",
-        "str q2, [x4, #48]",
+        "str q2, [x20, #48]",
         "ldr q2, [x28, #800]",
-        "str q2, [x4, #64]",
+        "str q2, [x20, #64]",
         "ldr q2, [x28, #816]",
-        "str q2, [x4, #80]",
+        "str q2, [x20, #80]",
         "ldr q2, [x28, #832]",
-        "str q2, [x4, #96]",
+        "str q2, [x20, #96]",
         "ldr q2, [x28, #848]",
-        "str q2, [x4, #112]",
+        "str q2, [x20, #112]",
         "ldr q2, [x28, #864]",
-        "str q2, [x4, #128]",
+        "str q2, [x20, #128]",
         "ldr q2, [x28, #880]",
-        "str q2, [x4, #144]",
-        "ubfx x20, x4, #1, #1",
-        "cbnz x20, #+0x8",
-        "b #+0x44",
-        "str q16, [x4, #160]",
-        "str q17, [x4, #176]",
-        "str q18, [x4, #192]",
-        "str q19, [x4, #208]",
-        "str q20, [x4, #224]",
-        "str q21, [x4, #240]",
-        "str q22, [x4, #256]",
-        "str q23, [x4, #272]",
-        "str q24, [x4, #288]",
-        "str q25, [x4, #304]",
-        "str q26, [x4, #320]",
-        "str q27, [x4, #336]",
-        "str q28, [x4, #352]",
-        "str q29, [x4, #368]",
-        "str q30, [x4, #384]",
-        "str q31, [x4, #400]",
-        "ubfx x20, x4, #1, #2",
-        "cbnz x20, #+0x8",
-        "b #+0x2c",
-        "mov w20, #0x1f80",
-        "mrs x21, fpcr",
-        "ubfx x21, x21, #22, #3",
-        "rbit w0, w21",
-        "bfi x21, x0, #30, #2",
-        "bfi w20, w21, #13, #3",
-        "add x21, x4, #0x18 (24)",
-        "str w20, [x4, #24]",
+        "str q2, [x20, #144]",
+        "mov x20, x4",
+        "ubfx x21, x20, #1, #1",
+        "cbnz x21, #+0x8",
+        "b #+0x88",
+        "mov x20, x4",
+        "mov v2.16b, v16.16b",
+        "str q2, [x20, #160]",
+        "mov v2.16b, v17.16b",
+        "str q2, [x20, #176]",
+        "mov v2.16b, v18.16b",
+        "str q2, [x20, #192]",
+        "mov v2.16b, v19.16b",
+        "str q2, [x20, #208]",
+        "mov v2.16b, v20.16b",
+        "str q2, [x20, #224]",
+        "mov v2.16b, v21.16b",
+        "str q2, [x20, #240]",
+        "mov v2.16b, v22.16b",
+        "str q2, [x20, #256]",
+        "mov v2.16b, v23.16b",
+        "str q2, [x20, #272]",
+        "mov v2.16b, v24.16b",
+        "str q2, [x20, #288]",
+        "mov v2.16b, v25.16b",
+        "str q2, [x20, #304]",
+        "mov v2.16b, v26.16b",
+        "str q2, [x20, #320]",
+        "mov v2.16b, v27.16b",
+        "str q2, [x20, #336]",
+        "mov v2.16b, v28.16b",
+        "str q2, [x20, #352]",
+        "mov v2.16b, v29.16b",
+        "str q2, [x20, #368]",
+        "mov v2.16b, v30.16b",
+        "str q2, [x20, #384]",
+        "mov v2.16b, v31.16b",
+        "str q2, [x20, #400]",
+        "mov x20, x4",
+        "ubfx x21, x20, #1, #2",
+        "cbnz x21, #+0x8",
+        "b #+0x34",
+        "mov x20, x4",
+        "mov w21, #0x1f80",
+        "mrs x22, fpcr",
+        "ubfx x22, x22, #22, #3",
+        "rbit w0, w22",
+        "bfi x22, x0, #30, #2",
+        "mov w23, w21",
+        "bfi w23, w22, #13, #3",
+        "add x21, x20, #0x18 (24)",
+        "str w23, [x20, #24]",
         "mov w20, #0xffff",
         "str w20, [x21, #4]",
-        "ubfx x20, x4, #0, #3",
-        "str x20, [x4, #512]"
+        "mov x20, x4",
+        "ubfx x21, x20, #0, #3",
+        "str x21, [x20, #512]"
       ]
     },
     "lfence": {
@@ -1485,43 +1735,45 @@
       ]
     },
     "xrstor [rax]": {
-      "ExpectedInstructionCount": 105,
+      "ExpectedInstructionCount": 128,
       "Comment": "GROUP15 0x0F 0xAE /5",
       "ExpectedArm64ASM": [
-        "ldr x20, [x4, #512]",
-        "ubfx x20, x20, #0, #1",
+        "mov x20, x4",
+        "ldr x21, [x20, #512]",
+        "ubfx x20, x21, #0, #1",
         "cbnz x20, #+0x8",
-        "b #+0x84",
-        "ldrh w20, [x4]",
-        "strh w20, [x28, #1024]",
-        "ldrh w20, [x4, #2]",
-        "ubfx w21, w20, #11, #3",
-        "strb w21, [x28, #747]",
-        "ubfx w21, w20, #8, #1",
-        "ubfx w22, w20, #9, #1",
-        "ubfx w23, w20, #10, #1",
-        "ubfx w20, w20, #14, #1",
-        "strb w21, [x28, #744]",
-        "strb w22, [x28, #745]",
-        "strb w23, [x28, #746]",
-        "strb w20, [x28, #750]",
-        "ldrb w20, [x4, #4]",
-        "strb w20, [x28, #1026]",
-        "ldr q2, [x4, #32]",
+        "b #+0x88",
+        "mov x20, x4",
+        "ldrh w21, [x20]",
+        "strh w21, [x28, #1024]",
+        "ldrh w21, [x20, #2]",
+        "ubfx w22, w21, #11, #3",
+        "strb w22, [x28, #747]",
+        "ubfx w22, w21, #8, #1",
+        "ubfx w23, w21, #9, #1",
+        "ubfx w24, w21, #10, #1",
+        "ubfx w25, w21, #14, #1",
+        "strb w22, [x28, #744]",
+        "strb w23, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w25, [x28, #750]",
+        "ldrb w21, [x20, #4]",
+        "strb w21, [x28, #1026]",
+        "ldr q2, [x20, #32]",
         "str q2, [x28, #768]",
-        "ldr q2, [x4, #48]",
+        "ldr q2, [x20, #48]",
         "str q2, [x28, #784]",
-        "ldr q2, [x4, #64]",
+        "ldr q2, [x20, #64]",
         "str q2, [x28, #800]",
-        "ldr q2, [x4, #80]",
+        "ldr q2, [x20, #80]",
         "str q2, [x28, #816]",
-        "ldr q2, [x4, #96]",
+        "ldr q2, [x20, #96]",
         "str q2, [x28, #832]",
-        "ldr q2, [x4, #112]",
+        "ldr q2, [x20, #112]",
         "str q2, [x28, #848]",
-        "ldr q2, [x4, #128]",
+        "ldr q2, [x20, #128]",
         "str q2, [x28, #864]",
-        "ldr q2, [x4, #144]",
+        "ldr q2, [x20, #144]",
         "str q2, [x28, #880]",
         "b #+0x4c",
         "mov w20, #0x0",
@@ -1542,49 +1794,70 @@
         "str q2, [x28, #848]",
         "str q2, [x28, #864]",
         "str q2, [x28, #880]",
-        "ldr x20, [x4, #512]",
-        "ubfx x20, x20, #1, #1",
+        "mov x20, x4",
+        "ldr x21, [x20, #512]",
+        "ubfx x20, x21, #1, #1",
         "cbnz x20, #+0x8",
+        "b #+0x8c",
+        "mov x20, x4",
+        "ldr q2, [x20, #160]",
+        "mov v16.16b, v2.16b",
+        "ldr q2, [x20, #176]",
+        "mov v17.16b, v2.16b",
+        "ldr q2, [x20, #192]",
+        "mov v18.16b, v2.16b",
+        "ldr q2, [x20, #208]",
+        "mov v19.16b, v2.16b",
+        "ldr q2, [x20, #224]",
+        "mov v20.16b, v2.16b",
+        "ldr q2, [x20, #240]",
+        "mov v21.16b, v2.16b",
+        "ldr q2, [x20, #256]",
+        "mov v22.16b, v2.16b",
+        "ldr q2, [x20, #272]",
+        "mov v23.16b, v2.16b",
+        "ldr q2, [x20, #288]",
+        "mov v24.16b, v2.16b",
+        "ldr q2, [x20, #304]",
+        "mov v25.16b, v2.16b",
+        "ldr q2, [x20, #320]",
+        "mov v26.16b, v2.16b",
+        "ldr q2, [x20, #336]",
+        "mov v27.16b, v2.16b",
+        "ldr q2, [x20, #352]",
+        "mov v28.16b, v2.16b",
+        "ldr q2, [x20, #368]",
+        "mov v29.16b, v2.16b",
+        "ldr q2, [x20, #384]",
+        "mov v30.16b, v2.16b",
+        "ldr q2, [x20, #400]",
+        "mov v31.16b, v2.16b",
         "b #+0x48",
-        "ldr q16, [x4, #160]",
-        "ldr q17, [x4, #176]",
-        "ldr q18, [x4, #192]",
-        "ldr q19, [x4, #208]",
-        "ldr q20, [x4, #224]",
-        "ldr q21, [x4, #240]",
-        "ldr q22, [x4, #256]",
-        "ldr q23, [x4, #272]",
-        "ldr q24, [x4, #288]",
-        "ldr q25, [x4, #304]",
-        "ldr q26, [x4, #320]",
-        "ldr q27, [x4, #336]",
-        "ldr q28, [x4, #352]",
-        "ldr q29, [x4, #368]",
-        "ldr q30, [x4, #384]",
-        "ldr q31, [x4, #400]",
-        "b #+0x44",
-        "movi v16.2d, #0x0",
-        "mov v17.16b, v16.16b",
-        "mov v18.16b, v16.16b",
-        "mov v19.16b, v16.16b",
-        "mov v20.16b, v16.16b",
-        "mov v21.16b, v16.16b",
-        "mov v22.16b, v16.16b",
-        "mov v23.16b, v16.16b",
-        "mov v24.16b, v16.16b",
-        "mov v25.16b, v16.16b",
-        "mov v26.16b, v16.16b",
-        "mov v27.16b, v16.16b",
-        "mov v28.16b, v16.16b",
-        "mov v29.16b, v16.16b",
-        "mov v30.16b, v16.16b",
-        "mov v31.16b, v16.16b",
-        "ldr x20, [x4, #512]",
-        "ubfx x20, x20, #1, #2",
+        "movi v2.2d, #0x0",
+        "mov v16.16b, v2.16b",
+        "mov v17.16b, v2.16b",
+        "mov v18.16b, v2.16b",
+        "mov v19.16b, v2.16b",
+        "mov v20.16b, v2.16b",
+        "mov v21.16b, v2.16b",
+        "mov v22.16b, v2.16b",
+        "mov v23.16b, v2.16b",
+        "mov v24.16b, v2.16b",
+        "mov v25.16b, v2.16b",
+        "mov v26.16b, v2.16b",
+        "mov v27.16b, v2.16b",
+        "mov v28.16b, v2.16b",
+        "mov v29.16b, v2.16b",
+        "mov v30.16b, v2.16b",
+        "mov v31.16b, v2.16b",
+        "mov x20, x4",
+        "ldr x21, [x20, #512]",
+        "ubfx x20, x21, #1, #2",
         "cbnz x20, #+0x8",
-        "b #+0x2c",
-        "ldr w20, [x4, #24]",
-        "ubfx w20, w20, #13, #3",
+        "b #+0x30",
+        "mov x20, x4",
+        "ldr w21, [x20, #24]",
+        "ubfx w20, w21, #13, #3",
         "rbit w1, w20",
         "lsr w1, w1, #30",
         "mrs x0, fpcr",
@@ -1603,10 +1876,11 @@
       ]
     },
     "clwb [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "GROUP15 0x0F 0xAE /6",
       "ExpectedArm64ASM": [
-        "dc cvac, x4"
+        "mov x20, x4",
+        "dc cvac, x20"
       ]
     },
     "sfence": {
@@ -1617,54 +1891,60 @@
       ]
     },
     "clflush [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP15 0x0F 0xAE /7",
       "ExpectedArm64ASM": [
-        "dc civac, x4",
+        "mov x20, x4",
+        "dc civac, x20",
         "dsb ish"
       ]
     },
     "clflushopt [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "GROUP15 0x0F 0xAE /7",
       "ExpectedArm64ASM": [
-        "dc civac, x4"
+        "mov x20, x4",
+        "dc civac, x20"
       ]
     },
     "prefetchnta [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "GROUP16 0x0F 0x18 /0"
       ],
       "ExpectedArm64ASM": [
-        "prfm pldl1strm, [x4]"
+        "mov x20, x4",
+        "prfm pldl1strm, [x20]"
       ]
     },
     "prefetcht0 [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "GROUP16 0x0F 0x18 /1"
       ],
       "ExpectedArm64ASM": [
-        "prfm pldl1keep, [x4]"
+        "mov x20, x4",
+        "prfm pldl1keep, [x20]"
       ]
     },
     "prefetcht1 [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "GROUP16 0x0F 0x18 /2"
       ],
       "ExpectedArm64ASM": [
-        "prfm pldl2keep, [x4]"
+        "mov x20, x4",
+        "prfm pldl2keep, [x20]"
       ]
     },
     "prefetcht2 [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "GROUP16 0x0F 0x18 /3"
       ],
       "ExpectedArm64ASM": [
-        "prfm pldl3keep, [x4]"
+        "mov x20, x4",
+        "prfm pldl3keep, [x20]"
       ]
     },
     "db 0x0f, 0x18, 0x20;": {
@@ -1677,31 +1957,34 @@
       "ExpectedArm64ASM": []
     },
     "db 0x0f, 0x0d, 0x00": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "GROUPP 0x0F 0x0D /0",
         "prefetch_exclusive [rax]"
       ],
       "ExpectedArm64ASM": [
-        "prfm pldl1keep, [x4]"
+        "mov x20, x4",
+        "prfm pldl1keep, [x20]"
       ]
     },
     "prefetchw [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "GROUPP 0x0F 0x0D /1"
       ],
       "ExpectedArm64ASM": [
-        "prfm pstl1keep, [x4]"
+        "mov x20, x4",
+        "prfm pstl1keep, [x20]"
       ]
     },
     "prefetchwt1 [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "GROUPP 0x0F 0x0D /2"
       ],
       "ExpectedArm64ASM": [
-        "prfm pstl1keep, [x4]"
+        "mov x20, x4",
+        "prfm pstl1keep, [x20]"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/FlagM/SecondaryModRM.json b/unittests/InstructionCountCI/FlagM/SecondaryModRM.json
index ed95f77d38..85103990db 100644
--- a/unittests/InstructionCountCI/FlagM/SecondaryModRM.json
+++ b/unittests/InstructionCountCI/FlagM/SecondaryModRM.json
@@ -14,9 +14,10 @@
   },
   "Instructions": {
     "xgetbv": {
-      "ExpectedInstructionCount": 54,
+      "ExpectedInstructionCount": 57,
       "Comment": "0xF 0x01 /2 RM-0",
       "ExpectedArm64ASM": [
+        "mov x20, x5",
         "sub sp, sp, #0xf0 (240)",
         "mov x3, sp",
         "st1 {v2.2d, v3.2d}, [x3], #32",
@@ -41,7 +42,7 @@
         "st1 {v20.2d, v21.2d, v22.2d, v23.2d}, [x3], #64",
         "st1 {v24.2d, v25.2d, v26.2d, v27.2d}, [x3], #64",
         "st1 {v28.2d, v29.2d, v30.2d, v31.2d}, [x3], #64",
-        "mov w1, w5",
+        "mov w1, w20",
         "ldr x0, [x28, #1112]",
         "ldr x2, [x28, #1128]",
         "blr x2",
@@ -67,20 +68,22 @@
         "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [sp], #64",
         "ld1 {v12.2d, v13.2d, v14.2d, v15.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "mov w20, w0",
-        "lsr x21, x0, #32",
-        "mov w4, w20",
-        "mov w6, w21"
+        "mov w22, w0",
+        "lsr x23, x0, #32",
+        "mov w20, w22",
+        "mov w21, w23",
+        "mov x4, x20",
+        "mov x6, x21"
       ]
     },
     "rdtscp": {
-      "ExpectedInstructionCount": 21,
+      "ExpectedInstructionCount": 24,
       "Comment": "0xF 0x01 /7 RM-1",
       "ExpectedArm64ASM": [
         "dmb ld",
         "mrs x20, S3_3_c14_c0_2",
-        "lsl w4, w20, #7",
-        "lsr x6, x20, #25",
+        "lsl w21, w20, #7",
+        "lsr x22, x20, #25",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "str x8, [x28, #40]",
@@ -97,7 +100,10 @@
         "msr nzcv, x8",
         "ldr x8, [x28, #40]",
         "str xzr, [x28, #1056]",
-        "orr x5, x0, x1, lsl #12"
+        "orr x20, x0, x1, lsl #12",
+        "mov x4, x21",
+        "mov x5, x20",
+        "mov x6, x22"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/FlagM/Secondary_OpSize.json b/unittests/InstructionCountCI/FlagM/Secondary_OpSize.json
index 24169c7eae..c0b4ccc9e1 100644
--- a/unittests/InstructionCountCI/FlagM/Secondary_OpSize.json
+++ b/unittests/InstructionCountCI/FlagM/Secondary_OpSize.json
@@ -13,48 +13,62 @@
   },
   "Instructions": {
     "ucomisd xmm0, xmm1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x66 0x0f 0x2e",
       "ExpectedArm64ASM": [
-        "fcmp d16, d17",
-        "mov w27, #0x0",
-        "cset w26, vc",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "fcmp d2, d3",
+        "mov w20, #0x0",
+        "cset w21, vc",
+        "mov x26, x21",
         "axflag",
-        "cfinv"
+        "cfinv",
+        "mov x27, x20"
       ]
     },
     "comisd xmm0, xmm1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x66 0x0f 0x2f",
       "ExpectedArm64ASM": [
-        "fcmp d16, d17",
-        "mov w27, #0x0",
-        "cset w26, vc",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "fcmp d2, d3",
+        "mov w20, #0x0",
+        "cset w21, vc",
+        "mov x26, x21",
         "axflag",
-        "cfinv"
+        "cfinv",
+        "mov x27, x20"
       ]
     },
     "pmovmskb eax, xmm0": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x66 0x0f 0xd7",
       "ExpectedArm64ASM": [
-        "ldr q2, [x28, #2272]",
-        "cmlt v3.16b, v16.16b, #0",
-        "and v2.16b, v3.16b, v2.16b",
-        "addp v2.16b, v2.16b, v2.16b",
-        "addp v2.8b, v2.8b, v2.8b",
-        "addp v2.8b, v2.8b, v2.8b",
-        "umov w4, v2.h[0]"
+        "mov v2.16b, v16.16b",
+        "ldr q3, [x28, #2272]",
+        "cmlt v4.16b, v2.16b, #0",
+        "and v2.16b, v4.16b, v3.16b",
+        "addp v3.16b, v2.16b, v2.16b",
+        "addp v2.8b, v3.8b, v3.8b",
+        "addp v3.8b, v2.8b, v2.8b",
+        "umov w20, v3.h[0]",
+        "mov x4, x20"
       ]
     },
     "maskmovdqu xmm0, xmm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x66 0x0f 0xf7",
       "ExpectedArm64ASM": [
-        "cmlt v2.16b, v17.16b, #0",
-        "ldr q3, [x11]",
-        "bsl v2.16b, v16.16b, v3.16b",
-        "str q2, [x11]"
+        "mov v2.16b, v17.16b",
+        "cmlt v3.16b, v2.16b, #0",
+        "mov v2.16b, v16.16b",
+        "mov x20, x11",
+        "ldr q4, [x20]",
+        "mov v5.16b, v3.16b",
+        "bsl v5.16b, v2.16b, v4.16b",
+        "str q5, [x20]"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/FlagM/Secondary_REP.json b/unittests/InstructionCountCI/FlagM/Secondary_REP.json
index d31556cba0..96a33f638a 100644
--- a/unittests/InstructionCountCI/FlagM/Secondary_REP.json
+++ b/unittests/InstructionCountCI/FlagM/Secondary_REP.json
@@ -14,106 +14,136 @@
   },
   "Instructions": {
     "popcnt ax, bx": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 15,
       "Comment": "0xf3 0x0f 0xb8",
       "ExpectedArm64ASM": [
-        "uxth w20, w7",
-        "fmov s0, w20",
+        "mov x20, x7",
+        "uxth w21, w20",
+        "fmov s0, w21",
         "cnt v0.8b, v0.8b",
         "addp v0.8b, v0.8b, v0.8b",
         "umov w20, v0.b[0]",
-        "bfxil x4, x20, #0, #16",
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x4, x22",
         "tst w20, w20",
-        "mov w26, #0x1",
-        "mov w27, #0x0"
+        "mov w20, #0x1",
+        "mov x26, x20",
+        "mov w20, #0x0",
+        "mov x27, x20"
       ]
     },
     "popcnt eax, ebx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": "0xf3 0x0f 0xb8",
       "ExpectedArm64ASM": [
-        "fmov s0, w7",
+        "mov x20, x7",
+        "fmov s0, w20",
         "cnt v0.8b, v0.8b",
         "addv b0, v0.8b",
-        "umov w4, v0.b[0]",
-        "tst w4, w4",
-        "mov w26, #0x1",
-        "mov w27, #0x0"
+        "umov w21, v0.b[0]",
+        "mov x4, x21",
+        "tst w21, w21",
+        "mov w20, #0x1",
+        "mov x26, x20",
+        "mov w20, #0x0",
+        "mov x27, x20"
       ]
     },
     "popcnt rax, rbx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": "0xf3 0x0f 0xb8",
       "ExpectedArm64ASM": [
-        "fmov d0, x7",
+        "mov x20, x7",
+        "fmov d0, x20",
         "cnt v0.8b, v0.8b",
         "addv b0, v0.8b",
-        "umov w4, v0.b[0]",
-        "tst w4, w4",
-        "mov w26, #0x1",
-        "mov w27, #0x0"
+        "umov w21, v0.b[0]",
+        "mov x4, x21",
+        "tst w21, w21",
+        "mov w20, #0x1",
+        "mov x26, x20",
+        "mov w20, #0x0",
+        "mov x27, x20"
       ]
     },
     "tzcnt ax, bx": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": "0xf3 0x0f 0xbc",
       "ExpectedArm64ASM": [
-        "rbit w20, w7",
-        "orr w20, w20, #0x8000",
-        "clz w20, w20",
-        "bfxil x4, x20, #0, #16",
-        "cmn wzr, w20, lsl #16",
-        "rmif x20, #3, #nzCv"
+        "mov x20, x7",
+        "rbit w21, w20",
+        "orr w21, w21, #0x8000",
+        "clz w21, w21",
+        "mov x20, x4",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22",
+        "cmn wzr, w21, lsl #16",
+        "rmif x21, #3, #nzCv"
       ]
     },
     "tzcnt eax, ebx": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "0xf3 0x0f 0xbc",
       "ExpectedArm64ASM": [
-        "rbit w4, w7",
-        "clz w4, w4",
-        "tst w4, w4",
-        "rmif x4, #4, #nzCv"
+        "mov x20, x7",
+        "rbit w21, w20",
+        "clz w21, w21",
+        "mov x4, x21",
+        "tst w21, w21",
+        "rmif x21, #4, #nzCv"
       ]
     },
     "tzcnt rax, rbx": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "0xf3 0x0f 0xbc",
       "ExpectedArm64ASM": [
-        "rbit x4, x7",
-        "clz x4, x4",
-        "tst x4, x4",
-        "rmif x4, #5, #nzCv"
+        "mov x20, x7",
+        "rbit x21, x20",
+        "clz x21, x21",
+        "mov x4, x21",
+        "tst x21, x21",
+        "rmif x21, #5, #nzCv"
       ]
     },
     "lzcnt ax, bx": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": "0xf3 0x0f 0xbd",
       "ExpectedArm64ASM": [
-        "lsl w20, w7, #16",
-        "orr w20, w20, #0x8000",
-        "clz w20, w20",
-        "bfxil x4, x20, #0, #16",
-        "cmn wzr, w20, lsl #16",
-        "rmif x20, #3, #nzCv"
+        "mov x20, x7",
+        "lsl w21, w20, #16",
+        "orr w21, w21, #0x8000",
+        "clz w21, w21",
+        "mov x20, x4",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22",
+        "cmn wzr, w21, lsl #16",
+        "rmif x21, #3, #nzCv"
       ]
     },
     "lzcnt eax, ebx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0xf3 0x0f 0xbd",
       "ExpectedArm64ASM": [
-        "clz w4, w7",
-        "tst w4, w4",
-        "rmif x4, #4, #nzCv"
+        "mov x20, x7",
+        "clz w21, w20",
+        "mov x4, x21",
+        "tst w21, w21",
+        "rmif x21, #4, #nzCv"
       ]
     },
     "lzcnt rax, rbx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0xf3 0x0f 0xbd",
       "ExpectedArm64ASM": [
-        "clz x4, x7",
-        "tst x4, x4",
-        "rmif x4, #5, #nzCv"
+        "mov x20, x7",
+        "clz x21, x20",
+        "mov x4, x21",
+        "tst x21, x21",
+        "rmif x21, #5, #nzCv"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/FlagM/VEX_map1.json b/unittests/InstructionCountCI/FlagM/VEX_map1.json
index 8fe066e27b..a531f4293f 100644
--- a/unittests/InstructionCountCI/FlagM/VEX_map1.json
+++ b/unittests/InstructionCountCI/FlagM/VEX_map1.json
@@ -15,108 +15,132 @@
   },
   "Instructions": {
     "vucomiss xmm0, xmm1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map 1 0b00 0x2e 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmp s16, s17",
-        "mov w27, #0x0",
-        "cset w26, vc",
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "fcmp s2, s3",
+        "mov w20, #0x0",
+        "cset w21, vc",
+        "mov x26, x21",
         "axflag",
-        "cfinv"
+        "cfinv",
+        "mov x27, x20"
       ]
     },
     "vucomisd xmm0, xmm1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map 1 0b01 0x2e 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmp d16, d17",
-        "mov w27, #0x0",
-        "cset w26, vc",
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "fcmp d2, d3",
+        "mov w20, #0x0",
+        "cset w21, vc",
+        "mov x26, x21",
         "axflag",
-        "cfinv"
+        "cfinv",
+        "mov x27, x20"
       ]
     },
     "vcomiss xmm0, xmm1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map 1 0b00 0x2f 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmp s16, s17",
-        "mov w27, #0x0",
-        "cset w26, vc",
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "fcmp s2, s3",
+        "mov w20, #0x0",
+        "cset w21, vc",
+        "mov x26, x21",
         "axflag",
-        "cfinv"
+        "cfinv",
+        "mov x27, x20"
       ]
     },
     "vcomisd xmm0, xmm1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map 1 0b01 0x2f 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmp d16, d17",
-        "mov w27, #0x0",
-        "cset w26, vc",
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "fcmp d2, d3",
+        "mov w20, #0x0",
+        "cset w21, vc",
+        "mov x26, x21",
         "axflag",
-        "cfinv"
+        "cfinv",
+        "mov x27, x20"
       ]
     },
     "vpmovmskb rax, xmm0": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map 1 0b01 0xd7 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "ldr q2, [x28, #2272]",
-        "cmlt v3.16b, v16.16b, #0",
-        "and v2.16b, v3.16b, v2.16b",
-        "addp v2.16b, v2.16b, v2.16b",
-        "addp v2.8b, v2.8b, v2.8b",
-        "addp v2.8b, v2.8b, v2.8b",
-        "umov w4, v2.h[0]"
+        "mov z2.d, p7/m, z16.d",
+        "ldr q3, [x28, #2272]",
+        "cmlt v4.16b, v2.16b, #0",
+        "and v2.16b, v4.16b, v3.16b",
+        "addp v3.16b, v2.16b, v2.16b",
+        "addp v2.8b, v3.8b, v3.8b",
+        "addp v3.8b, v2.8b, v2.8b",
+        "umov w20, v3.h[0]",
+        "mov x4, x20"
       ]
     },
     "vpmovmskb rax, ymm0": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "Map 1 0b01 0xd7 256-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z16.d",
         "ldr x0, [x28, #1680]",
-        "ld1b {z2.b}, p7/z, [x0]",
+        "ld1b {z3.b}, p7/z, [x0]",
         "mrs x0, nzcv",
         "mov z0.d, #0",
-        "cmplt p0.b, p7/z, z16.b, #0",
-        "not z0.b, p0/m, z16.b",
-        "orr z0.b, p0/m, z0.b, z16.b",
-        "mov z3.d, z0.d",
+        "cmplt p0.b, p7/z, z2.b, #0",
+        "not z0.b, p0/m, z2.b",
+        "orr z0.b, p0/m, z0.b, z2.b",
+        "mov z4.d, z0.d",
         "msr nzcv, x0",
-        "and z2.d, z3.d, z2.d",
+        "and z2.d, z4.d, z3.d",
         "movprfx z0, z2",
         "addp z0.b, p7/m, z0.b, z2.b",
-        "uzp1 z2.b, z0.b, z0.b",
+        "uzp1 z3.b, z0.b, z0.b",
         "uzp2 z1.b, z0.b, z0.b",
-        "splice z2.d, p6, z2.d, z1.d",
-        "addp v2.16b, v2.16b, v2.16b",
-        "addp v2.8b, v2.8b, v2.8b",
-        "mov w4, v2.s[0]"
+        "splice z3.d, p6, z3.d, z1.d",
+        "addp v2.16b, v3.16b, v3.16b",
+        "addp v3.8b, v2.8b, v2.8b",
+        "mov w20, v3.s[0]",
+        "mov x4, x20"
       ]
     },
     "vmaskmovdqu xmm0, xmm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 1 0b01 0xf7 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "cmlt v2.16b, v17.16b, #0",
-        "ldr q3, [x11]",
-        "bsl v2.16b, v16.16b, v3.16b",
-        "str q2, [x11]"
+        "mov z2.d, p7/m, z17.d",
+        "cmlt v3.16b, v2.16b, #0",
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x11",
+        "ldr q4, [x20]",
+        "mov v5.16b, v3.16b",
+        "bsl v5.16b, v2.16b, v4.16b",
+        "str q5, [x20]"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/FlagM/VEX_map2.json b/unittests/InstructionCountCI/FlagM/VEX_map2.json
index f83e40ecdd..7d78d439f9 100644
--- a/unittests/InstructionCountCI/FlagM/VEX_map2.json
+++ b/unittests/InstructionCountCI/FlagM/VEX_map2.json
@@ -12,473 +12,569 @@
   },
   "Instructions": {
     "vtestps xmm0, xmm1": {
-      "ExpectedInstructionCount": 16,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "Map 2 0b01 0x0e 128-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
         "mov w20, #0x80000000",
-        "dup v2.4s, w20",
-        "and v3.16b, v17.16b, v16.16b",
-        "bic v4.16b, v17.16b, v16.16b",
-        "and v3.16b, v3.16b, v2.16b",
-        "and v2.16b, v4.16b, v2.16b",
-        "umaxv h3, v3.8h",
-        "umaxv h2, v2.8h",
-        "umov w20, v3.h[0]",
+        "dup v4.4s, w20",
+        "and v5.16b, v3.16b, v2.16b",
+        "bic v6.16b, v3.16b, v2.16b",
+        "and v2.16b, v5.16b, v4.16b",
+        "and v3.16b, v6.16b, v4.16b",
+        "umaxv h4, v2.8h",
+        "umaxv h2, v3.8h",
+        "umov w20, v4.h[0]",
         "umov w21, v2.h[0]",
-        "mov w27, #0x0",
-        "mov w26, #0x1",
+        "mov w22, #0x0",
+        "mov w23, #0x1",
         "cmp x21, #0x0 (0)",
-        "cset x21, eq",
+        "cset x24, eq",
         "tst w20, w20",
-        "rmif x21, #63, #nzCv"
+        "rmif x24, #63, #nzCv",
+        "mov x26, x23",
+        "mov x27, x22"
       ]
     },
     "vtestps ymm0, ymm1": {
-      "ExpectedInstructionCount": 16,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "Map 2 0b01 0x0e 256-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
         "mov w20, #0x80000000",
-        "mov z2.s, w20",
-        "and z3.d, z17.d, z16.d",
-        "bic z4.d, z17.d, z16.d",
-        "and z3.d, z3.d, z2.d",
-        "and z2.d, z4.d, z2.d",
-        "umaxv h3, p7, z3.h",
-        "umaxv h2, p7, z2.h",
-        "umov w20, v3.h[0]",
+        "mov z4.s, w20",
+        "and z5.d, z3.d, z2.d",
+        "bic z6.d, z3.d, z2.d",
+        "and z2.d, z5.d, z4.d",
+        "and z3.d, z6.d, z4.d",
+        "umaxv h4, p7, z2.h",
+        "umaxv h2, p7, z3.h",
+        "umov w20, v4.h[0]",
         "umov w21, v2.h[0]",
-        "mov w27, #0x0",
-        "mov w26, #0x1",
+        "mov w22, #0x0",
+        "mov w23, #0x1",
         "cmp x21, #0x0 (0)",
-        "cset x21, eq",
+        "cset x24, eq",
         "tst w20, w20",
-        "rmif x21, #63, #nzCv"
+        "rmif x24, #63, #nzCv",
+        "mov x26, x23",
+        "mov x27, x22"
       ]
     },
     "vtestpd xmm0, xmm1": {
-      "ExpectedInstructionCount": 16,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "Map 2 0b01 0x0f 128-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
         "mov x20, #0x8000000000000000",
-        "dup v2.2d, x20",
-        "and v3.16b, v17.16b, v16.16b",
-        "bic v4.16b, v17.16b, v16.16b",
-        "and v3.16b, v3.16b, v2.16b",
-        "and v2.16b, v4.16b, v2.16b",
-        "umaxv h3, v3.8h",
-        "umaxv h2, v2.8h",
-        "umov w20, v3.h[0]",
+        "dup v4.2d, x20",
+        "and v5.16b, v3.16b, v2.16b",
+        "bic v6.16b, v3.16b, v2.16b",
+        "and v2.16b, v5.16b, v4.16b",
+        "and v3.16b, v6.16b, v4.16b",
+        "umaxv h4, v2.8h",
+        "umaxv h2, v3.8h",
+        "umov w20, v4.h[0]",
         "umov w21, v2.h[0]",
-        "mov w27, #0x0",
-        "mov w26, #0x1",
+        "mov w22, #0x0",
+        "mov w23, #0x1",
         "cmp x21, #0x0 (0)",
-        "cset x21, eq",
+        "cset x24, eq",
         "tst w20, w20",
-        "rmif x21, #63, #nzCv"
+        "rmif x24, #63, #nzCv",
+        "mov x26, x23",
+        "mov x27, x22"
       ]
     },
     "vtestpd ymm0, ymm1": {
-      "ExpectedInstructionCount": 16,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "Map 2 0b01 0x0f 256-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
         "mov x20, #0x8000000000000000",
-        "mov z2.d, x20",
-        "and z3.d, z17.d, z16.d",
-        "bic z4.d, z17.d, z16.d",
-        "and z3.d, z3.d, z2.d",
-        "and z2.d, z4.d, z2.d",
-        "umaxv h3, p7, z3.h",
-        "umaxv h2, p7, z2.h",
-        "umov w20, v3.h[0]",
+        "mov z4.d, x20",
+        "and z5.d, z3.d, z2.d",
+        "bic z6.d, z3.d, z2.d",
+        "and z2.d, z5.d, z4.d",
+        "and z3.d, z6.d, z4.d",
+        "umaxv h4, p7, z2.h",
+        "umaxv h2, p7, z3.h",
+        "umov w20, v4.h[0]",
         "umov w21, v2.h[0]",
-        "mov w27, #0x0",
-        "mov w26, #0x1",
+        "mov w22, #0x0",
+        "mov w23, #0x1",
         "cmp x21, #0x0 (0)",
-        "cset x21, eq",
+        "cset x24, eq",
         "tst w20, w20",
-        "rmif x21, #63, #nzCv"
+        "rmif x24, #63, #nzCv",
+        "mov x26, x23",
+        "mov x27, x22"
       ]
     },
     "vptest xmm0, xmm1": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 16,
       "Comment": [
         "Map 2 0b01 0x16 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "and v2.16b, v16.16b, v17.16b",
-        "bic v3.16b, v17.16b, v16.16b",
-        "umaxv h2, v2.8h",
-        "umaxv h3, v3.8h",
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "and v4.16b, v2.16b, v3.16b",
+        "bic v5.16b, v3.16b, v2.16b",
+        "umaxv h2, v4.8h",
+        "umaxv h3, v5.8h",
         "umov w20, v2.h[0]",
         "umov w21, v3.h[0]",
-        "mov w27, #0x0",
-        "mov w26, #0x1",
+        "mov w22, #0x0",
+        "mov w23, #0x1",
         "cmp x21, #0x0 (0)",
-        "cset x21, eq",
+        "cset x24, eq",
         "tst w20, w20",
-        "rmif x21, #63, #nzCv"
+        "rmif x24, #63, #nzCv",
+        "mov x26, x23",
+        "mov x27, x22"
       ]
     },
     "vptest ymm0, ymm1": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 16,
       "Comment": [
         "Map 2 0b01 0x16 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "and z2.d, z16.d, z17.d",
-        "bic z3.d, z17.d, z16.d",
-        "umaxv h2, p7, z2.h",
-        "umaxv h3, p7, z3.h",
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "and z4.d, z2.d, z3.d",
+        "bic z5.d, z3.d, z2.d",
+        "umaxv h2, p7, z4.h",
+        "umaxv h3, p7, z5.h",
         "umov w20, v2.h[0]",
         "umov w21, v3.h[0]",
-        "mov w27, #0x0",
-        "mov w26, #0x1",
+        "mov w22, #0x0",
+        "mov w23, #0x1",
         "cmp x21, #0x0 (0)",
-        "cset x21, eq",
+        "cset x24, eq",
         "tst w20, w20",
-        "rmif x21, #63, #nzCv"
+        "rmif x24, #63, #nzCv",
+        "mov x26, x23",
+        "mov x27, x22"
       ]
     },
     "vmaskmovps xmm0, xmm1, [rax]": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 2 0b01 0x2c 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mrs x20, nzcv",
-        "cmplt p0.s, p6/z, z17.s, #0",
-        "ld1w {z2.s}, p0/z, [x4]",
-        "mov v16.16b, v2.16b",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mrs x21, nzcv",
+        "cmplt p0.s, p6/z, z2.s, #0",
+        "ld1w {z3.s}, p0/z, [x20]",
+        "mov v2.16b, v3.16b",
+        "mov z16.d, p7/m, z2.d",
+        "msr nzcv, x21"
       ]
     },
     "vmaskmovps ymm0, ymm1, [rax]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b01 0x2c 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mrs x20, nzcv",
-        "cmplt p0.s, p7/z, z17.s, #0",
-        "ld1w {z16.s}, p0/z, [x4]",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mrs x21, nzcv",
+        "cmplt p0.s, p7/z, z2.s, #0",
+        "ld1w {z3.s}, p0/z, [x20]",
+        "mov z16.d, p7/m, z3.d",
+        "msr nzcv, x21"
       ]
     },
     "vmaskmovpd xmm0, xmm1, [rax]": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 2 0b01 0x2d 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mrs x20, nzcv",
-        "cmplt p0.d, p6/z, z17.d, #0",
-        "ld1d {z2.d}, p0/z, [x4]",
-        "mov v16.16b, v2.16b",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mrs x21, nzcv",
+        "cmplt p0.d, p6/z, z2.d, #0",
+        "ld1d {z3.d}, p0/z, [x20]",
+        "mov v2.16b, v3.16b",
+        "mov z16.d, p7/m, z2.d",
+        "msr nzcv, x21"
       ]
     },
     "vmaskmovpd ymm0, ymm1, [rax]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b01 0x2d 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mrs x20, nzcv",
-        "cmplt p0.d, p7/z, z17.d, #0",
-        "ld1d {z16.d}, p0/z, [x4]",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mrs x21, nzcv",
+        "cmplt p0.d, p7/z, z2.d, #0",
+        "ld1d {z3.d}, p0/z, [x20]",
+        "mov z16.d, p7/m, z3.d",
+        "msr nzcv, x21"
       ]
     },
     "vmaskmovps [rax], xmm0, xmm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b01 0x2e 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mrs x20, nzcv",
-        "cmplt p0.s, p6/z, z16.s, #0",
-        "st1w {z17.s}, p0, [x4]",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mrs x21, nzcv",
+        "cmplt p0.s, p6/z, z2.s, #0",
+        "st1w {z3.s}, p0, [x20]",
+        "msr nzcv, x21"
       ]
     },
     "vmaskmovps [rax], ymm0, ymm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b01 0x2e 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mrs x20, nzcv",
-        "cmplt p0.s, p7/z, z16.s, #0",
-        "st1w {z17.s}, p0, [x4]",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mrs x21, nzcv",
+        "cmplt p0.s, p7/z, z2.s, #0",
+        "st1w {z3.s}, p0, [x20]",
+        "msr nzcv, x21"
       ]
     },
     "vmaskmovpd [rax], xmm0, xmm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b01 0x2f 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mrs x20, nzcv",
-        "cmplt p0.d, p6/z, z16.d, #0",
-        "st1d {z17.d}, p0, [x4]",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mrs x21, nzcv",
+        "cmplt p0.d, p6/z, z2.d, #0",
+        "st1d {z3.d}, p0, [x20]",
+        "msr nzcv, x21"
       ]
     },
     "vmaskmovpd [rax], ymm0, ymm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b01 0x2f 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mrs x20, nzcv",
-        "cmplt p0.d, p7/z, z16.d, #0",
-        "st1d {z17.d}, p0, [x4]",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mrs x21, nzcv",
+        "cmplt p0.d, p7/z, z2.d, #0",
+        "st1d {z3.d}, p0, [x20]",
+        "msr nzcv, x21"
       ]
     },
     "vpmaskmovd xmm0, xmm1, [rax]": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 2 0b01 0x8c 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mrs x20, nzcv",
-        "cmplt p0.s, p6/z, z17.s, #0",
-        "ld1w {z2.s}, p0/z, [x4]",
-        "mov v16.16b, v2.16b",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mrs x21, nzcv",
+        "cmplt p0.s, p6/z, z2.s, #0",
+        "ld1w {z3.s}, p0/z, [x20]",
+        "mov v2.16b, v3.16b",
+        "mov z16.d, p7/m, z2.d",
+        "msr nzcv, x21"
       ]
     },
     "vpmaskmovd ymm0, ymm1, [rax]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b01 0x8c 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mrs x20, nzcv",
-        "cmplt p0.s, p7/z, z17.s, #0",
-        "ld1w {z16.s}, p0/z, [x4]",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mrs x21, nzcv",
+        "cmplt p0.s, p7/z, z2.s, #0",
+        "ld1w {z3.s}, p0/z, [x20]",
+        "mov z16.d, p7/m, z3.d",
+        "msr nzcv, x21"
       ]
     },
     "vpmaskmovq xmm0, xmm1, [rax]": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 2 0b01 0x8c 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mrs x20, nzcv",
-        "cmplt p0.d, p6/z, z17.d, #0",
-        "ld1d {z2.d}, p0/z, [x4]",
-        "mov v16.16b, v2.16b",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mrs x21, nzcv",
+        "cmplt p0.d, p6/z, z2.d, #0",
+        "ld1d {z3.d}, p0/z, [x20]",
+        "mov v2.16b, v3.16b",
+        "mov z16.d, p7/m, z2.d",
+        "msr nzcv, x21"
       ]
     },
     "vpmaskmovq ymm0, ymm1, [rax]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b01 0x8c 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mrs x20, nzcv",
-        "cmplt p0.d, p7/z, z17.d, #0",
-        "ld1d {z16.d}, p0/z, [x4]",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mrs x21, nzcv",
+        "cmplt p0.d, p7/z, z2.d, #0",
+        "ld1d {z3.d}, p0/z, [x20]",
+        "mov z16.d, p7/m, z3.d",
+        "msr nzcv, x21"
       ]
     },
     "vpmaskmovd [rax], xmm0, xmm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b01 0x8e 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mrs x20, nzcv",
-        "cmplt p0.s, p6/z, z16.s, #0",
-        "st1w {z17.s}, p0, [x4]",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mrs x21, nzcv",
+        "cmplt p0.s, p6/z, z2.s, #0",
+        "st1w {z3.s}, p0, [x20]",
+        "msr nzcv, x21"
       ]
     },
     "vpmaskmovd [rax], ymm0, ymm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b01 0x8e 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mrs x20, nzcv",
-        "cmplt p0.s, p7/z, z16.s, #0",
-        "st1w {z17.s}, p0, [x4]",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mrs x21, nzcv",
+        "cmplt p0.s, p7/z, z2.s, #0",
+        "st1w {z3.s}, p0, [x20]",
+        "msr nzcv, x21"
       ]
     },
     "vpmaskmovq [rax], xmm0, xmm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b01 0x8e 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mrs x20, nzcv",
-        "cmplt p0.d, p6/z, z16.d, #0",
-        "st1d {z17.d}, p0, [x4]",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mrs x21, nzcv",
+        "cmplt p0.d, p6/z, z2.d, #0",
+        "st1d {z3.d}, p0, [x20]",
+        "msr nzcv, x21"
       ]
     },
     "vpmaskmovq [rax], ymm0, ymm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b01 0x8e 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mrs x20, nzcv",
-        "cmplt p0.d, p7/z, z16.d, #0",
-        "st1d {z17.d}, p0, [x4]",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mrs x21, nzcv",
+        "cmplt p0.d, p7/z, z2.d, #0",
+        "st1d {z3.d}, p0, [x20]",
+        "msr nzcv, x21"
       ]
     },
     "andn eax, ebx, ecx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 2 0b00 0xf2 32-bit"
       ],
       "ExpectedArm64ASM": [
-        "bic w4, w5, w7",
-        "mov x26, x4",
-        "tst w4, w4"
+        "mov x20, x7",
+        "mov x21, x5",
+        "bic w22, w21, w20",
+        "mov x4, x22",
+        "mov x26, x22",
+        "tst w22, w22"
       ]
     },
     "andn rax, rbx, rcx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 2 0b00 0xf2 64-bit"
       ],
       "ExpectedArm64ASM": [
-        "bic x4, x5, x7",
-        "mov x26, x4",
-        "tst x4, x4"
+        "mov x20, x7",
+        "mov x21, x5",
+        "bic x22, x21, x20",
+        "mov x4, x22",
+        "mov x26, x22",
+        "tst x22, x22"
       ]
     },
     "bzhi eax, ebx, ecx": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "Map 2 0b00 0xf5 32-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov w20, #0xffffffff",
-        "lsl w20, w20, w5",
-        "bic w20, w7, w20",
-        "tst x5, #0xe0",
-        "csel w4, w7, w20, ne",
+        "mov x20, x7",
+        "mov x21, x5",
+        "mov w22, #0xffffffff",
+        "lsl w23, w22, w21",
+        "bic w22, w20, w23",
+        "tst x21, #0xe0",
+        "csel w21, w20, w22, ne",
+        "mov x4, x21",
         "cset w20, ne",
-        "tst w4, w4",
+        "tst w21, w21",
         "rmif x20, #63, #nzCv"
       ]
     },
     "bzhi rax, rbx, rcx": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "Map 2 0b00 0xf5 64-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov x20, #0xffffffffffffffff",
-        "lsl x20, x20, x5",
-        "bic x20, x7, x20",
-        "tst x5, #0xc0",
-        "csel x4, x7, x20, ne",
+        "mov x20, x7",
+        "mov x21, x5",
+        "mov x22, #0xffffffffffffffff",
+        "lsl x23, x22, x21",
+        "bic x22, x20, x23",
+        "tst x21, #0xc0",
+        "csel x21, x20, x22, ne",
+        "mov x4, x21",
         "cset w20, ne",
-        "tst x4, x4",
+        "tst x21, x21",
         "rmif x20, #63, #nzCv"
       ]
     },
     "pdep eax, ebx, ecx": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 17,
       "Comment": [
         "Map 2 0b11 0xf5 32-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov x0, x7",
-        "mov x1, x5",
-        "mov w4, #0x0",
-        "cbz w5, #+0x2c",
+        "mov x20, x7",
+        "mov x21, x5",
+        "mov x0, x20",
+        "mov x1, x21",
+        "mov w22, #0x0",
+        "cbz w21, #+0x2c",
         "neg w2, w1",
         "and w2, w2, w1",
         "sbfx w3, w0, #0, #1",
         "eor w1, w1, w2",
         "and w2, w3, w2",
         "neg w3, w1",
-        "orr w4, w4, w2",
+        "orr w22, w22, w2",
         "lsr w0, w0, #1",
         "and w2, w1, w3",
-        "cbnz w2, #-0x1c"
+        "cbnz w2, #-0x1c",
+        "mov x4, x22"
       ]
     },
     "pdep rax, rbx, rcx": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 17,
       "Comment": [
         "Map 2 0b11 0xf5 64-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov x0, x7",
-        "mov x1, x5",
-        "mov x4, #0x0",
-        "cbz x5, #+0x2c",
+        "mov x20, x7",
+        "mov x21, x5",
+        "mov x0, x20",
+        "mov x1, x21",
+        "mov x22, #0x0",
+        "cbz x21, #+0x2c",
         "neg x2, x1",
         "and x2, x2, x1",
         "sbfx x3, x0, #0, #1",
         "eor x1, x1, x2",
         "and x2, x3, x2",
         "neg x3, x1",
-        "orr x4, x4, x2",
+        "orr x22, x22, x2",
         "lsr x0, x0, #1",
         "and x2, x1, x3",
-        "cbnz x2, #-0x1c"
+        "cbnz x2, #-0x1c",
+        "mov x4, x22"
       ]
     },
     "bextr eax, ebx, ecx": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "Map 2 0b00 0xf7 32-bit"
       ],
       "ExpectedArm64ASM": [
-        "uxtb w20, w5",
-        "lsr w21, w7, w20",
-        "mov w22, #0x0",
+        "mov x20, x7",
+        "mov x21, x5",
+        "uxtb w22, w21",
+        "lsr w23, w20, w22",
+        "mov w20, #0x0",
+        "cmp w22, #0x1f (31)",
+        "csel w24, w23, w20, ls",
+        "ubfx w20, w21, #8, #8",
+        "mov x21, #0xffffffffffffffff",
+        "lsl w22, w21, w20",
+        "bic w21, w24, w22",
         "cmp w20, #0x1f (31)",
-        "csel w20, w21, w22, ls",
-        "ubfx w21, w5, #8, #8",
-        "mov x22, #0xffffffffffffffff",
-        "lsl w22, w22, w21",
-        "bic w22, w20, w22",
-        "cmp w21, #0x1f (31)",
-        "csel w4, w22, w20, ls",
-        "tst w4, w4"
+        "csel w22, w21, w24, ls",
+        "mov x4, x22",
+        "tst w22, w22"
       ]
     },
     "bextr rax, rbx, rcx": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "Map 2 0b00 0xf7 64-bit"
       ],
       "ExpectedArm64ASM": [
-        "uxtb x20, w5",
-        "lsr x21, x7, x20",
-        "mov w22, #0x0",
+        "mov x20, x7",
+        "mov x21, x5",
+        "uxtb x22, w21",
+        "lsr x23, x20, x22",
+        "mov w20, #0x0",
+        "cmp x22, #0x3f (63)",
+        "csel x24, x23, x20, ls",
+        "ubfx x20, x21, #8, #8",
+        "mov x21, #0xffffffffffffffff",
+        "lsl x22, x21, x20",
+        "bic x21, x24, x22",
         "cmp x20, #0x3f (63)",
-        "csel x20, x21, x22, ls",
-        "ubfx x21, x5, #8, #8",
-        "mov x22, #0xffffffffffffffff",
-        "lsl x22, x22, x21",
-        "bic x22, x20, x22",
-        "cmp x21, #0x3f (63)",
-        "csel x4, x22, x20, ls",
-        "tst x4, x4"
+        "csel x22, x21, x24, ls",
+        "mov x4, x22",
+        "tst x22, x22"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/FlagM/VEX_map_group.json b/unittests/InstructionCountCI/FlagM/VEX_map_group.json
index 277bfad731..d9e69af9a6 100644
--- a/unittests/InstructionCountCI/FlagM/VEX_map_group.json
+++ b/unittests/InstructionCountCI/FlagM/VEX_map_group.json
@@ -10,83 +10,95 @@
   },
   "Instructions": {
     "blsr eax, ebx": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map group 17 0b001 32-bit"
       ],
       "ExpectedArm64ASM": [
-        "sub w20, w7, #0x1 (1)",
-        "and w4, w20, w7",
-        "cmp x7, #0x0 (0)",
-        "cset x20, eq",
-        "tst w4, w4",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x7",
+        "sub w21, w20, #0x1 (1)",
+        "and w22, w21, w20",
+        "mov x4, x22",
+        "cmp x20, #0x0 (0)",
+        "cset x21, eq",
+        "tst w22, w22",
+        "rmif x21, #63, #nzCv"
       ]
     },
     "blsr rax, rbx": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map group 17 0b001 64-bit"
       ],
       "ExpectedArm64ASM": [
-        "sub x20, x7, #0x1 (1)",
-        "and x4, x20, x7",
-        "cmp x7, #0x0 (0)",
-        "cset x20, eq",
-        "tst x4, x4",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x7",
+        "sub x21, x20, #0x1 (1)",
+        "and x22, x21, x20",
+        "mov x4, x22",
+        "cmp x20, #0x0 (0)",
+        "cset x21, eq",
+        "tst x22, x22",
+        "rmif x21, #63, #nzCv"
       ]
     },
     "blsmsk eax, ebx": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map group 17 0b010 32-bit"
       ],
       "ExpectedArm64ASM": [
-        "sub w20, w7, #0x1 (1)",
-        "eor w4, w20, w7",
-        "cmp x7, #0x0 (0)",
-        "cset x20, eq",
-        "tst w4, w4",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x7",
+        "sub w21, w20, #0x1 (1)",
+        "eor w22, w21, w20",
+        "mov x4, x22",
+        "cmp x20, #0x0 (0)",
+        "cset x21, eq",
+        "tst w22, w22",
+        "rmif x21, #63, #nzCv"
       ]
     },
     "blsmsk rax, rbx": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map group 17 0b010 64-bit"
       ],
       "ExpectedArm64ASM": [
-        "sub x20, x7, #0x1 (1)",
-        "eor x4, x20, x7",
-        "cmp x7, #0x0 (0)",
-        "cset x20, eq",
-        "tst x4, x4",
-        "rmif x20, #63, #nzCv"
+        "mov x20, x7",
+        "sub x21, x20, #0x1 (1)",
+        "eor x22, x21, x20",
+        "mov x4, x22",
+        "cmp x20, #0x0 (0)",
+        "cset x21, eq",
+        "tst x22, x22",
+        "rmif x21, #63, #nzCv"
       ]
     },
     "blsi eax, ebx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map group 17 0b011 32-bit"
       ],
       "ExpectedArm64ASM": [
-        "neg w20, w7",
-        "and w4, w7, w20",
-        "tst w4, w4",
+        "mov x20, x7",
+        "neg w21, w20",
+        "and w22, w20, w21",
+        "mov x4, x22",
+        "tst w22, w22",
         "cset w20, ne",
         "rmif x20, #63, #nzCv"
       ]
     },
     "blsi rax, rbx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map group 17 0b011 64-bit"
       ],
       "ExpectedArm64ASM": [
-        "neg x20, x7",
-        "and x4, x7, x20",
-        "tst x4, x4",
+        "mov x20, x7",
+        "neg x21, x20",
+        "and x22, x20, x21",
+        "mov x4, x22",
+        "tst x22, x22",
         "cset w20, ne",
         "rmif x20, #63, #nzCv"
       ]
diff --git a/unittests/InstructionCountCI/FlagM/x87.json b/unittests/InstructionCountCI/FlagM/x87.json
index fde9f01d68..a552ea45f9 100644
--- a/unittests/InstructionCountCI/FlagM/x87.json
+++ b/unittests/InstructionCountCI/FlagM/x87.json
@@ -14,13 +14,14 @@
   },
   "Instructions": {
     "fadd dword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xd8 !11b /0"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
+        "mov x21, x4",
+        "ldr s2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -45,11 +46,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "ldr q3, [x0, #768]",
+        "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -62,10 +63,10 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
-        "mov x3, v2.d[0]",
-        "umov w4, v2.h[4]",
+        "mov x1, v2.d[0]",
+        "umov w2, v2.h[4]",
+        "mov x3, v3.d[0]",
+        "umov w4, v3.h[4]",
         "ldr x5, [x28, #1408]",
         "blr x5",
         "ldr w4, [x28, #728]",
@@ -77,21 +78,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fmul dword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xd8 !11b /1"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
+        "mov x21, x4",
+        "ldr s2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -116,11 +118,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "ldr q3, [x0, #768]",
+        "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -133,10 +135,10 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
-        "mov x3, v2.d[0]",
-        "umov w4, v2.h[4]",
+        "mov x1, v2.d[0]",
+        "umov w2, v2.h[4]",
+        "mov x3, v3.d[0]",
+        "umov w4, v3.h[4]",
         "ldr x5, [x28, #1424]",
         "blr x5",
         "ldr w4, [x28, #728]",
@@ -148,21 +150,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fcom dword [rax]": {
-      "ExpectedInstructionCount": 69,
+      "ExpectedInstructionCount": 70,
       "Comment": [
         "0xd8 !11b /2"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
+        "mov x21, x4",
+        "ldr s2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -187,11 +190,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "ldr q3, [x0, #768]",
+        "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -204,10 +207,10 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
-        "mov x3, v2.d[0]",
-        "umov w4, v2.h[4]",
+        "mov x1, v2.d[0]",
+        "umov w2, v2.h[4]",
+        "mov x3, v3.d[0]",
+        "umov w4, v3.h[4]",
         "ldr x5, [x28, #1304]",
         "blr x5",
         "ldr w4, [x28, #728]",
@@ -222,24 +225,25 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "fcomp dword [rax]": {
-      "ExpectedInstructionCount": 77,
+      "ExpectedInstructionCount": 78,
       "Comment": [
         "0xd8 !11b /3"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
+        "mov x21, x4",
+        "ldr s2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -264,11 +268,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "ldr q3, [x0, #768]",
+        "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -281,10 +285,10 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
-        "mov x3, v2.d[0]",
-        "umov w4, v2.h[4]",
+        "mov x1, v2.d[0]",
+        "umov w2, v2.h[4]",
+        "mov x3, v3.d[0]",
+        "umov w4, v3.h[4]",
         "ldr x5, [x28, #1304]",
         "blr x5",
         "ldr w4, [x28, #728]",
@@ -299,32 +303,33 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
-        "mov w22, #0x0",
-        "strb w22, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
+        "mov w21, #0x0",
+        "strb w21, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fsub dword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xd8 !11b /4"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
+        "mov x21, x4",
+        "ldr s2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -349,11 +354,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "ldr q3, [x0, #768]",
+        "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -366,10 +371,10 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
-        "mov x3, v2.d[0]",
-        "umov w4, v2.h[4]",
+        "mov x1, v2.d[0]",
+        "umov w2, v2.h[4]",
+        "mov x3, v3.d[0]",
+        "umov w4, v3.h[4]",
         "ldr x5, [x28, #1416]",
         "blr x5",
         "ldr w4, [x28, #728]",
@@ -381,21 +386,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr dword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xd8 !11b /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
+        "mov x21, x4",
+        "ldr s2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -420,11 +426,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "ldr q3, [x0, #768]",
+        "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -437,10 +443,10 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v2.d[0]",
-        "umov w2, v2.h[4]",
-        "mov x3, v3.d[0]",
-        "umov w4, v3.h[4]",
+        "mov x1, v3.d[0]",
+        "umov w2, v3.h[4]",
+        "mov x3, v2.d[0]",
+        "umov w4, v2.h[4]",
         "ldr x5, [x28, #1416]",
         "blr x5",
         "ldr w4, [x28, #728]",
@@ -452,21 +458,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv dword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xd8 !11b /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
+        "mov x21, x4",
+        "ldr s2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -491,11 +498,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "ldr q3, [x0, #768]",
+        "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -508,10 +515,10 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
-        "mov x3, v2.d[0]",
-        "umov w4, v2.h[4]",
+        "mov x1, v2.d[0]",
+        "umov w2, v2.h[4]",
+        "mov x3, v3.d[0]",
+        "umov w4, v3.h[4]",
         "ldr x5, [x28, #1432]",
         "blr x5",
         "ldr w4, [x28, #728]",
@@ -523,21 +530,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr dword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xd8 !11b /7"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
+        "mov x21, x4",
+        "ldr s2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -562,11 +570,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "ldr q3, [x0, #768]",
+        "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -579,10 +587,10 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v2.d[0]",
-        "umov w2, v2.h[4]",
-        "mov x3, v3.d[0]",
-        "umov w4, v3.h[4]",
+        "mov x1, v3.d[0]",
+        "umov w2, v3.h[4]",
+        "mov x3, v2.d[0]",
+        "umov w4, v2.h[4]",
         "ldr x5, [x28, #1432]",
         "blr x5",
         "ldr w4, [x28, #728]",
@@ -594,11 +602,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fadd st0, st0": {
@@ -609,8 +617,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -641,11 +649,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fadd st0, st1": {
@@ -656,8 +664,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -688,11 +696,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fadd st0, st2": {
@@ -703,8 +711,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -735,11 +743,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fadd st0, st3": {
@@ -750,8 +758,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -782,11 +790,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fadd st0, st4": {
@@ -797,8 +805,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -829,11 +837,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fadd st0, st5": {
@@ -844,8 +852,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -876,11 +884,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fadd st0, st6": {
@@ -891,8 +899,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -923,11 +931,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fadd st0, st7": {
@@ -938,8 +946,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -970,11 +978,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fmul st0, st0": {
@@ -985,8 +993,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1017,11 +1025,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fmul st0, st1": {
@@ -1032,8 +1040,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1064,11 +1072,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fmul st0, st2": {
@@ -1079,8 +1087,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1111,11 +1119,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fmul st0, st3": {
@@ -1126,8 +1134,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1158,11 +1166,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fmul st0, st4": {
@@ -1173,8 +1181,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1205,11 +1213,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fmul st0, st5": {
@@ -1220,8 +1228,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1252,11 +1260,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fmul st0, st6": {
@@ -1267,8 +1275,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1299,11 +1307,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fmul st0, st7": {
@@ -1314,8 +1322,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1346,11 +1354,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fcom st0, st0": {
@@ -1362,8 +1370,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x0",
         "add w22, w20, #0x0 (0)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1397,13 +1405,13 @@
         "mov x20, x0",
         "ubfx x22, x20, #1, #1",
         "ubfx x23, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w22, w22, w20",
-        "orr w23, w23, w20",
-        "strb w22, [x28, #744]",
+        "ubfx x24, x20, #2, #1",
+        "orr w20, w22, w24",
+        "orr w22, w23, w24",
+        "strb w20, [x28, #744]",
         "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w23, [x28, #750]"
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]"
       ]
     },
     "fcom st0, st1": {
@@ -1414,8 +1422,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1449,14 +1457,14 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "fcom st0, st2": {
@@ -1467,8 +1475,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1502,14 +1510,14 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "fcom st0, st3": {
@@ -1520,8 +1528,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1555,14 +1563,14 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "fcom st0, st4": {
@@ -1573,8 +1581,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1608,14 +1616,14 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "fcom st0, st5": {
@@ -1626,8 +1634,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1661,14 +1669,14 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "fcom st0, st6": {
@@ -1679,8 +1687,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1714,14 +1722,14 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "fcom st0, st7": {
@@ -1732,8 +1740,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1767,14 +1775,14 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "fcomp st0, st0": {
@@ -1786,8 +1794,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x0",
         "add w22, w20, #0x0 (0)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1821,20 +1829,20 @@
         "mov x22, x0",
         "ubfx x23, x22, #1, #1",
         "ubfx x24, x22, #0, #1",
-        "ubfx x22, x22, #2, #1",
-        "orr w23, w23, w22",
-        "orr w24, w24, w22",
-        "strb w23, [x28, #744]",
+        "ubfx x25, x22, #2, #1",
+        "orr w22, w23, w25",
+        "orr w23, w24, w25",
+        "strb w22, [x28, #744]",
         "strb w21, [x28, #745]",
-        "strb w22, [x28, #746]",
-        "strb w24, [x28, #750]",
+        "strb w25, [x28, #746]",
+        "strb w23, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -1847,8 +1855,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1882,20 +1890,20 @@
         "mov x22, x0",
         "ubfx x23, x22, #1, #1",
         "ubfx x24, x22, #0, #1",
-        "ubfx x22, x22, #2, #1",
-        "orr w23, w23, w22",
-        "orr w24, w24, w22",
-        "strb w23, [x28, #744]",
-        "mov w23, #0x0",
-        "strb w23, [x28, #745]",
-        "strb w22, [x28, #746]",
-        "strb w24, [x28, #750]",
+        "ubfx x25, x22, #2, #1",
+        "orr w22, w23, w25",
+        "orr w23, w24, w25",
+        "strb w22, [x28, #744]",
+        "mov w22, #0x0",
+        "strb w22, [x28, #745]",
+        "strb w25, [x28, #746]",
+        "strb w23, [x28, #750]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -1907,8 +1915,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1942,21 +1950,21 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
-        "mov w22, #0x0",
-        "strb w22, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
+        "mov w21, #0x0",
+        "strb w21, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -1968,8 +1976,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2003,21 +2011,21 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
-        "mov w22, #0x0",
-        "strb w22, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
+        "mov w21, #0x0",
+        "strb w21, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -2029,8 +2037,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2064,21 +2072,21 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
-        "mov w22, #0x0",
-        "strb w22, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
+        "mov w21, #0x0",
+        "strb w21, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -2090,8 +2098,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2125,21 +2133,21 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
-        "mov w22, #0x0",
-        "strb w22, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
+        "mov w21, #0x0",
+        "strb w21, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -2151,8 +2159,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2186,21 +2194,21 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
-        "mov w22, #0x0",
-        "strb w22, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
+        "mov w21, #0x0",
+        "strb w21, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -2212,8 +2220,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2247,21 +2255,21 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
-        "mov w22, #0x0",
-        "strb w22, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
+        "mov w21, #0x0",
+        "strb w21, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -2273,8 +2281,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2305,11 +2313,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsub st0, st1": {
@@ -2320,8 +2328,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2352,11 +2360,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fsub st0, st2": {
@@ -2367,8 +2375,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2399,11 +2407,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fsub st0, st3": {
@@ -2414,8 +2422,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2446,11 +2454,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fsub st0, st4": {
@@ -2461,8 +2469,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2493,11 +2501,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fsub st0, st5": {
@@ -2508,8 +2516,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2540,11 +2548,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fsub st0, st6": {
@@ -2555,8 +2563,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2587,11 +2595,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fsub st0, st7": {
@@ -2602,8 +2610,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2634,11 +2642,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr st0, st0": {
@@ -2649,8 +2657,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2681,11 +2689,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr st0, st1": {
@@ -2696,8 +2704,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2728,11 +2736,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr st0, st2": {
@@ -2743,8 +2751,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2775,11 +2783,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr st0, st3": {
@@ -2790,8 +2798,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2822,11 +2830,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr st0, st4": {
@@ -2837,8 +2845,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2869,11 +2877,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr st0, st5": {
@@ -2884,8 +2892,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2916,11 +2924,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr st0, st6": {
@@ -2931,8 +2939,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2963,11 +2971,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr st0, st7": {
@@ -2978,8 +2986,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3010,11 +3018,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv st0, st0": {
@@ -3025,8 +3033,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3057,11 +3065,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv st0, st1": {
@@ -3072,8 +3080,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3104,11 +3112,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv st0, st2": {
@@ -3119,8 +3127,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3151,11 +3159,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv st0, st3": {
@@ -3166,8 +3174,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3198,11 +3206,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv st0, st4": {
@@ -3213,8 +3221,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3245,11 +3253,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv st0, st5": {
@@ -3260,8 +3268,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3292,11 +3300,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv st0, st6": {
@@ -3307,8 +3315,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3339,11 +3347,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv st0, st7": {
@@ -3354,8 +3362,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3386,11 +3394,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr st0, st0": {
@@ -3401,8 +3409,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3433,11 +3441,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr st0, st1": {
@@ -3448,8 +3456,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3480,11 +3488,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr st0, st2": {
@@ -3495,8 +3503,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3527,11 +3535,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr st0, st3": {
@@ -3542,8 +3550,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3574,11 +3582,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr st0, st4": {
@@ -3589,8 +3597,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3621,11 +3629,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr st0, st5": {
@@ -3636,8 +3644,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3668,11 +3676,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr st0, st6": {
@@ -3683,8 +3691,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3715,11 +3723,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr st0, st7": {
@@ -3730,8 +3738,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3762,21 +3770,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fld dword [rax]": {
-      "ExpectedInstructionCount": 39,
+      "ExpectedInstructionCount": 40,
       "Comment": [
         "0xd9 !11b /0"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
+        "mov x21, x4",
+        "ldr s2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -3801,23 +3810,23 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q3, [x0, #768]"
       ]
     },
     "fst dword [rax]": {
-      "ExpectedInstructionCount": 30,
+      "ExpectedInstructionCount": 31,
       "Comment": [
         "0xd9 !11b /2"
       ],
@@ -3850,12 +3859,13 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "fmov s2, s0",
-        "str s2, [x4]"
+        "fmov s3, s0",
+        "mov x20, x4",
+        "str s3, [x20]"
       ]
     },
     "fstp dword [rax]": {
-      "ExpectedInstructionCount": 38,
+      "ExpectedInstructionCount": 39,
       "Comment": [
         "0xd9 !11b /3"
       ],
@@ -3888,82 +3898,85 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "fmov s2, s0",
-        "str s2, [x4]",
+        "fmov s3, s0",
+        "mov x21, x4",
+        "str s3, [x21]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fldenv [rax]": {
-      "ExpectedInstructionCount": 48,
+      "ExpectedInstructionCount": 49,
       "Comment": [
         "0xd9 !11b /4"
       ],
       "ExpectedArm64ASM": [
-        "ldrh w20, [x4]",
-        "strh w20, [x28, #1024]",
-        "ldr w20, [x4, #4]",
-        "ubfx w21, w20, #11, #3",
-        "strb w21, [x28, #747]",
-        "ubfx w21, w20, #8, #1",
-        "ubfx w22, w20, #9, #1",
-        "ubfx w23, w20, #10, #1",
-        "ubfx w20, w20, #14, #1",
-        "strb w21, [x28, #744]",
-        "strb w22, [x28, #745]",
-        "strb w23, [x28, #746]",
-        "strb w20, [x28, #750]",
-        "ldr w20, [x4, #8]",
-        "ubfx w21, w20, #0, #2",
+        "mov x20, x4",
+        "ldrh w21, [x20]",
+        "strh w21, [x28, #1024]",
+        "ldr w21, [x20, #4]",
+        "ubfx w22, w21, #11, #3",
+        "strb w22, [x28, #747]",
+        "ubfx w22, w21, #8, #1",
+        "ubfx w23, w21, #9, #1",
+        "ubfx w24, w21, #10, #1",
+        "ubfx w25, w21, #14, #1",
+        "strb w22, [x28, #744]",
+        "strb w23, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w25, [x28, #750]",
+        "ldr w21, [x20, #8]",
+        "ubfx w20, w21, #0, #2",
         "mrs x22, nzcv",
-        "cmp x21, #0x3 (3)",
-        "cset x21, ne",
-        "ubfx w23, w20, #2, #2",
-        "cmp x23, #0x3 (3)",
-        "cset x23, ne",
-        "orr w21, w21, w23, lsl #1",
-        "ubfx w23, w20, #4, #2",
-        "cmp x23, #0x3 (3)",
-        "cset x23, ne",
-        "orr w21, w21, w23, lsl #2",
-        "ubfx w23, w20, #6, #2",
-        "cmp x23, #0x3 (3)",
+        "cmp x20, #0x3 (3)",
         "cset x23, ne",
-        "orr w21, w21, w23, lsl #3",
-        "ubfx w23, w20, #8, #2",
+        "ubfx w20, w21, #2, #2",
+        "cmp x20, #0x3 (3)",
+        "cset x24, ne",
+        "orr w20, w23, w24, lsl #1",
+        "ubfx w23, w21, #4, #2",
         "cmp x23, #0x3 (3)",
-        "cset x23, ne",
-        "orr w21, w21, w23, lsl #4",
-        "ubfx w23, w20, #10, #2",
+        "cset x24, ne",
+        "orr w23, w20, w24, lsl #2",
+        "ubfx w20, w21, #6, #2",
+        "cmp x20, #0x3 (3)",
+        "cset x24, ne",
+        "orr w20, w23, w24, lsl #3",
+        "ubfx w23, w21, #8, #2",
         "cmp x23, #0x3 (3)",
-        "cset x23, ne",
-        "orr w21, w21, w23, lsl #5",
-        "ubfx w23, w20, #12, #2",
+        "cset x24, ne",
+        "orr w23, w20, w24, lsl #4",
+        "ubfx w20, w21, #10, #2",
+        "cmp x20, #0x3 (3)",
+        "cset x24, ne",
+        "orr w20, w23, w24, lsl #5",
+        "ubfx w23, w21, #12, #2",
         "cmp x23, #0x3 (3)",
-        "cset x23, ne",
-        "orr w21, w21, w23, lsl #6",
-        "ubfx w20, w20, #14, #2",
+        "cset x24, ne",
+        "orr w23, w20, w24, lsl #6",
+        "ubfx w20, w21, #14, #2",
         "cmp x20, #0x3 (3)",
-        "cset x20, ne",
-        "orr w20, w21, w20, lsl #7",
+        "cset x21, ne",
+        "orr w20, w23, w21, lsl #7",
         "strb w20, [x28, #1026]",
         "msr nzcv, x22"
       ]
     },
     "fldcw [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "0xd9 !11b /5"
       ],
       "ExpectedArm64ASM": [
-        "ldrh w20, [x4]",
-        "strh w20, [x28, #1024]"
+        "mov x20, x4",
+        "ldrh w21, [x20]",
+        "strh w21, [x28, #1024]"
       ]
     },
     "fnstenv [rax]": {
@@ -3972,80 +3985,81 @@
         "0xd9 !11b /6"
       ],
       "ExpectedArm64ASM": [
-        "ldrh w20, [x28, #1024]",
-        "str w20, [x4]",
-        "mov w20, #0x0",
-        "ldrb w21, [x28, #747]",
-        "mov x0, x20",
-        "bfi x0, x21, #11, #3",
-        "mov x21, x0",
+        "mov x20, x4",
+        "ldrh w21, [x28, #1024]",
+        "str w21, [x20]",
+        "mov w21, #0x0",
+        "ldrb w22, [x28, #747]",
+        "mov x23, x21",
+        "bfi x23, x22, #11, #3",
         "ldrb w22, [x28, #744]",
-        "ldrb w23, [x28, #745]",
-        "ldrb w24, [x28, #746]",
-        "ldrb w25, [x28, #750]",
-        "orr x21, x21, x22, lsl #8",
-        "orr x21, x21, x23, lsl #9",
-        "orr x21, x21, x24, lsl #10",
-        "orr x21, x21, x25, lsl #14",
-        "str w21, [x4, #4]",
-        "ldrb w21, [x28, #1026]",
-        "and w22, w21, #0x1",
-        "mov w23, #0x3",
-        "mrs x24, nzcv",
+        "ldrb w24, [x28, #745]",
+        "ldrb w25, [x28, #746]",
+        "ldrb w30, [x28, #750]",
+        "orr x18, x23, x22, lsl #8",
+        "orr x22, x18, x24, lsl #9",
+        "orr x23, x22, x25, lsl #10",
+        "orr x22, x23, x30, lsl #14",
+        "str w22, [x20, #4]",
+        "ldrb w22, [x28, #1026]",
+        "and w23, w22, #0x1",
+        "mov w24, #0x3",
+        "mrs x25, nzcv",
+        "cmp x23, #0x0 (0)",
+        "csel x30, x24, x21, eq",
+        "orr w23, w21, w30",
+        "lsr w30, w22, #1",
+        "and w18, w30, #0x1",
+        "cmp x18, #0x0 (0)",
+        "csel x30, x24, x21, eq",
+        "orr w18, w23, w30, lsl #2",
+        "lsr w23, w22, #2",
+        "and w30, w23, #0x1",
+        "cmp x30, #0x0 (0)",
+        "csel x23, x24, x21, eq",
+        "orr w30, w18, w23, lsl #4",
+        "lsr w23, w22, #3",
+        "and w18, w23, #0x1",
+        "cmp x18, #0x0 (0)",
+        "csel x23, x24, x21, eq",
+        "orr w18, w30, w23, lsl #6",
+        "lsr w23, w22, #4",
+        "and w30, w23, #0x1",
+        "cmp x30, #0x0 (0)",
+        "csel x23, x24, x21, eq",
+        "orr w30, w18, w23, lsl #8",
+        "lsr w23, w22, #5",
+        "and w18, w23, #0x1",
+        "cmp x18, #0x0 (0)",
+        "csel x23, x24, x21, eq",
+        "orr w18, w30, w23, lsl #10",
+        "lsr w23, w22, #6",
+        "and w30, w23, #0x1",
+        "cmp x30, #0x0 (0)",
+        "csel x23, x24, x21, eq",
+        "orr w30, w18, w23, lsl #12",
+        "lsr w23, w22, #7",
+        "and w22, w23, #0x1",
         "cmp x22, #0x0 (0)",
-        "csel x22, x23, x20, eq",
-        "orr w22, w20, w22",
-        "lsr w25, w21, #1",
-        "and w25, w25, #0x1",
-        "cmp x25, #0x0 (0)",
-        "csel x25, x23, x20, eq",
-        "orr w22, w22, w25, lsl #2",
-        "lsr w25, w21, #2",
-        "and w25, w25, #0x1",
-        "cmp x25, #0x0 (0)",
-        "csel x25, x23, x20, eq",
-        "orr w22, w22, w25, lsl #4",
-        "lsr w25, w21, #3",
-        "and w25, w25, #0x1",
-        "cmp x25, #0x0 (0)",
-        "csel x25, x23, x20, eq",
-        "orr w22, w22, w25, lsl #6",
-        "lsr w25, w21, #4",
-        "and w25, w25, #0x1",
-        "cmp x25, #0x0 (0)",
-        "csel x25, x23, x20, eq",
-        "orr w22, w22, w25, lsl #8",
-        "lsr w25, w21, #5",
-        "and w25, w25, #0x1",
-        "cmp x25, #0x0 (0)",
-        "csel x25, x23, x20, eq",
-        "orr w22, w22, w25, lsl #10",
-        "lsr w25, w21, #6",
-        "and w25, w25, #0x1",
-        "cmp x25, #0x0 (0)",
-        "csel x25, x23, x20, eq",
-        "orr w22, w22, w25, lsl #12",
-        "lsr w21, w21, #7",
-        "and w21, w21, #0x1",
-        "cmp x21, #0x0 (0)",
-        "csel x21, x23, x20, eq",
-        "orr w21, w22, w21, lsl #14",
-        "str w21, [x4, #8]",
-        "str w20, [x4, #12]",
-        "str w20, [x4, #16]",
-        "str w20, [x4, #20]",
-        "str w20, [x4, #24]",
-        "msr nzcv, x24"
+        "csel x23, x24, x21, eq",
+        "orr w22, w30, w23, lsl #14",
+        "str w22, [x20, #8]",
+        "str w21, [x20, #12]",
+        "str w21, [x20, #16]",
+        "str w21, [x20, #20]",
+        "str w21, [x20, #24]",
+        "msr nzcv, x25"
       ]
     },
     "fnstcw [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "0xd9 !11b /7"
       ],
       "ExpectedArm64ASM": [
         "ldrh w20, [x28, #1024]",
-        "strh w20, [x4]"
+        "mov x21, x4",
+        "strh w20, [x21]"
       ]
     },
     "fld st0": {
@@ -4056,15 +4070,15 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -4080,14 +4094,14 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -4102,15 +4116,15 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -4125,15 +4139,15 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -4148,15 +4162,15 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -4171,15 +4185,15 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -4194,15 +4208,15 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -4217,15 +4231,15 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -4240,14 +4254,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -4259,14 +4273,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -4278,14 +4292,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -4297,14 +4311,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -4316,14 +4330,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -4335,14 +4349,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -4354,14 +4368,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -4373,14 +4387,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -4392,7 +4406,7 @@
       "ExpectedArm64ASM": []
     },
     "fchs": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "0xd9 11b 0xe0 /4"
       ],
@@ -4403,14 +4417,15 @@
         "mov w21, #0x0",
         "mov w22, #0x8000",
         "fmov d3, x21",
-        "mov v3.d[1], x22",
-        "eor v2.16b, v2.16b, v3.16b",
+        "mov v4.16b, v3.16b",
+        "mov v4.d[1], x22",
+        "eor v3.16b, v2.16b, v4.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q3, [x0, #768]"
       ]
     },
     "fabs": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "0xd9 11b 0xe1 /4"
       ],
@@ -4421,10 +4436,11 @@
         "mov x21, #0xffffffffffffffff",
         "mov w22, #0x7fff",
         "fmov d3, x21",
-        "mov v3.d[1], x22",
-        "and v2.16b, v2.16b, v3.16b",
+        "mov v4.16b, v3.16b",
+        "mov v4.d[1], x22",
+        "and v3.16b, v2.16b, v4.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q3, [x0, #768]"
       ]
     },
     "ftst": {
@@ -4468,13 +4484,13 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
         "strb w20, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]"
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]"
       ]
     },
     "fxam": {
@@ -4487,11 +4503,11 @@
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
         "mov x21, v2.d[1]",
-        "ubfx x21, x21, #15, #1",
-        "strb w21, [x28, #745]",
+        "ubfx x22, x21, #15, #1",
+        "strb w22, [x28, #745]",
         "ldrb w21, [x28, #1026]",
-        "lsr w20, w21, w20",
-        "and w20, w20, #0x1",
+        "lsr w22, w21, w20",
+        "and w20, w22, #0x1",
         "mrs x21, nzcv",
         "cmp x20, #0x1 (1)",
         "cset x22, ne",
@@ -4509,11 +4525,11 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "ldr q2, [x28, #2304]",
@@ -4529,11 +4545,11 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "ldr q2, [x28, #2320]",
@@ -4549,11 +4565,11 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "ldr q2, [x28, #2336]",
@@ -4569,11 +4585,11 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "ldr q2, [x28, #2352]",
@@ -4589,11 +4605,11 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "ldr q2, [x28, #2368]",
@@ -4609,11 +4625,11 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "ldr q2, [x28, #2384]",
@@ -4629,11 +4645,11 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "movi v2.2d, #0x0",
@@ -4675,11 +4691,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q3, [x0, #768]"
       ]
     },
     "fyl2x": {
@@ -4691,15 +4707,15 @@
         "ldrb w20, [x28, #747]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "strb w21, [x28, #747]",
+        "and w22, w21, #0x7",
+        "strb w22, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
@@ -4728,11 +4744,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fptan": {
@@ -4744,12 +4760,12 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "sub w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w22",
-        "orr w21, w23, w21",
+        "and w23, w22, #0x7",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w23",
+        "orr w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "strb w22, [x28, #747]",
+        "strb w23, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
@@ -4777,16 +4793,16 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldr q3, [x28, #2304]",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "ldr q2, [x28, #2304]",
         "mov w21, #0x0",
         "strb w21, [x28, #746]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
-        "str q3, [x0, #768]"
+        "str q3, [x0, #768]",
+        "add x0, x28, x23, lsl #4",
+        "str q2, [x0, #768]"
       ]
     },
     "fpatan": {
@@ -4798,15 +4814,15 @@
         "ldrb w20, [x28, #747]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "strb w21, [x28, #747]",
+        "and w22, w21, #0x7",
+        "strb w22, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
@@ -4835,11 +4851,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fxtract": {
@@ -4851,12 +4867,12 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "sub w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w22",
-        "orr w21, w23, w21",
+        "and w23, w22, #0x7",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w23",
+        "orr w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "strb w22, [x28, #747]",
+        "strb w23, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
@@ -4912,13 +4928,13 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x23, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fprem1": {
@@ -4929,10 +4945,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
@@ -4961,13 +4977,13 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "mov w21, #0x0",
         "strb w21, [x28, #746]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdecstp": {
@@ -4977,8 +4993,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -4989,8 +5005,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -5002,10 +5018,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
@@ -5034,13 +5050,13 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "mov w21, #0x0",
         "strb w21, [x28, #746]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fyl2xp1": {
@@ -5052,15 +5068,15 @@
         "ldrb w20, [x28, #747]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "strb w21, [x28, #747]",
+        "and w22, w21, #0x7",
+        "strb w22, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "ldr q4, [x28, #2304]",
         "mrs x0, nzcv",
@@ -5090,9 +5106,9 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v5.16b, v5.16b, v5.16b",
+        "mov v5.d[0], x0",
+        "mov v5.h[4], w1",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -5105,8 +5121,8 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v2.d[0]",
-        "umov w2, v2.h[4]",
+        "mov x1, v5.d[0]",
+        "umov w2, v5.h[4]",
         "mov x3, v3.d[0]",
         "umov w4, v3.h[4]",
         "ldr x5, [x28, #1440]",
@@ -5123,7 +5139,7 @@
         "eor v2.16b, v2.16b, v2.16b",
         "mov v2.d[0], x0",
         "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -5161,11 +5177,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q3, [x0, #768]"
       ]
     },
     "fsincos": {
@@ -5177,12 +5193,12 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "sub w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w22",
-        "orr w21, w23, w21",
+        "and w23, w22, #0x7",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w23",
+        "orr w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "strb w22, [x28, #747]",
+        "strb w23, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
@@ -5238,15 +5254,15 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "mov w21, #0x0",
         "strb w21, [x28, #746]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x23, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "frndint": {
@@ -5283,11 +5299,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q3, [x0, #768]"
       ]
     },
     "fscale": {
@@ -5298,10 +5314,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
@@ -5330,11 +5346,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fsin": {
@@ -5371,13 +5387,13 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "mov w21, #0x0",
         "strb w21, [x28, #746]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q3, [x0, #768]"
       ]
     },
     "fcos": {
@@ -5414,23 +5430,24 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "mov w21, #0x0",
         "strb w21, [x28, #746]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q3, [x0, #768]"
       ]
     },
     "fiadd dword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xda !11b /0"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr w21, [x4]",
+        "mov x21, x4",
+        "ldr w22, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -5442,7 +5459,7 @@
         "st1 {v2.2d, v3.2d}, [x0], #32",
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
-        "mov w1, w21",
+        "mov w1, w22",
         "ldrh w0, [x28, #1024]",
         "ldr x2, [x28, #1320]",
         "blr x2",
@@ -5487,21 +5504,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fimul dword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xda !11b /1"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr w21, [x4]",
+        "mov x21, x4",
+        "ldr w22, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -5513,7 +5531,7 @@
         "st1 {v2.2d, v3.2d}, [x0], #32",
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
-        "mov w1, w21",
+        "mov w1, w22",
         "ldrh w0, [x28, #1024]",
         "ldr x2, [x28, #1320]",
         "blr x2",
@@ -5558,21 +5576,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "ficom dword [rax]": {
-      "ExpectedInstructionCount": 69,
+      "ExpectedInstructionCount": 70,
       "Comment": [
         "0xda !11b /2"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr w21, [x4]",
+        "mov x21, x4",
+        "ldr w22, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -5584,7 +5603,7 @@
         "st1 {v2.2d, v3.2d}, [x0], #32",
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
-        "mov w1, w21",
+        "mov w1, w22",
         "ldrh w0, [x28, #1024]",
         "ldr x2, [x28, #1320]",
         "blr x2",
@@ -5632,24 +5651,25 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "ficomp dword [rax]": {
-      "ExpectedInstructionCount": 77,
+      "ExpectedInstructionCount": 78,
       "Comment": [
         "0xda !11b /3"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr w21, [x4]",
+        "mov x21, x4",
+        "ldr w22, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -5661,7 +5681,7 @@
         "st1 {v2.2d, v3.2d}, [x0], #32",
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
-        "mov w1, w21",
+        "mov w1, w22",
         "ldrh w0, [x28, #1024]",
         "ldr x2, [x28, #1320]",
         "blr x2",
@@ -5709,32 +5729,33 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
-        "mov w22, #0x0",
-        "strb w22, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
+        "mov w21, #0x0",
+        "strb w21, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fisub dword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xda !11b /4"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr w21, [x4]",
+        "mov x21, x4",
+        "ldr w22, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -5746,7 +5767,7 @@
         "st1 {v2.2d, v3.2d}, [x0], #32",
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
-        "mov w1, w21",
+        "mov w1, w22",
         "ldrh w0, [x28, #1024]",
         "ldr x2, [x28, #1320]",
         "blr x2",
@@ -5791,21 +5812,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fisubr dword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xda !11b /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr w21, [x4]",
+        "mov x21, x4",
+        "ldr w22, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -5817,7 +5839,7 @@
         "st1 {v2.2d, v3.2d}, [x0], #32",
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
-        "mov w1, w21",
+        "mov w1, w22",
         "ldrh w0, [x28, #1024]",
         "ldr x2, [x28, #1320]",
         "blr x2",
@@ -5862,21 +5884,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fidiv dword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xda !11b /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr w21, [x4]",
+        "mov x21, x4",
+        "ldr w22, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -5888,7 +5911,7 @@
         "st1 {v2.2d, v3.2d}, [x0], #32",
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
-        "mov w1, w21",
+        "mov w1, w22",
         "ldrh w0, [x28, #1024]",
         "ldr x2, [x28, #1320]",
         "blr x2",
@@ -5933,21 +5956,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fidivr dword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xda !11b /7"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr w21, [x4]",
+        "mov x21, x4",
+        "ldr w22, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -5959,7 +5983,7 @@
         "st1 {v2.2d, v3.2d}, [x0], #32",
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
-        "mov w1, w21",
+        "mov w1, w22",
         "ldrh w0, [x28, #1024]",
         "ldr x2, [x28, #1320]",
         "blr x2",
@@ -6004,15 +6028,15 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fcmovb st0, st0": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc0 /0"
       ],
@@ -6021,18 +6045,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovb st0, st1": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc1 /0"
       ],
@@ -6041,18 +6066,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovb st0, st2": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc2 /0"
       ],
@@ -6061,18 +6087,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovb st0, st3": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc3 /0"
       ],
@@ -6081,18 +6108,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovb st0, st4": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc4 /0"
       ],
@@ -6101,18 +6129,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovb st0, st5": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc5 /0"
       ],
@@ -6121,18 +6150,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovb st0, st6": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc6 /0"
       ],
@@ -6141,18 +6171,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovb st0, st7": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc7 /0"
       ],
@@ -6161,18 +6192,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmove st0, st0": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc8 /1"
       ],
@@ -6181,18 +6213,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmove st0, st1": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc9 /1"
       ],
@@ -6201,18 +6234,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmove st0, st2": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xca /1"
       ],
@@ -6221,18 +6255,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmove st0, st3": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xcb /1"
       ],
@@ -6241,18 +6276,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmove st0, st4": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xcc /1"
       ],
@@ -6261,18 +6297,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmove st0, st5": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xcd /1"
       ],
@@ -6281,18 +6318,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmove st0, st6": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xce /1"
       ],
@@ -6301,18 +6339,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmove st0, st7": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xcf /1"
       ],
@@ -6321,398 +6360,423 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovbe st0, st0": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xda 11b 0xd0 /0"
       ],
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "csetm x21, hs",
-        "csel x20, x20, x21, eq",
-        "dup v2.2d, x20",
+        "csel x22, x20, x21, eq",
+        "dup v2.2d, x22",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovbe st0, st1": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xda 11b 0xd1 /0"
       ],
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "csetm x21, hs",
-        "csel x20, x20, x21, eq",
-        "dup v2.2d, x20",
+        "csel x22, x20, x21, eq",
+        "dup v2.2d, x22",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovbe st0, st2": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xda 11b 0xd2 /0"
       ],
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "csetm x21, hs",
-        "csel x20, x20, x21, eq",
-        "dup v2.2d, x20",
+        "csel x22, x20, x21, eq",
+        "dup v2.2d, x22",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovbe st0, st3": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xda 11b 0xd3 /0"
       ],
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "csetm x21, hs",
-        "csel x20, x20, x21, eq",
-        "dup v2.2d, x20",
+        "csel x22, x20, x21, eq",
+        "dup v2.2d, x22",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovbe st0, st4": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xda 11b 0xd4 /0"
       ],
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "csetm x21, hs",
-        "csel x20, x20, x21, eq",
-        "dup v2.2d, x20",
+        "csel x22, x20, x21, eq",
+        "dup v2.2d, x22",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovbe st0, st5": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xda 11b 0xd5 /0"
       ],
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "csetm x21, hs",
-        "csel x20, x20, x21, eq",
-        "dup v2.2d, x20",
+        "csel x22, x20, x21, eq",
+        "dup v2.2d, x22",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovbe st0, st6": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xda 11b 0xd6 /0"
       ],
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "csetm x21, hs",
-        "csel x20, x20, x21, eq",
-        "dup v2.2d, x20",
+        "csel x22, x20, x21, eq",
+        "dup v2.2d, x22",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovbe st0, st7": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xda 11b 0xd7 /0"
       ],
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "csetm x21, hs",
-        "csel x20, x20, x21, eq",
-        "dup v2.2d, x20",
+        "csel x22, x20, x21, eq",
+        "dup v2.2d, x22",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovu st0, st0": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xda 11b 0xd8 /1"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x0 (0)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x0 (0)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovu st0, st1": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xda 11b 0xd9 /1"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x1 (1)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovu st0, st2": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xda 11b 0xda /1"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x2 (2)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x2 (2)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovu st0, st3": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xda 11b 0xdb /1"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x3 (3)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x3 (3)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovu st0, st4": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xda 11b 0xdc /1"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x4 (4)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x4 (4)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovu st0, st5": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xda 11b 0xdd /1"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x5 (5)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x5 (5)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovu st0, st6": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xda 11b 0xde /1"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x6 (6)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x6 (6)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovu st0, st7": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xda 11b 0xdf /1"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x7 (7)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x7 (7)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fucompp": {
@@ -6724,8 +6788,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -6759,73 +6823,78 @@
         "mov x22, x0",
         "ubfx x23, x22, #1, #1",
         "ubfx x24, x22, #0, #1",
-        "ubfx x22, x22, #2, #1",
-        "orr w23, w23, w22",
-        "orr w24, w24, w22",
-        "strb w23, [x28, #744]",
-        "mov w23, #0x0",
-        "strb w23, [x28, #745]",
-        "strb w22, [x28, #746]",
-        "strb w24, [x28, #750]",
+        "ubfx x25, x22, #2, #1",
+        "orr w22, w23, w25",
+        "orr w23, w24, w25",
+        "strb w22, [x28, #744]",
+        "mov w22, #0x0",
+        "strb w22, [x28, #745]",
+        "strb w25, [x28, #746]",
+        "strb w23, [x28, #750]",
         "ldrb w22, [x28, #1026]",
         "lsl w23, w21, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "bic w24, w22, w23",
+        "strb w24, [x28, #1026]",
+        "add w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
+        "lsl w22, w21, w20",
+        "bic w21, w24, w22",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fild dword [rax]": {
-      "ExpectedInstructionCount": 35,
+      "ExpectedInstructionCount": 40,
       "Comment": [
         "0xdf !11b /5"
       ],
       "ExpectedArm64ASM": [
+        "sub sp, sp, #0x20 (32)",
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
-        "ldr w21, [x4]",
-        "mov w22, #0x0",
-        "sxtw x21, w21",
+        "mov x21, x4",
+        "ldr w22, [x21]",
+        "mov w21, #0x0",
+        "sxtw x23, w22",
         "mrs x22, nzcv",
-        "cmp x21, #0x0 (0)",
-        "mov w23, #0x8000",
-        "csel x23, x23, xzr, lt",
-        "cneg x21, x21, mi",
-        "mov w24, #0x3f",
+        "cmp x23, #0x0 (0)",
+        "mov w24, #0x8000",
+        "csel x25, x24, xzr, lt",
+        "cneg x24, x23, mi",
+        "mov w23, #0x3f",
         "mov x0, #0x3f",
-        "clz x25, x21",
-        "sub x25, x0, x25",
-        "sub x24, x24, x25",
-        "lsl x25, x21, x24",
+        "clz x30, x24",
+        "sub x30, x0, x30",
+        "sub x18, x23, x30",
+        "lsl x23, x24, x18",
         "mov w30, #0x403e",
-        "sub x24, x30, x24",
-        "mov w30, #0x0",
-        "cmp x21, #0x0 (0)",
-        "csel x21, x30, x24, eq",
-        "orr x21, x23, x21",
-        "fmov d2, x25",
+        "str w22, [sp]",
+        "sub x22, x30, x18",
+        "cmp x24, #0x0 (0)",
+        "csel x30, x21, x22, eq",
+        "orr x21, x25, x30",
+        "fmov d2, x23",
         "fmov d3, x21",
-        "mov v2.d[1], v3.d[0]",
+        "mov v4.16b, v2.16b",
+        "mov v4.d[1], v3.d[0]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x22"
+        "str q4, [x0, #768]",
+        "ldr w20, [sp]",
+        "msr nzcv, x20",
+        "add sp, sp, #0x20 (32)"
       ]
     },
     "fisttp dword [rax]": {
-      "ExpectedInstructionCount": 38,
+      "ExpectedInstructionCount": 39,
       "Comment": [
         "0xdb !11b /1"
       ],
@@ -6859,19 +6928,20 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
         "mov w21, w0",
-        "str w21, [x4]",
+        "mov x22, x4",
+        "str w21, [x22]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fist dword [rax]": {
-      "ExpectedInstructionCount": 30,
+      "ExpectedInstructionCount": 31,
       "Comment": [
         "0xdb !11b /2"
       ],
@@ -6905,11 +6975,12 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
         "mov w20, w0",
-        "str w20, [x4]"
+        "mov x21, x4",
+        "str w20, [x21]"
       ]
     },
     "fistp dword [rax]": {
-      "ExpectedInstructionCount": 38,
+      "ExpectedInstructionCount": 39,
       "Comment": [
         "0xdf !11b /7"
       ],
@@ -6943,31 +7014,33 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
         "mov w21, w0",
-        "str w21, [x4]",
+        "mov x22, x4",
+        "str w21, [x22]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fld tword [rax]": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb !11b /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr q2, [x4]",
+        "mov x21, x4",
+        "ldr q2, [x21]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -6975,7 +7048,7 @@
       ]
     },
     "fstp tword [rax]": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xdb !11b /7"
       ],
@@ -6983,21 +7056,22 @@
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "str d2, [x4]",
-        "mov x21, v2.d[1]",
-        "strh w21, [x4, #8]",
+        "mov x21, x4",
+        "str d2, [x21]",
+        "mov x22, v2.d[1]",
+        "strh w22, [x21, #8]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fcmovnb st0, st0": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc0 /0"
       ],
@@ -7006,18 +7080,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnb st0, st1": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc1 /0"
       ],
@@ -7026,18 +7101,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnb st0, st2": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc2 /0"
       ],
@@ -7046,18 +7122,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnb st0, st3": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc3 /0"
       ],
@@ -7066,18 +7143,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnb st0, st4": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc4 /0"
       ],
@@ -7086,18 +7164,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnb st0, st5": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc5 /0"
       ],
@@ -7106,18 +7185,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnb st0, st6": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc6 /0"
       ],
@@ -7126,18 +7206,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnb st0, st7": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc7 /0"
       ],
@@ -7146,18 +7227,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovne st0, st0": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc8 /1"
       ],
@@ -7166,18 +7248,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovne st0, st1": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc9 /1"
       ],
@@ -7186,18 +7269,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovne st0, st2": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xca /1"
       ],
@@ -7206,18 +7290,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovne st0, st3": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xcb /1"
       ],
@@ -7226,18 +7311,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovne st0, st4": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xcc /1"
       ],
@@ -7246,18 +7332,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovne st0, st5": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xcd /1"
       ],
@@ -7266,18 +7353,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovne st0, st6": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xce /1"
       ],
@@ -7286,18 +7374,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovne st0, st7": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xcf /1"
       ],
@@ -7306,390 +7395,415 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnbe st0, st0": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb 11b 0xd0 /2"
       ],
       "ExpectedArm64ASM": [
         "csetm x20, lo",
-        "csel x20, x20, xzr, ne",
-        "dup v2.2d, x20",
+        "csel x21, x20, xzr, ne",
+        "dup v2.2d, x21",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnbe st0, st1": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb 11b 0xd1 /2"
       ],
       "ExpectedArm64ASM": [
         "csetm x20, lo",
-        "csel x20, x20, xzr, ne",
-        "dup v2.2d, x20",
+        "csel x21, x20, xzr, ne",
+        "dup v2.2d, x21",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnbe st0, st2": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb 11b 0xd2 /2"
       ],
       "ExpectedArm64ASM": [
         "csetm x20, lo",
-        "csel x20, x20, xzr, ne",
-        "dup v2.2d, x20",
+        "csel x21, x20, xzr, ne",
+        "dup v2.2d, x21",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnbe st0, st3": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb 11b 0xd3 /2"
       ],
       "ExpectedArm64ASM": [
         "csetm x20, lo",
-        "csel x20, x20, xzr, ne",
-        "dup v2.2d, x20",
+        "csel x21, x20, xzr, ne",
+        "dup v2.2d, x21",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnbe st0, st4": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb 11b 0xd4 /2"
       ],
       "ExpectedArm64ASM": [
         "csetm x20, lo",
-        "csel x20, x20, xzr, ne",
-        "dup v2.2d, x20",
+        "csel x21, x20, xzr, ne",
+        "dup v2.2d, x21",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnbe st0, st5": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb 11b 0xd5 /2"
       ],
       "ExpectedArm64ASM": [
         "csetm x20, lo",
-        "csel x20, x20, xzr, ne",
-        "dup v2.2d, x20",
+        "csel x21, x20, xzr, ne",
+        "dup v2.2d, x21",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnbe st0, st6": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb 11b 0xd6 /2"
       ],
       "ExpectedArm64ASM": [
         "csetm x20, lo",
-        "csel x20, x20, xzr, ne",
-        "dup v2.2d, x20",
+        "csel x21, x20, xzr, ne",
+        "dup v2.2d, x21",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnbe st0, st7": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb 11b 0xd7 /2"
       ],
       "ExpectedArm64ASM": [
         "csetm x20, lo",
-        "csel x20, x20, xzr, ne",
-        "dup v2.2d, x20",
+        "csel x21, x20, xzr, ne",
+        "dup v2.2d, x21",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnu st0, st0": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdb 11b 0xd8 /3"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x0 (0)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x0 (0)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovnu st0, st1": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdb 11b 0xd9 /3"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x1 (1)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovnu st0, st2": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdb 11b 0xda /3"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x2 (2)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x2 (2)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovnu st0, st3": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdb 11b 0xdb /3"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x3 (3)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x3 (3)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovnu st0, st4": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdb 11b 0xdc /3"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x4 (4)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x4 (4)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovnu st0, st5": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdb 11b 0xdd /3"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x5 (5)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x5 (5)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovnu st0, st6": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdb 11b 0xde /3"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x6 (6)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x6 (6)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovnu st0, st7": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdb 11b 0xdf /3"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x7 (7)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x7 (7)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fnclex": {
@@ -7717,15 +7831,15 @@
       ]
     },
     "fucomi st0, st0": {
-      "ExpectedInstructionCount": 43,
+      "ExpectedInstructionCount": 44,
       "Comment": [
         "0xdb 11b 0xe8 /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -7759,24 +7873,25 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "rmif x21, #63, #nzCv",
-        "rmif x22, #62, #nZcv",
-        "eor w26, w20, #0x1"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "rmif x20, #63, #nzCv",
+        "rmif x21, #62, #nZcv",
+        "eor w20, w23, #0x1",
+        "mov x26, x20"
       ]
     },
     "fucomi st0, st1": {
-      "ExpectedInstructionCount": 43,
+      "ExpectedInstructionCount": 44,
       "Comment": [
         "0xdb 11b 0xe9 /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -7810,24 +7925,25 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "rmif x21, #63, #nzCv",
-        "rmif x22, #62, #nZcv",
-        "eor w26, w20, #0x1"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "rmif x20, #63, #nzCv",
+        "rmif x21, #62, #nZcv",
+        "eor w20, w23, #0x1",
+        "mov x26, x20"
       ]
     },
     "fucomi st0, st2": {
-      "ExpectedInstructionCount": 43,
+      "ExpectedInstructionCount": 44,
       "Comment": [
         "0xdb 11b 0xea /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -7861,24 +7977,25 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "rmif x21, #63, #nzCv",
-        "rmif x22, #62, #nZcv",
-        "eor w26, w20, #0x1"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "rmif x20, #63, #nzCv",
+        "rmif x21, #62, #nZcv",
+        "eor w20, w23, #0x1",
+        "mov x26, x20"
       ]
     },
     "fucomi st0, st3": {
-      "ExpectedInstructionCount": 43,
+      "ExpectedInstructionCount": 44,
       "Comment": [
         "0xdb 11b 0xeb /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -7912,24 +8029,25 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "rmif x21, #63, #nzCv",
-        "rmif x22, #62, #nZcv",
-        "eor w26, w20, #0x1"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "rmif x20, #63, #nzCv",
+        "rmif x21, #62, #nZcv",
+        "eor w20, w23, #0x1",
+        "mov x26, x20"
       ]
     },
     "fucomi st0, st4": {
-      "ExpectedInstructionCount": 43,
+      "ExpectedInstructionCount": 44,
       "Comment": [
         "0xdb 11b 0xec /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -7963,24 +8081,25 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "rmif x21, #63, #nzCv",
-        "rmif x22, #62, #nZcv",
-        "eor w26, w20, #0x1"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "rmif x20, #63, #nzCv",
+        "rmif x21, #62, #nZcv",
+        "eor w20, w23, #0x1",
+        "mov x26, x20"
       ]
     },
     "fucomi st0, st5": {
-      "ExpectedInstructionCount": 43,
+      "ExpectedInstructionCount": 44,
       "Comment": [
         "0xdb 11b 0xed /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -8014,24 +8133,25 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "rmif x21, #63, #nzCv",
-        "rmif x22, #62, #nZcv",
-        "eor w26, w20, #0x1"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "rmif x20, #63, #nzCv",
+        "rmif x21, #62, #nZcv",
+        "eor w20, w23, #0x1",
+        "mov x26, x20"
       ]
     },
     "fucomi st0, st6": {
-      "ExpectedInstructionCount": 43,
+      "ExpectedInstructionCount": 44,
       "Comment": [
         "0xdb 11b 0xee /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -8065,24 +8185,25 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "rmif x21, #63, #nzCv",
-        "rmif x22, #62, #nZcv",
-        "eor w26, w20, #0x1"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "rmif x20, #63, #nzCv",
+        "rmif x21, #62, #nZcv",
+        "eor w20, w23, #0x1",
+        "mov x26, x20"
       ]
     },
     "fucomi st0, st7": {
-      "ExpectedInstructionCount": 43,
+      "ExpectedInstructionCount": 44,
       "Comment": [
         "0xdb 11b 0xef /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -8116,24 +8237,25 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "rmif x21, #63, #nzCv",
-        "rmif x22, #62, #nZcv",
-        "eor w26, w20, #0x1"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "rmif x20, #63, #nzCv",
+        "rmif x21, #62, #nZcv",
+        "eor w20, w23, #0x1",
+        "mov x26, x20"
       ]
     },
     "fcomi st0, st0": {
-      "ExpectedInstructionCount": 43,
+      "ExpectedInstructionCount": 44,
       "Comment": [
         "0xdb 11b 0xf0 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -8167,24 +8289,25 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "rmif x21, #63, #nzCv",
-        "rmif x22, #62, #nZcv",
-        "eor w26, w20, #0x1"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "rmif x20, #63, #nzCv",
+        "rmif x21, #62, #nZcv",
+        "eor w20, w23, #0x1",
+        "mov x26, x20"
       ]
     },
     "fcomi st0, st1": {
-      "ExpectedInstructionCount": 43,
+      "ExpectedInstructionCount": 44,
       "Comment": [
         "0xdb 11b 0xf1 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -8218,24 +8341,25 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "rmif x21, #63, #nzCv",
-        "rmif x22, #62, #nZcv",
-        "eor w26, w20, #0x1"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "rmif x20, #63, #nzCv",
+        "rmif x21, #62, #nZcv",
+        "eor w20, w23, #0x1",
+        "mov x26, x20"
       ]
     },
     "fcomi st0, st2": {
-      "ExpectedInstructionCount": 43,
+      "ExpectedInstructionCount": 44,
       "Comment": [
         "0xdb 11b 0xf2 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -8269,24 +8393,25 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "rmif x21, #63, #nzCv",
-        "rmif x22, #62, #nZcv",
-        "eor w26, w20, #0x1"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "rmif x20, #63, #nzCv",
+        "rmif x21, #62, #nZcv",
+        "eor w20, w23, #0x1",
+        "mov x26, x20"
       ]
     },
     "fcomi st0, st3": {
-      "ExpectedInstructionCount": 43,
+      "ExpectedInstructionCount": 44,
       "Comment": [
         "0xdb 11b 0xf3 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -8320,24 +8445,25 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "rmif x21, #63, #nzCv",
-        "rmif x22, #62, #nZcv",
-        "eor w26, w20, #0x1"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "rmif x20, #63, #nzCv",
+        "rmif x21, #62, #nZcv",
+        "eor w20, w23, #0x1",
+        "mov x26, x20"
       ]
     },
     "fcomi st0, st4": {
-      "ExpectedInstructionCount": 43,
+      "ExpectedInstructionCount": 44,
       "Comment": [
         "0xdb 11b 0xf4 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -8371,24 +8497,25 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "rmif x21, #63, #nzCv",
-        "rmif x22, #62, #nZcv",
-        "eor w26, w20, #0x1"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "rmif x20, #63, #nzCv",
+        "rmif x21, #62, #nZcv",
+        "eor w20, w23, #0x1",
+        "mov x26, x20"
       ]
     },
     "fcomi st0, st5": {
-      "ExpectedInstructionCount": 43,
+      "ExpectedInstructionCount": 44,
       "Comment": [
         "0xdb 11b 0xf5 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -8422,24 +8549,25 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "rmif x21, #63, #nzCv",
-        "rmif x22, #62, #nZcv",
-        "eor w26, w20, #0x1"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "rmif x20, #63, #nzCv",
+        "rmif x21, #62, #nZcv",
+        "eor w20, w23, #0x1",
+        "mov x26, x20"
       ]
     },
     "fcomi st0, st6": {
-      "ExpectedInstructionCount": 43,
+      "ExpectedInstructionCount": 44,
       "Comment": [
         "0xdb 11b 0xf6 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -8473,24 +8601,25 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "rmif x21, #63, #nzCv",
-        "rmif x22, #62, #nZcv",
-        "eor w26, w20, #0x1"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "rmif x20, #63, #nzCv",
+        "rmif x21, #62, #nZcv",
+        "eor w20, w23, #0x1",
+        "mov x26, x20"
       ]
     },
     "fcomi st0, st7": {
-      "ExpectedInstructionCount": 43,
+      "ExpectedInstructionCount": 44,
       "Comment": [
         "0xdb 11b 0xf7 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -8524,22 +8653,24 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "rmif x21, #63, #nzCv",
-        "rmif x22, #62, #nZcv",
-        "eor w26, w20, #0x1"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "rmif x20, #63, #nzCv",
+        "rmif x21, #62, #nZcv",
+        "eor w20, w23, #0x1",
+        "mov x26, x20"
       ]
     },
     "fadd qword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xdc !11b /0"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -8564,11 +8695,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "ldr q3, [x0, #768]",
+        "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -8581,10 +8712,10 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
-        "mov x3, v2.d[0]",
-        "umov w4, v2.h[4]",
+        "mov x1, v2.d[0]",
+        "umov w2, v2.h[4]",
+        "mov x3, v3.d[0]",
+        "umov w4, v3.h[4]",
         "ldr x5, [x28, #1408]",
         "blr x5",
         "ldr w4, [x28, #728]",
@@ -8596,21 +8727,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fmul qword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xdc !11b /1"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -8635,11 +8767,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "ldr q3, [x0, #768]",
+        "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -8652,10 +8784,10 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
-        "mov x3, v2.d[0]",
-        "umov w4, v2.h[4]",
+        "mov x1, v2.d[0]",
+        "umov w2, v2.h[4]",
+        "mov x3, v3.d[0]",
+        "umov w4, v3.h[4]",
         "ldr x5, [x28, #1424]",
         "blr x5",
         "ldr w4, [x28, #728]",
@@ -8667,21 +8799,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fcom qword [rax]": {
-      "ExpectedInstructionCount": 69,
+      "ExpectedInstructionCount": 70,
       "Comment": [
         "0xdc !11b /2"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -8706,11 +8839,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "ldr q3, [x0, #768]",
+        "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -8723,10 +8856,10 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
-        "mov x3, v2.d[0]",
-        "umov w4, v2.h[4]",
+        "mov x1, v2.d[0]",
+        "umov w2, v2.h[4]",
+        "mov x3, v3.d[0]",
+        "umov w4, v3.h[4]",
         "ldr x5, [x28, #1304]",
         "blr x5",
         "ldr w4, [x28, #728]",
@@ -8741,24 +8874,25 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "fcomp qword [rax]": {
-      "ExpectedInstructionCount": 77,
+      "ExpectedInstructionCount": 78,
       "Comment": [
         "0xdc !11b /3"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -8783,11 +8917,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "ldr q3, [x0, #768]",
+        "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -8800,10 +8934,10 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
-        "mov x3, v2.d[0]",
-        "umov w4, v2.h[4]",
+        "mov x1, v2.d[0]",
+        "umov w2, v2.h[4]",
+        "mov x3, v3.d[0]",
+        "umov w4, v3.h[4]",
         "ldr x5, [x28, #1304]",
         "blr x5",
         "ldr w4, [x28, #728]",
@@ -8818,32 +8952,33 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
-        "mov w22, #0x0",
-        "strb w22, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
+        "mov w21, #0x0",
+        "strb w21, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fsub qword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xdc !11b /4"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -8868,11 +9003,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "ldr q3, [x0, #768]",
+        "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -8885,10 +9020,10 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
-        "mov x3, v2.d[0]",
-        "umov w4, v2.h[4]",
+        "mov x1, v2.d[0]",
+        "umov w2, v2.h[4]",
+        "mov x3, v3.d[0]",
+        "umov w4, v3.h[4]",
         "ldr x5, [x28, #1416]",
         "blr x5",
         "ldr w4, [x28, #728]",
@@ -8900,21 +9035,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr qword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xdc !11b /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -8939,11 +9075,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "ldr q3, [x0, #768]",
+        "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -8956,10 +9092,10 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v2.d[0]",
-        "umov w2, v2.h[4]",
-        "mov x3, v3.d[0]",
-        "umov w4, v3.h[4]",
+        "mov x1, v3.d[0]",
+        "umov w2, v3.h[4]",
+        "mov x3, v2.d[0]",
+        "umov w4, v2.h[4]",
         "ldr x5, [x28, #1416]",
         "blr x5",
         "ldr w4, [x28, #728]",
@@ -8971,21 +9107,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv qword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xdc !11b /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -9010,11 +9147,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "ldr q3, [x0, #768]",
+        "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -9027,10 +9164,10 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
-        "mov x3, v2.d[0]",
-        "umov w4, v2.h[4]",
+        "mov x1, v2.d[0]",
+        "umov w2, v2.h[4]",
+        "mov x3, v3.d[0]",
+        "umov w4, v3.h[4]",
         "ldr x5, [x28, #1432]",
         "blr x5",
         "ldr w4, [x28, #728]",
@@ -9042,21 +9179,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr qword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xdc !11b /7"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -9081,11 +9219,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "ldr q3, [x0, #768]",
+        "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -9098,10 +9236,10 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v2.d[0]",
-        "umov w2, v2.h[4]",
-        "mov x3, v3.d[0]",
-        "umov w4, v3.h[4]",
+        "mov x1, v3.d[0]",
+        "umov w2, v3.h[4]",
+        "mov x3, v2.d[0]",
+        "umov w4, v2.h[4]",
         "ldr x5, [x28, #1432]",
         "blr x5",
         "ldr w4, [x28, #728]",
@@ -9113,11 +9251,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "db 0xdc, 0xc0": {
@@ -9130,8 +9268,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9162,11 +9300,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fadd st1, st0": {
@@ -9177,8 +9315,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9209,11 +9347,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fadd st2, st0": {
@@ -9224,8 +9362,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9256,11 +9394,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fadd st3, st0": {
@@ -9271,8 +9409,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9303,11 +9441,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fadd st4, st0": {
@@ -9318,8 +9456,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9350,11 +9488,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fadd st5, st0": {
@@ -9365,8 +9503,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9397,11 +9535,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fadd st6, st0": {
@@ -9412,8 +9550,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9444,11 +9582,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fadd st7, st0": {
@@ -9459,8 +9597,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9491,11 +9629,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "db 0xdc, 0xc8": {
@@ -9508,8 +9646,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9540,11 +9678,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fmul st1, st0": {
@@ -9555,8 +9693,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9587,11 +9725,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fmul st2, st0": {
@@ -9602,8 +9740,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9634,11 +9772,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fmul st3, st0": {
@@ -9649,8 +9787,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9681,11 +9819,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fmul st4, st0": {
@@ -9696,8 +9834,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9728,11 +9866,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fmul st5, st0": {
@@ -9743,8 +9881,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9775,11 +9913,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fmul st6, st0": {
@@ -9790,8 +9928,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9822,11 +9960,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fmul st7, st0": {
@@ -9837,8 +9975,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9869,11 +10007,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "db 0xdc, 0xe0": {
@@ -9886,8 +10024,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9918,11 +10056,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr st1, st0": {
@@ -9933,8 +10071,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9965,11 +10103,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr st2, st0": {
@@ -9980,8 +10118,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10012,11 +10150,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr st3, st0": {
@@ -10027,8 +10165,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10059,11 +10197,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr st4, st0": {
@@ -10074,8 +10212,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10106,11 +10244,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr st5, st0": {
@@ -10121,8 +10259,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10153,11 +10291,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr st6, st0": {
@@ -10168,8 +10306,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10200,11 +10338,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr st7, st0": {
@@ -10215,8 +10353,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10247,11 +10385,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "db 0xdc, 0xe8": {
@@ -10264,8 +10402,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10296,11 +10434,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsub st1, st0": {
@@ -10311,8 +10449,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10343,11 +10481,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsub st2, st0": {
@@ -10358,8 +10496,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10390,11 +10528,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsub st3, st0": {
@@ -10405,8 +10543,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10437,11 +10575,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsub st4, st0": {
@@ -10452,8 +10590,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10484,11 +10622,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsub st5, st0": {
@@ -10499,8 +10637,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10531,11 +10669,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsub st6, st0": {
@@ -10546,8 +10684,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10578,11 +10716,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsub st7, st0": {
@@ -10593,8 +10731,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10625,11 +10763,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "db 0xdc, 0xf0": {
@@ -10642,8 +10780,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10674,11 +10812,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr st1, st0": {
@@ -10689,8 +10827,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10721,11 +10859,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr st2, st0": {
@@ -10736,8 +10874,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10768,11 +10906,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr st3, st0": {
@@ -10783,8 +10921,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10815,11 +10953,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr st4, st0": {
@@ -10830,8 +10968,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10862,11 +11000,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr st5, st0": {
@@ -10877,8 +11015,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10909,11 +11047,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr st6, st0": {
@@ -10924,8 +11062,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10956,11 +11094,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr st7, st0": {
@@ -10971,8 +11109,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -11003,11 +11141,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "db 0xdc, 0xf8": {
@@ -11020,8 +11158,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -11052,11 +11190,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv st1, st0": {
@@ -11067,8 +11205,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -11099,11 +11237,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv st2, st0": {
@@ -11114,8 +11252,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -11146,11 +11284,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv st3, st0": {
@@ -11161,8 +11299,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -11193,11 +11331,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv st4, st0": {
@@ -11208,8 +11346,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -11240,11 +11378,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv st5, st0": {
@@ -11255,8 +11393,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -11287,11 +11425,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv st6, st0": {
@@ -11302,8 +11440,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -11334,11 +11472,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv st7, st0": {
@@ -11349,8 +11487,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -11381,21 +11519,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fld qword [rax]": {
-      "ExpectedInstructionCount": 39,
+      "ExpectedInstructionCount": 40,
       "Comment": [
         "0xdd !11b /0"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -11420,23 +11559,23 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q3, [x0, #768]"
       ]
     },
     "fisttp qword [rax]": {
-      "ExpectedInstructionCount": 38,
+      "ExpectedInstructionCount": 39,
       "Comment": [
         "0xdd !11b /1"
       ],
@@ -11470,19 +11609,20 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
         "mov x21, x0",
-        "str x21, [x4]",
+        "mov x22, x4",
+        "str x21, [x22]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fst qword [rax]": {
-      "ExpectedInstructionCount": 30,
+      "ExpectedInstructionCount": 31,
       "Comment": [
         "0xdd !11b /2"
       ],
@@ -11515,12 +11655,13 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "mov v2.8b, v0.8b",
-        "str d2, [x4]"
+        "mov v3.8b, v0.8b",
+        "mov x20, x4",
+        "str d3, [x20]"
       ]
     },
     "fstp qword [rax]": {
-      "ExpectedInstructionCount": 38,
+      "ExpectedInstructionCount": 39,
       "Comment": [
         "0xdd !11b /3"
       ],
@@ -11553,278 +11694,289 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "mov v2.8b, v0.8b",
-        "str d2, [x4]",
+        "mov v3.8b, v0.8b",
+        "mov x21, x4",
+        "str d3, [x21]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "frstor [rax]": {
-      "ExpectedInstructionCount": 107,
+      "ExpectedInstructionCount": 110,
       "Comment": [
         "0xdd !11b /4"
       ],
       "ExpectedArm64ASM": [
-        "ldrh w20, [x4]",
-        "strh w20, [x28, #1024]",
-        "ldr w20, [x4, #4]",
-        "ubfx w21, w20, #11, #3",
-        "strb w21, [x28, #747]",
-        "ubfx w22, w20, #8, #1",
-        "ubfx w23, w20, #9, #1",
-        "ubfx w24, w20, #10, #1",
-        "ubfx w20, w20, #14, #1",
-        "strb w22, [x28, #744]",
-        "strb w23, [x28, #745]",
-        "strb w24, [x28, #746]",
-        "strb w20, [x28, #750]",
-        "ldr w20, [x4, #8]",
-        "ubfx w22, w20, #0, #2",
-        "mrs x23, nzcv",
-        "cmp x22, #0x3 (3)",
-        "cset x22, ne",
-        "ubfx w24, w20, #2, #2",
-        "cmp x24, #0x3 (3)",
-        "cset x24, ne",
-        "orr w22, w22, w24, lsl #1",
-        "ubfx w24, w20, #4, #2",
-        "cmp x24, #0x3 (3)",
-        "cset x24, ne",
-        "orr w22, w22, w24, lsl #2",
-        "ubfx w24, w20, #6, #2",
-        "cmp x24, #0x3 (3)",
-        "cset x24, ne",
-        "orr w22, w22, w24, lsl #3",
-        "ubfx w24, w20, #8, #2",
-        "cmp x24, #0x3 (3)",
-        "cset x24, ne",
-        "orr w22, w22, w24, lsl #4",
-        "ubfx w24, w20, #10, #2",
-        "cmp x24, #0x3 (3)",
-        "cset x24, ne",
-        "orr w22, w22, w24, lsl #5",
-        "ubfx w24, w20, #12, #2",
-        "cmp x24, #0x3 (3)",
-        "cset x24, ne",
-        "orr w22, w22, w24, lsl #6",
-        "ubfx w20, w20, #14, #2",
-        "cmp x20, #0x3 (3)",
-        "cset x20, ne",
-        "orr w20, w22, w20, lsl #7",
-        "strb w20, [x28, #1026]",
-        "add x20, x4, #0x1c (28)",
-        "mov x22, #0xffffffffffffffff",
-        "mov w24, #0xffff",
-        "fmov d2, x22",
-        "mov v2.d[1], x24",
-        "ldur q3, [x4, #28]",
-        "and v3.16b, v3.16b, v2.16b",
-        "add x0, x28, x21, lsl #4",
-        "str q3, [x0, #768]",
-        "add x22, x20, #0xa (10)",
-        "add w21, w21, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "ldur q3, [x20, #10]",
-        "and v3.16b, v3.16b, v2.16b",
-        "add x0, x28, x21, lsl #4",
-        "str q3, [x0, #768]",
-        "add x20, x22, #0xa (10)",
-        "add w21, w21, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "ldur q3, [x22, #10]",
-        "and v3.16b, v3.16b, v2.16b",
-        "add x0, x28, x21, lsl #4",
-        "str q3, [x0, #768]",
-        "add x22, x20, #0xa (10)",
-        "add w21, w21, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "ldur q3, [x20, #10]",
-        "and v3.16b, v3.16b, v2.16b",
-        "add x0, x28, x21, lsl #4",
-        "str q3, [x0, #768]",
-        "add x20, x22, #0xa (10)",
-        "add w21, w21, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "ldur q3, [x22, #10]",
-        "and v3.16b, v3.16b, v2.16b",
-        "add x0, x28, x21, lsl #4",
-        "str q3, [x0, #768]",
-        "add x22, x20, #0xa (10)",
-        "add w21, w21, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "ldur q3, [x20, #10]",
-        "and v3.16b, v3.16b, v2.16b",
-        "add x0, x28, x21, lsl #4",
-        "str q3, [x0, #768]",
-        "add x20, x22, #0xa (10)",
-        "add w21, w21, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "ldur q3, [x22, #10]",
-        "and v2.16b, v3.16b, v2.16b",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]",
-        "add x22, x20, #0xa (10)",
-        "add w21, w21, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "ldur d2, [x20, #10]",
-        "ldr h3, [x22, #8]",
-        "mov v2.h[4], v3.h[0]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x23"
+        "mov x20, x4",
+        "ldrh w21, [x20]",
+        "strh w21, [x28, #1024]",
+        "ldr w21, [x20, #4]",
+        "ubfx w22, w21, #11, #3",
+        "strb w22, [x28, #747]",
+        "ubfx w23, w21, #8, #1",
+        "ubfx w24, w21, #9, #1",
+        "ubfx w25, w21, #10, #1",
+        "ubfx w30, w21, #14, #1",
+        "strb w23, [x28, #744]",
+        "strb w24, [x28, #745]",
+        "strb w25, [x28, #746]",
+        "strb w30, [x28, #750]",
+        "ldr w21, [x20, #8]",
+        "ubfx w23, w21, #0, #2",
+        "mrs x24, nzcv",
+        "cmp x23, #0x3 (3)",
+        "cset x25, ne",
+        "ubfx w23, w21, #2, #2",
+        "cmp x23, #0x3 (3)",
+        "cset x30, ne",
+        "orr w23, w25, w30, lsl #1",
+        "ubfx w25, w21, #4, #2",
+        "cmp x25, #0x3 (3)",
+        "cset x30, ne",
+        "orr w25, w23, w30, lsl #2",
+        "ubfx w23, w21, #6, #2",
+        "cmp x23, #0x3 (3)",
+        "cset x30, ne",
+        "orr w23, w25, w30, lsl #3",
+        "ubfx w25, w21, #8, #2",
+        "cmp x25, #0x3 (3)",
+        "cset x30, ne",
+        "orr w25, w23, w30, lsl #4",
+        "ubfx w23, w21, #10, #2",
+        "cmp x23, #0x3 (3)",
+        "cset x30, ne",
+        "orr w23, w25, w30, lsl #5",
+        "ubfx w25, w21, #12, #2",
+        "cmp x25, #0x3 (3)",
+        "cset x30, ne",
+        "orr w25, w23, w30, lsl #6",
+        "ubfx w23, w21, #14, #2",
+        "cmp x23, #0x3 (3)",
+        "cset x21, ne",
+        "orr w23, w25, w21, lsl #7",
+        "strb w23, [x28, #1026]",
+        "add x21, x20, #0x1c (28)",
+        "mov x23, #0xffffffffffffffff",
+        "mov w25, #0xffff",
+        "fmov d2, x23",
+        "mov v3.16b, v2.16b",
+        "mov v3.d[1], x25",
+        "ldur q2, [x20, #28]",
+        "and v4.16b, v2.16b, v3.16b",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]",
+        "add x20, x21, #0xa (10)",
+        "add w23, w22, #0x1 (1)",
+        "and w22, w23, #0x7",
+        "ldur q2, [x21, #10]",
+        "and v4.16b, v2.16b, v3.16b",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]",
+        "add x21, x20, #0xa (10)",
+        "add w23, w22, #0x1 (1)",
+        "and w22, w23, #0x7",
+        "ldur q2, [x20, #10]",
+        "and v4.16b, v2.16b, v3.16b",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]",
+        "add x20, x21, #0xa (10)",
+        "add w23, w22, #0x1 (1)",
+        "and w22, w23, #0x7",
+        "ldur q2, [x21, #10]",
+        "and v4.16b, v2.16b, v3.16b",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]",
+        "add x21, x20, #0xa (10)",
+        "add w23, w22, #0x1 (1)",
+        "and w22, w23, #0x7",
+        "ldur q2, [x20, #10]",
+        "and v4.16b, v2.16b, v3.16b",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]",
+        "add x20, x21, #0xa (10)",
+        "add w23, w22, #0x1 (1)",
+        "and w22, w23, #0x7",
+        "ldur q2, [x21, #10]",
+        "and v4.16b, v2.16b, v3.16b",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]",
+        "add x21, x20, #0xa (10)",
+        "add w23, w22, #0x1 (1)",
+        "and w22, w23, #0x7",
+        "ldur q2, [x20, #10]",
+        "and v4.16b, v2.16b, v3.16b",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]",
+        "add x20, x21, #0xa (10)",
+        "add w23, w22, #0x1 (1)",
+        "and w22, w23, #0x7",
+        "ldur d2, [x21, #10]",
+        "ldr h3, [x20, #8]",
+        "mov v4.16b, v2.16b",
+        "mov v4.h[4], v3.h[0]",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]",
+        "msr nzcv, x24"
       ]
     },
     "fnsave [rax]": {
-      "ExpectedInstructionCount": 119,
+      "ExpectedInstructionCount": 124,
       "Comment": [
         "0xdd !11b /6"
       ],
       "ExpectedArm64ASM": [
-        "ldrb w20, [x28, #747]",
-        "ldrh w21, [x28, #1024]",
-        "str w21, [x4]",
-        "mov w21, #0x0",
-        "mov x22, x21",
-        "bfi x22, x20, #11, #3",
-        "ldrb w23, [x28, #744]",
-        "ldrb w24, [x28, #745]",
-        "ldrb w25, [x28, #746]",
-        "ldrb w30, [x28, #750]",
-        "orr x22, x22, x23, lsl #8",
-        "orr x22, x22, x24, lsl #9",
-        "orr x22, x22, x25, lsl #10",
-        "orr x22, x22, x30, lsl #14",
-        "str w22, [x4, #4]",
-        "ldrb w22, [x28, #1026]",
-        "and w23, w22, #0x1",
+        "sub sp, sp, #0x20 (32)",
+        "mov x20, x4",
+        "ldrb w21, [x28, #747]",
+        "ldrh w22, [x28, #1024]",
+        "str w22, [x20]",
+        "mov w22, #0x0",
+        "mov x23, x22",
+        "bfi x23, x21, #11, #3",
+        "ldrb w24, [x28, #744]",
+        "ldrb w25, [x28, #745]",
+        "ldrb w30, [x28, #746]",
+        "ldrb w18, [x28, #750]",
+        "strb w21, [sp]",
+        "orr x21, x23, x24, lsl #8",
+        "orr x23, x21, x25, lsl #9",
+        "orr x21, x23, x30, lsl #10",
+        "orr x23, x21, x18, lsl #14",
+        "str w23, [x20, #4]",
+        "ldrb w21, [x28, #1026]",
+        "and w23, w21, #0x1",
         "mov w24, #0x3",
         "mrs x25, nzcv",
         "cmp x23, #0x0 (0)",
-        "csel x23, x24, x21, eq",
-        "orr w23, w21, w23",
-        "lsr w30, w22, #1",
-        "and w30, w30, #0x1",
-        "cmp x30, #0x0 (0)",
-        "csel x30, x24, x21, eq",
-        "orr w23, w23, w30, lsl #2",
-        "lsr w30, w22, #2",
-        "and w30, w30, #0x1",
-        "cmp x30, #0x0 (0)",
-        "csel x30, x24, x21, eq",
-        "orr w23, w23, w30, lsl #4",
-        "lsr w30, w22, #3",
-        "and w30, w30, #0x1",
-        "cmp x30, #0x0 (0)",
-        "csel x30, x24, x21, eq",
-        "orr w23, w23, w30, lsl #6",
-        "lsr w30, w22, #4",
-        "and w30, w30, #0x1",
+        "csel x30, x24, x22, eq",
+        "orr w23, w22, w30",
+        "lsr w30, w21, #1",
+        "and w18, w30, #0x1",
+        "cmp x18, #0x0 (0)",
+        "csel x30, x24, x22, eq",
+        "orr w18, w23, w30, lsl #2",
+        "lsr w23, w21, #2",
+        "and w30, w23, #0x1",
         "cmp x30, #0x0 (0)",
-        "csel x30, x24, x21, eq",
-        "orr w23, w23, w30, lsl #8",
-        "lsr w30, w22, #5",
-        "and w30, w30, #0x1",
+        "csel x23, x24, x22, eq",
+        "orr w30, w18, w23, lsl #4",
+        "lsr w23, w21, #3",
+        "and w18, w23, #0x1",
+        "cmp x18, #0x0 (0)",
+        "csel x23, x24, x22, eq",
+        "orr w18, w30, w23, lsl #6",
+        "lsr w23, w21, #4",
+        "and w30, w23, #0x1",
         "cmp x30, #0x0 (0)",
-        "csel x30, x24, x21, eq",
-        "orr w23, w23, w30, lsl #10",
-        "lsr w30, w22, #6",
-        "and w30, w30, #0x1",
+        "csel x23, x24, x22, eq",
+        "orr w30, w18, w23, lsl #8",
+        "lsr w23, w21, #5",
+        "and w18, w23, #0x1",
+        "cmp x18, #0x0 (0)",
+        "csel x23, x24, x22, eq",
+        "orr w18, w30, w23, lsl #10",
+        "lsr w23, w21, #6",
+        "and w30, w23, #0x1",
         "cmp x30, #0x0 (0)",
-        "csel x30, x24, x21, eq",
-        "orr w23, w23, w30, lsl #12",
-        "lsr w22, w22, #7",
-        "and w22, w22, #0x1",
-        "cmp x22, #0x0 (0)",
-        "csel x22, x24, x21, eq",
-        "orr w22, w23, w22, lsl #14",
-        "str w22, [x4, #8]",
-        "str w21, [x4, #12]",
-        "str w21, [x4, #16]",
-        "str w21, [x4, #20]",
-        "str w21, [x4, #24]",
-        "add x22, x4, #0x1c (28)",
-        "add x0, x28, x20, lsl #4",
-        "ldr q2, [x0, #768]",
-        "stur q2, [x4, #28]",
-        "add x23, x22, #0xa (10)",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "add x0, x28, x20, lsl #4",
-        "ldr q2, [x0, #768]",
-        "stur q2, [x22, #10]",
-        "add x22, x23, #0xa (10)",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "add x0, x28, x20, lsl #4",
-        "ldr q2, [x0, #768]",
-        "stur q2, [x23, #10]",
-        "add x23, x22, #0xa (10)",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "add x0, x28, x20, lsl #4",
-        "ldr q2, [x0, #768]",
-        "stur q2, [x22, #10]",
-        "add x22, x23, #0xa (10)",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "add x0, x28, x20, lsl #4",
-        "ldr q2, [x0, #768]",
-        "stur q2, [x23, #10]",
-        "add x23, x22, #0xa (10)",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "add x0, x28, x20, lsl #4",
-        "ldr q2, [x0, #768]",
-        "stur q2, [x22, #10]",
-        "add x22, x23, #0xa (10)",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "add x0, x28, x20, lsl #4",
-        "ldr q2, [x0, #768]",
-        "stur q2, [x23, #10]",
-        "add x23, x22, #0xa (10)",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "add x0, x28, x20, lsl #4",
-        "ldr q2, [x0, #768]",
-        "stur d2, [x22, #10]",
-        "dup v2.8h, v2.h[4]",
-        "str h2, [x23, #8]",
+        "csel x23, x24, x22, eq",
+        "orr w30, w18, w23, lsl #12",
+        "lsr w23, w21, #7",
+        "and w21, w23, #0x1",
+        "cmp x21, #0x0 (0)",
+        "csel x23, x24, x22, eq",
+        "orr w21, w30, w23, lsl #14",
+        "str w21, [x20, #8]",
+        "str w22, [x20, #12]",
+        "str w22, [x20, #16]",
+        "str w22, [x20, #20]",
+        "str w22, [x20, #24]",
+        "add x21, x20, #0x1c (28)",
+        "ldrb w23, [sp]",
+        "add x0, x28, x23, lsl #4",
+        "ldr q2, [x0, #768]",
+        "stur q2, [x20, #28]",
+        "add x20, x21, #0xa (10)",
+        "add w24, w23, #0x1 (1)",
+        "and w23, w24, #0x7",
+        "add x0, x28, x23, lsl #4",
+        "ldr q2, [x0, #768]",
+        "stur q2, [x21, #10]",
+        "add x21, x20, #0xa (10)",
+        "add w24, w23, #0x1 (1)",
+        "and w23, w24, #0x7",
+        "add x0, x28, x23, lsl #4",
+        "ldr q2, [x0, #768]",
+        "stur q2, [x20, #10]",
+        "add x20, x21, #0xa (10)",
+        "add w24, w23, #0x1 (1)",
+        "and w23, w24, #0x7",
+        "add x0, x28, x23, lsl #4",
+        "ldr q2, [x0, #768]",
+        "stur q2, [x21, #10]",
+        "add x21, x20, #0xa (10)",
+        "add w24, w23, #0x1 (1)",
+        "and w23, w24, #0x7",
+        "add x0, x28, x23, lsl #4",
+        "ldr q2, [x0, #768]",
+        "stur q2, [x20, #10]",
+        "add x20, x21, #0xa (10)",
+        "add w24, w23, #0x1 (1)",
+        "and w23, w24, #0x7",
+        "add x0, x28, x23, lsl #4",
+        "ldr q2, [x0, #768]",
+        "stur q2, [x21, #10]",
+        "add x21, x20, #0xa (10)",
+        "add w24, w23, #0x1 (1)",
+        "and w23, w24, #0x7",
+        "add x0, x28, x23, lsl #4",
+        "ldr q2, [x0, #768]",
+        "stur q2, [x20, #10]",
+        "add x20, x21, #0xa (10)",
+        "add w24, w23, #0x1 (1)",
+        "and w23, w24, #0x7",
+        "add x0, x28, x23, lsl #4",
+        "ldr q2, [x0, #768]",
+        "stur d2, [x21, #10]",
+        "dup v3.8h, v2.h[4]",
+        "str h3, [x20, #8]",
         "mov w20, #0x37f",
         "strh w20, [x28, #1024]",
-        "strb w21, [x28, #747]",
-        "strb w21, [x28, #744]",
-        "strb w21, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w21, [x28, #750]",
-        "strb w21, [x28, #1026]",
-        "msr nzcv, x25"
+        "strb w22, [x28, #747]",
+        "strb w22, [x28, #744]",
+        "strb w22, [x28, #745]",
+        "strb w22, [x28, #746]",
+        "strb w22, [x28, #750]",
+        "strb w22, [x28, #1026]",
+        "msr nzcv, x25",
+        "add sp, sp, #0x20 (32)"
       ]
     },
     "fnstsw [rax]": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdd !11b /7"
       ],
       "ExpectedArm64ASM": [
         "mov w20, #0x0",
         "ldrb w21, [x28, #747]",
-        "bfi x20, x21, #11, #3",
-        "ldrb w21, [x28, #744]",
-        "ldrb w22, [x28, #745]",
+        "mov x22, x20",
+        "bfi x22, x21, #11, #3",
+        "ldrb w20, [x28, #744]",
+        "ldrb w21, [x28, #745]",
         "ldrb w23, [x28, #746]",
         "ldrb w24, [x28, #750]",
-        "orr x20, x20, x21, lsl #8",
-        "orr x20, x20, x22, lsl #9",
-        "orr x20, x20, x23, lsl #10",
-        "orr x20, x20, x24, lsl #14",
-        "strh w20, [x4]"
+        "orr x25, x22, x20, lsl #8",
+        "orr x20, x25, x21, lsl #9",
+        "orr x21, x20, x23, lsl #10",
+        "orr x20, x21, x24, lsl #14",
+        "mov x21, x4",
+        "strh w20, [x21]"
       ]
     },
     "ffree st0": {
@@ -11834,12 +11986,12 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x0 (0)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x0 (0)",
+        "and w20, w21, #0x7",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w20, w22, w20",
-        "bic w20, w21, w20",
+        "lsl w23, w22, w20",
+        "bic w20, w21, w23",
         "strb w20, [x28, #1026]"
       ]
     },
@@ -11851,11 +12003,11 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w20, w21, w20",
-        "bic w20, w22, w20",
+        "lsl w23, w21, w20",
+        "bic w20, w22, w23",
         "strb w20, [x28, #1026]"
       ]
     },
@@ -11866,12 +12018,12 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x2 (2)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x2 (2)",
+        "and w20, w21, #0x7",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w20, w22, w20",
-        "bic w20, w21, w20",
+        "lsl w23, w22, w20",
+        "bic w20, w21, w23",
         "strb w20, [x28, #1026]"
       ]
     },
@@ -11882,12 +12034,12 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x3 (3)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x3 (3)",
+        "and w20, w21, #0x7",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w20, w22, w20",
-        "bic w20, w21, w20",
+        "lsl w23, w22, w20",
+        "bic w20, w21, w23",
         "strb w20, [x28, #1026]"
       ]
     },
@@ -11898,12 +12050,12 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x4 (4)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x4 (4)",
+        "and w20, w21, #0x7",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w20, w22, w20",
-        "bic w20, w21, w20",
+        "lsl w23, w22, w20",
+        "bic w20, w21, w23",
         "strb w20, [x28, #1026]"
       ]
     },
@@ -11914,12 +12066,12 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x5 (5)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x5 (5)",
+        "and w20, w21, #0x7",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w20, w22, w20",
-        "bic w20, w21, w20",
+        "lsl w23, w22, w20",
+        "bic w20, w21, w23",
         "strb w20, [x28, #1026]"
       ]
     },
@@ -11930,12 +12082,12 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x6 (6)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x6 (6)",
+        "and w20, w21, #0x7",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w20, w22, w20",
-        "bic w20, w21, w20",
+        "lsl w23, w22, w20",
+        "bic w20, w21, w23",
         "strb w20, [x28, #1026]"
       ]
     },
@@ -11946,12 +12098,12 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x7 (7)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x7 (7)",
+        "and w20, w21, #0x7",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w20, w22, w20",
-        "bic w20, w21, w20",
+        "lsl w23, w22, w20",
+        "bic w20, w21, w23",
         "strb w20, [x28, #1026]"
       ]
     },
@@ -11963,10 +12115,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -11978,10 +12130,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -11993,10 +12145,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -12008,10 +12160,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -12023,10 +12175,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -12038,10 +12190,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -12053,10 +12205,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -12068,10 +12220,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -12083,18 +12235,18 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -12107,17 +12259,17 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
+        "and w23, w22, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "str q2, [x0, #768]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -12129,18 +12281,18 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -12152,18 +12304,18 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -12175,18 +12327,18 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -12198,18 +12350,18 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -12221,18 +12373,18 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -12244,18 +12396,18 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -12268,8 +12420,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x0",
         "add w22, w20, #0x0 (0)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -12303,13 +12455,13 @@
         "mov x20, x0",
         "ubfx x22, x20, #1, #1",
         "ubfx x23, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w22, w22, w20",
-        "orr w23, w23, w20",
-        "strb w22, [x28, #744]",
+        "ubfx x24, x20, #2, #1",
+        "orr w20, w22, w24",
+        "orr w22, w23, w24",
+        "strb w20, [x28, #744]",
         "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w23, [x28, #750]"
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]"
       ]
     },
     "fucom st1": {
@@ -12320,8 +12472,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -12355,14 +12507,14 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "fucom st2": {
@@ -12373,8 +12525,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -12408,14 +12560,14 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "fucom st3": {
@@ -12426,8 +12578,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -12461,14 +12613,14 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "fucom st4": {
@@ -12479,8 +12631,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -12514,14 +12666,14 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "fucom st5": {
@@ -12532,8 +12684,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -12567,14 +12719,14 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "fucom st6": {
@@ -12585,8 +12737,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -12620,14 +12772,14 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "fucom st7": {
@@ -12638,8 +12790,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -12673,14 +12825,14 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "fucomp st0": {
@@ -12692,8 +12844,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x0",
         "add w22, w20, #0x0 (0)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -12727,20 +12879,20 @@
         "mov x22, x0",
         "ubfx x23, x22, #1, #1",
         "ubfx x24, x22, #0, #1",
-        "ubfx x22, x22, #2, #1",
-        "orr w23, w23, w22",
-        "orr w24, w24, w22",
-        "strb w23, [x28, #744]",
+        "ubfx x25, x22, #2, #1",
+        "orr w22, w23, w25",
+        "orr w23, w24, w25",
+        "strb w22, [x28, #744]",
         "strb w21, [x28, #745]",
-        "strb w22, [x28, #746]",
-        "strb w24, [x28, #750]",
+        "strb w25, [x28, #746]",
+        "strb w23, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -12753,8 +12905,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -12788,20 +12940,20 @@
         "mov x22, x0",
         "ubfx x23, x22, #1, #1",
         "ubfx x24, x22, #0, #1",
-        "ubfx x22, x22, #2, #1",
-        "orr w23, w23, w22",
-        "orr w24, w24, w22",
-        "strb w23, [x28, #744]",
-        "mov w23, #0x0",
-        "strb w23, [x28, #745]",
-        "strb w22, [x28, #746]",
-        "strb w24, [x28, #750]",
+        "ubfx x25, x22, #2, #1",
+        "orr w22, w23, w25",
+        "orr w23, w24, w25",
+        "strb w22, [x28, #744]",
+        "mov w22, #0x0",
+        "strb w22, [x28, #745]",
+        "strb w25, [x28, #746]",
+        "strb w23, [x28, #750]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -12813,8 +12965,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -12848,21 +13000,21 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
-        "mov w22, #0x0",
-        "strb w22, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
+        "mov w21, #0x0",
+        "strb w21, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -12874,8 +13026,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -12909,21 +13061,21 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
-        "mov w22, #0x0",
-        "strb w22, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
+        "mov w21, #0x0",
+        "strb w21, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -12935,8 +13087,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -12970,21 +13122,21 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
-        "mov w22, #0x0",
-        "strb w22, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
+        "mov w21, #0x0",
+        "strb w21, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -12996,8 +13148,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -13031,21 +13183,21 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
-        "mov w22, #0x0",
-        "strb w22, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
+        "mov w21, #0x0",
+        "strb w21, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -13057,8 +13209,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -13092,21 +13244,21 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
-        "mov w22, #0x0",
-        "strb w22, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
+        "mov w21, #0x0",
+        "strb w21, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -13118,8 +13270,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -13153,32 +13305,33 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
-        "mov w22, #0x0",
-        "strb w22, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
+        "mov w21, #0x0",
+        "strb w21, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fiadd word [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xde !11b /0"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldrh w21, [x4]",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -13190,7 +13343,7 @@
         "st1 {v2.2d, v3.2d}, [x0], #32",
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
-        "sxth w1, w21",
+        "sxth w1, w22",
         "ldrh w0, [x28, #1024]",
         "ldr x2, [x28, #1312]",
         "blr x2",
@@ -13235,21 +13388,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fimul word [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xde !11b /1"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldrh w21, [x4]",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -13261,7 +13415,7 @@
         "st1 {v2.2d, v3.2d}, [x0], #32",
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
-        "sxth w1, w21",
+        "sxth w1, w22",
         "ldrh w0, [x28, #1024]",
         "ldr x2, [x28, #1312]",
         "blr x2",
@@ -13306,21 +13460,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "ficom word [rax]": {
-      "ExpectedInstructionCount": 69,
+      "ExpectedInstructionCount": 70,
       "Comment": [
         "0xde !11b /2"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldrh w21, [x4]",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -13332,7 +13487,7 @@
         "st1 {v2.2d, v3.2d}, [x0], #32",
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
-        "sxth w1, w21",
+        "sxth w1, w22",
         "ldrh w0, [x28, #1024]",
         "ldr x2, [x28, #1312]",
         "blr x2",
@@ -13380,24 +13535,25 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "ficomp word [rax]": {
-      "ExpectedInstructionCount": 77,
+      "ExpectedInstructionCount": 78,
       "Comment": [
         "0xde !11b /3"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldrh w21, [x4]",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -13409,7 +13565,7 @@
         "st1 {v2.2d, v3.2d}, [x0], #32",
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
-        "sxth w1, w21",
+        "sxth w1, w22",
         "ldrh w0, [x28, #1024]",
         "ldr x2, [x28, #1312]",
         "blr x2",
@@ -13457,32 +13613,33 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
-        "mov w22, #0x0",
-        "strb w22, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
+        "mov w21, #0x0",
+        "strb w21, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fisub word [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xde !11b /4"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldrh w21, [x4]",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -13494,7 +13651,7 @@
         "st1 {v2.2d, v3.2d}, [x0], #32",
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
-        "sxth w1, w21",
+        "sxth w1, w22",
         "ldrh w0, [x28, #1024]",
         "ldr x2, [x28, #1312]",
         "blr x2",
@@ -13539,21 +13696,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fisubr word [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xde !11b /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldrh w21, [x4]",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -13565,7 +13723,7 @@
         "st1 {v2.2d, v3.2d}, [x0], #32",
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
-        "sxth w1, w21",
+        "sxth w1, w22",
         "ldrh w0, [x28, #1024]",
         "ldr x2, [x28, #1312]",
         "blr x2",
@@ -13610,21 +13768,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fidiv word [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xde !11b /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldrh w21, [x4]",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -13636,7 +13795,7 @@
         "st1 {v2.2d, v3.2d}, [x0], #32",
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
-        "sxth w1, w21",
+        "sxth w1, w22",
         "ldrh w0, [x28, #1024]",
         "ldr x2, [x28, #1312]",
         "blr x2",
@@ -13681,21 +13840,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fidivr word [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xde !11b /7"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldrh w21, [x4]",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -13707,7 +13867,7 @@
         "st1 {v2.2d, v3.2d}, [x0], #32",
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
-        "sxth w1, w21",
+        "sxth w1, w22",
         "ldrh w0, [x28, #1024]",
         "ldr x2, [x28, #1312]",
         "blr x2",
@@ -13752,11 +13912,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "faddp st0": {
@@ -13767,8 +13927,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -13799,19 +13959,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "faddp st1": {
@@ -13823,8 +13983,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -13855,18 +14015,18 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w20",
+        "bic w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x22, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x23, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "faddp st2": {
@@ -13877,8 +14037,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -13909,19 +14069,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "faddp st3": {
@@ -13932,8 +14092,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -13964,19 +14124,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "faddp st4": {
@@ -13987,8 +14147,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14019,19 +14179,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "faddp st5": {
@@ -14042,8 +14202,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14074,19 +14234,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "faddp st6": {
@@ -14097,8 +14257,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14129,19 +14289,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "faddp st7": {
@@ -14152,8 +14312,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14184,19 +14344,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fmulp st0": {
@@ -14207,8 +14367,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14239,19 +14399,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fmulp st1": {
@@ -14263,8 +14423,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14295,18 +14455,18 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w20",
+        "bic w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x22, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x23, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fmulp st2": {
@@ -14317,8 +14477,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14349,19 +14509,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fmulp st3": {
@@ -14372,8 +14532,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14404,19 +14564,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fmulp st4": {
@@ -14427,8 +14587,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14459,19 +14619,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fmulp st5": {
@@ -14482,8 +14642,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14514,19 +14674,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fmulp st6": {
@@ -14537,8 +14697,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14569,19 +14729,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fmulp st7": {
@@ -14592,8 +14752,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14624,19 +14784,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fcompp": {
@@ -14648,8 +14808,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14683,25 +14843,25 @@
         "mov x22, x0",
         "ubfx x23, x22, #1, #1",
         "ubfx x24, x22, #0, #1",
-        "ubfx x22, x22, #2, #1",
-        "orr w23, w23, w22",
-        "orr w24, w24, w22",
-        "strb w23, [x28, #744]",
-        "mov w23, #0x0",
-        "strb w23, [x28, #745]",
-        "strb w22, [x28, #746]",
-        "strb w24, [x28, #750]",
+        "ubfx x25, x22, #2, #1",
+        "orr w22, w23, w25",
+        "orr w23, w24, w25",
+        "strb w22, [x28, #744]",
+        "mov w22, #0x0",
+        "strb w22, [x28, #745]",
+        "strb w25, [x28, #746]",
+        "strb w23, [x28, #750]",
         "ldrb w22, [x28, #1026]",
         "lsl w23, w21, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "bic w24, w22, w23",
+        "strb w24, [x28, #1026]",
+        "add w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
+        "lsl w22, w21, w20",
+        "bic w21, w24, w22",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -14715,8 +14875,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14747,19 +14907,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubrp st1, st0": {
@@ -14771,8 +14931,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14803,18 +14963,18 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w20",
+        "bic w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x22, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x23, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubrp st2, st0": {
@@ -14825,8 +14985,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14857,19 +15017,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubrp st3, st0": {
@@ -14880,8 +15040,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14912,19 +15072,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubrp st4, st0": {
@@ -14935,8 +15095,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14967,19 +15127,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubrp st5, st0": {
@@ -14990,8 +15150,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15022,19 +15182,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubrp st6, st0": {
@@ -15045,8 +15205,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15077,19 +15237,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubrp st7, st0": {
@@ -15100,8 +15260,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15132,19 +15292,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "db 0xde, 0xe8": {
@@ -15157,8 +15317,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15189,19 +15349,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubp st1, st0": {
@@ -15213,8 +15373,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15245,18 +15405,18 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w20",
+        "bic w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x22, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x23, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubp st2, st0": {
@@ -15267,8 +15427,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15299,19 +15459,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubp st3, st0": {
@@ -15322,8 +15482,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15354,19 +15514,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubp st4, st0": {
@@ -15377,8 +15537,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15409,19 +15569,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubp st5, st0": {
@@ -15432,8 +15592,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15464,19 +15624,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubp st6, st0": {
@@ -15487,8 +15647,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15519,19 +15679,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubp st7, st0": {
@@ -15542,8 +15702,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15574,19 +15734,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "db 0xde, 0xf0": {
@@ -15599,8 +15759,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15631,19 +15791,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivrp st1, st0": {
@@ -15655,8 +15815,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15687,18 +15847,18 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w20",
+        "bic w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x22, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x23, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivrp st2, st0": {
@@ -15709,8 +15869,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15741,19 +15901,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivrp st3, st0": {
@@ -15764,8 +15924,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15796,19 +15956,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivrp st4, st0": {
@@ -15819,8 +15979,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15851,19 +16011,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivrp st5, st0": {
@@ -15874,8 +16034,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15906,19 +16066,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivrp st6, st0": {
@@ -15929,8 +16089,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15961,19 +16121,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivrp st7, st0": {
@@ -15984,8 +16144,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -16016,19 +16176,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "db 0xde, 0xf8": {
@@ -16041,8 +16201,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -16073,19 +16233,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivp st1, st0": {
@@ -16097,8 +16257,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -16129,18 +16289,18 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w20",
+        "bic w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x22, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x23, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivp st2, st0": {
@@ -16151,8 +16311,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -16183,19 +16343,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivp st3, st0": {
@@ -16206,8 +16366,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -16238,19 +16398,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivp st4, st0": {
@@ -16261,8 +16421,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -16293,19 +16453,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivp st5, st0": {
@@ -16316,8 +16476,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -16348,19 +16508,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivp st6, st0": {
@@ -16371,8 +16531,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -16403,19 +16563,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivp st7, st0": {
@@ -16426,8 +16586,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -16458,66 +16618,71 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fild word [rax]": {
-      "ExpectedInstructionCount": 35,
+      "ExpectedInstructionCount": 40,
       "Comment": [
         "0xdf !11b /0"
       ],
       "ExpectedArm64ASM": [
+        "sub sp, sp, #0x20 (32)",
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
-        "ldrh w21, [x4]",
-        "mov w22, #0x0",
-        "sxth x21, w21",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
+        "mov w21, #0x0",
+        "sxth x23, w22",
         "mrs x22, nzcv",
-        "cmp x21, #0x0 (0)",
-        "mov w23, #0x8000",
-        "csel x23, x23, xzr, lt",
-        "cneg x21, x21, mi",
-        "mov w24, #0x3f",
+        "cmp x23, #0x0 (0)",
+        "mov w24, #0x8000",
+        "csel x25, x24, xzr, lt",
+        "cneg x24, x23, mi",
+        "mov w23, #0x3f",
         "mov x0, #0x3f",
-        "clz x25, x21",
-        "sub x25, x0, x25",
-        "sub x24, x24, x25",
-        "lsl x25, x21, x24",
+        "clz x30, x24",
+        "sub x30, x0, x30",
+        "sub x18, x23, x30",
+        "lsl x23, x24, x18",
         "mov w30, #0x403e",
-        "sub x24, x30, x24",
-        "mov w30, #0x0",
-        "cmp x21, #0x0 (0)",
-        "csel x21, x30, x24, eq",
-        "orr x21, x23, x21",
-        "fmov d2, x25",
+        "str w22, [sp]",
+        "sub x22, x30, x18",
+        "cmp x24, #0x0 (0)",
+        "csel x30, x21, x22, eq",
+        "orr x21, x25, x30",
+        "fmov d2, x23",
         "fmov d3, x21",
-        "mov v2.d[1], v3.d[0]",
+        "mov v4.16b, v2.16b",
+        "mov v4.d[1], v3.d[0]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x22"
+        "str q4, [x0, #768]",
+        "ldr w20, [sp]",
+        "msr nzcv, x20",
+        "add sp, sp, #0x20 (32)"
       ]
     },
     "fisttp word [rax]": {
-      "ExpectedInstructionCount": 38,
+      "ExpectedInstructionCount": 39,
       "Comment": [
         "0xdf !11b /1"
       ],
@@ -16551,19 +16716,20 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
         "sxth x21, w0",
-        "strh w21, [x4]",
+        "mov x22, x4",
+        "strh w21, [x22]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fist word [rax]": {
-      "ExpectedInstructionCount": 30,
+      "ExpectedInstructionCount": 31,
       "Comment": [
         "0xdf !11b /2"
       ],
@@ -16597,11 +16763,12 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
         "sxth x20, w0",
-        "strh w20, [x4]"
+        "mov x21, x4",
+        "strh w20, [x21]"
       ]
     },
     "fistp word [rax]": {
-      "ExpectedInstructionCount": 38,
+      "ExpectedInstructionCount": 39,
       "Comment": [
         "0xdf !11b /3"
       ],
@@ -16635,33 +16802,35 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
         "sxth x21, w0",
-        "strh w21, [x4]",
+        "mov x22, x4",
+        "strh w21, [x22]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fbld tword [rax]": {
-      "ExpectedInstructionCount": 40,
+      "ExpectedInstructionCount": 41,
       "Comment": [
         "0xdf !11b /4"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
-        "ldr q2, [x4]",
+        "mov x21, x4",
+        "ldr q2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -16687,15 +16856,15 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q3, [x0, #768]"
       ]
     },
     "fbstp tword [rax]": {
-      "ExpectedInstructionCount": 42,
+      "ExpectedInstructionCount": 43,
       "Comment": [
         "0xdf !11b /6"
       ],
@@ -16728,19 +16897,20 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "str d2, [x4]",
-        "mov x21, v2.d[1]",
-        "strh w21, [x4, #8]",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "mov x21, x4",
+        "str d3, [x21]",
+        "mov x22, v3.d[1]",
+        "strh w22, [x21, #8]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -16751,8 +16921,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -16763,8 +16933,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -16775,8 +16945,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -16787,8 +16957,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -16799,8 +16969,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -16811,8 +16981,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -16823,8 +16993,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -16835,41 +17005,45 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fnstsw ax": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 16,
       "Comment": [
         "0xdf 11b 0xe0 /4"
       ],
       "ExpectedArm64ASM": [
         "mov w20, #0x0",
         "ldrb w21, [x28, #747]",
-        "bfi x20, x21, #11, #3",
-        "ldrb w21, [x28, #744]",
-        "ldrb w22, [x28, #745]",
+        "mov x22, x20",
+        "bfi x22, x21, #11, #3",
+        "ldrb w20, [x28, #744]",
+        "ldrb w21, [x28, #745]",
         "ldrb w23, [x28, #746]",
         "ldrb w24, [x28, #750]",
-        "orr x20, x20, x21, lsl #8",
-        "orr x20, x20, x22, lsl #9",
-        "orr x20, x20, x23, lsl #10",
-        "orr x20, x20, x24, lsl #14",
-        "bfxil x4, x20, #0, #16"
+        "orr x25, x22, x20, lsl #8",
+        "orr x20, x25, x21, lsl #9",
+        "orr x21, x20, x23, lsl #10",
+        "orr x20, x21, x24, lsl #14",
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x4, x22"
       ]
     },
     "fucomip st0": {
-      "ExpectedInstructionCount": 51,
+      "ExpectedInstructionCount": 52,
       "Comment": [
         "0xdf 11b 0xe8 /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -16903,24 +17077,25 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "rmif x22, #63, #nzCv",
-        "rmif x23, #62, #nZcv",
-        "mov w22, #0x1",
-        "eor w26, w21, #0x1",
-        "ldrb w21, [x28, #1026]",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "rmif x21, #63, #nzCv",
+        "rmif x22, #62, #nZcv",
+        "mov w21, #0x1",
+        "eor w22, w24, #0x1",
+        "mov x26, x22",
+        "ldrb w22, [x28, #1026]",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fucomip st1": {
-      "ExpectedInstructionCount": 51,
+      "ExpectedInstructionCount": 52,
       "Comment": [
         "0xdf 11b 0xe9 /5"
       ],
@@ -16928,8 +17103,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -16963,31 +17138,32 @@
         "mov x22, x0",
         "ubfx x23, x22, #1, #1",
         "ubfx x24, x22, #0, #1",
-        "ubfx x22, x22, #2, #1",
-        "orr w23, w23, w22",
-        "orr w24, w24, w22",
-        "rmif x23, #63, #nzCv",
-        "rmif x24, #62, #nZcv",
-        "eor w26, w22, #0x1",
+        "ubfx x25, x22, #2, #1",
+        "orr w22, w23, w25",
+        "orr w23, w24, w25",
+        "rmif x22, #63, #nzCv",
+        "rmif x23, #62, #nZcv",
+        "eor w22, w25, #0x1",
+        "mov x26, x22",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fucomip st2": {
-      "ExpectedInstructionCount": 51,
+      "ExpectedInstructionCount": 52,
       "Comment": [
         "0xdf 11b 0xea /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -17021,32 +17197,33 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "rmif x22, #63, #nzCv",
-        "rmif x23, #62, #nZcv",
-        "mov w22, #0x1",
-        "eor w26, w21, #0x1",
-        "ldrb w21, [x28, #1026]",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "rmif x21, #63, #nzCv",
+        "rmif x22, #62, #nZcv",
+        "mov w21, #0x1",
+        "eor w22, w24, #0x1",
+        "mov x26, x22",
+        "ldrb w22, [x28, #1026]",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fucomip st3": {
-      "ExpectedInstructionCount": 51,
+      "ExpectedInstructionCount": 52,
       "Comment": [
         "0xdf 11b 0xeb /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -17080,32 +17257,33 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "rmif x22, #63, #nzCv",
-        "rmif x23, #62, #nZcv",
-        "mov w22, #0x1",
-        "eor w26, w21, #0x1",
-        "ldrb w21, [x28, #1026]",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "rmif x21, #63, #nzCv",
+        "rmif x22, #62, #nZcv",
+        "mov w21, #0x1",
+        "eor w22, w24, #0x1",
+        "mov x26, x22",
+        "ldrb w22, [x28, #1026]",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fucomip st4": {
-      "ExpectedInstructionCount": 51,
+      "ExpectedInstructionCount": 52,
       "Comment": [
         "0xdf 11b 0xec /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -17139,32 +17317,33 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "rmif x22, #63, #nzCv",
-        "rmif x23, #62, #nZcv",
-        "mov w22, #0x1",
-        "eor w26, w21, #0x1",
-        "ldrb w21, [x28, #1026]",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "rmif x21, #63, #nzCv",
+        "rmif x22, #62, #nZcv",
+        "mov w21, #0x1",
+        "eor w22, w24, #0x1",
+        "mov x26, x22",
+        "ldrb w22, [x28, #1026]",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fucomip st5": {
-      "ExpectedInstructionCount": 51,
+      "ExpectedInstructionCount": 52,
       "Comment": [
         "0xdf 11b 0xed /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -17198,32 +17377,33 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "rmif x22, #63, #nzCv",
-        "rmif x23, #62, #nZcv",
-        "mov w22, #0x1",
-        "eor w26, w21, #0x1",
-        "ldrb w21, [x28, #1026]",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "rmif x21, #63, #nzCv",
+        "rmif x22, #62, #nZcv",
+        "mov w21, #0x1",
+        "eor w22, w24, #0x1",
+        "mov x26, x22",
+        "ldrb w22, [x28, #1026]",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fucomip st6": {
-      "ExpectedInstructionCount": 51,
+      "ExpectedInstructionCount": 52,
       "Comment": [
         "0xdf 11b 0xee /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -17257,32 +17437,33 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "rmif x22, #63, #nzCv",
-        "rmif x23, #62, #nZcv",
-        "mov w22, #0x1",
-        "eor w26, w21, #0x1",
-        "ldrb w21, [x28, #1026]",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "rmif x21, #63, #nzCv",
+        "rmif x22, #62, #nZcv",
+        "mov w21, #0x1",
+        "eor w22, w24, #0x1",
+        "mov x26, x22",
+        "ldrb w22, [x28, #1026]",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fucomip st7": {
-      "ExpectedInstructionCount": 51,
+      "ExpectedInstructionCount": 52,
       "Comment": [
         "0xdf 11b 0xef /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -17316,32 +17497,33 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "rmif x22, #63, #nzCv",
-        "rmif x23, #62, #nZcv",
-        "mov w22, #0x1",
-        "eor w26, w21, #0x1",
-        "ldrb w21, [x28, #1026]",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "rmif x21, #63, #nzCv",
+        "rmif x22, #62, #nZcv",
+        "mov w21, #0x1",
+        "eor w22, w24, #0x1",
+        "mov x26, x22",
+        "ldrb w22, [x28, #1026]",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fcomip st0": {
-      "ExpectedInstructionCount": 51,
+      "ExpectedInstructionCount": 52,
       "Comment": [
         "0xdf 11b 0xf0 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -17375,24 +17557,25 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "rmif x22, #63, #nzCv",
-        "rmif x23, #62, #nZcv",
-        "mov w22, #0x1",
-        "eor w26, w21, #0x1",
-        "ldrb w21, [x28, #1026]",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "rmif x21, #63, #nzCv",
+        "rmif x22, #62, #nZcv",
+        "mov w21, #0x1",
+        "eor w22, w24, #0x1",
+        "mov x26, x22",
+        "ldrb w22, [x28, #1026]",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fcomip st1": {
-      "ExpectedInstructionCount": 51,
+      "ExpectedInstructionCount": 52,
       "Comment": [
         "0xdf 11b 0xf1 /6"
       ],
@@ -17400,8 +17583,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -17435,31 +17618,32 @@
         "mov x22, x0",
         "ubfx x23, x22, #1, #1",
         "ubfx x24, x22, #0, #1",
-        "ubfx x22, x22, #2, #1",
-        "orr w23, w23, w22",
-        "orr w24, w24, w22",
-        "rmif x23, #63, #nzCv",
-        "rmif x24, #62, #nZcv",
-        "eor w26, w22, #0x1",
+        "ubfx x25, x22, #2, #1",
+        "orr w22, w23, w25",
+        "orr w23, w24, w25",
+        "rmif x22, #63, #nzCv",
+        "rmif x23, #62, #nZcv",
+        "eor w22, w25, #0x1",
+        "mov x26, x22",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fcomip st2": {
-      "ExpectedInstructionCount": 51,
+      "ExpectedInstructionCount": 52,
       "Comment": [
         "0xdf 11b 0xf2 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -17493,32 +17677,33 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "rmif x22, #63, #nzCv",
-        "rmif x23, #62, #nZcv",
-        "mov w22, #0x1",
-        "eor w26, w21, #0x1",
-        "ldrb w21, [x28, #1026]",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "rmif x21, #63, #nzCv",
+        "rmif x22, #62, #nZcv",
+        "mov w21, #0x1",
+        "eor w22, w24, #0x1",
+        "mov x26, x22",
+        "ldrb w22, [x28, #1026]",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fcomip st3": {
-      "ExpectedInstructionCount": 51,
+      "ExpectedInstructionCount": 52,
       "Comment": [
         "0xdf 11b 0xf3 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -17552,32 +17737,33 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "rmif x22, #63, #nzCv",
-        "rmif x23, #62, #nZcv",
-        "mov w22, #0x1",
-        "eor w26, w21, #0x1",
-        "ldrb w21, [x28, #1026]",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "rmif x21, #63, #nzCv",
+        "rmif x22, #62, #nZcv",
+        "mov w21, #0x1",
+        "eor w22, w24, #0x1",
+        "mov x26, x22",
+        "ldrb w22, [x28, #1026]",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fcomip st4": {
-      "ExpectedInstructionCount": 51,
+      "ExpectedInstructionCount": 52,
       "Comment": [
         "0xdf 11b 0xf4 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -17611,32 +17797,33 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "rmif x22, #63, #nzCv",
-        "rmif x23, #62, #nZcv",
-        "mov w22, #0x1",
-        "eor w26, w21, #0x1",
-        "ldrb w21, [x28, #1026]",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "rmif x21, #63, #nzCv",
+        "rmif x22, #62, #nZcv",
+        "mov w21, #0x1",
+        "eor w22, w24, #0x1",
+        "mov x26, x22",
+        "ldrb w22, [x28, #1026]",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fcomip st5": {
-      "ExpectedInstructionCount": 51,
+      "ExpectedInstructionCount": 52,
       "Comment": [
         "0xdf 11b 0xf5 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -17670,32 +17857,33 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "rmif x22, #63, #nzCv",
-        "rmif x23, #62, #nZcv",
-        "mov w22, #0x1",
-        "eor w26, w21, #0x1",
-        "ldrb w21, [x28, #1026]",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "rmif x21, #63, #nzCv",
+        "rmif x22, #62, #nZcv",
+        "mov w21, #0x1",
+        "eor w22, w24, #0x1",
+        "mov x26, x22",
+        "ldrb w22, [x28, #1026]",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fcomip st6": {
-      "ExpectedInstructionCount": 51,
+      "ExpectedInstructionCount": 52,
       "Comment": [
         "0xdf 11b 0xf6 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -17729,32 +17917,33 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "rmif x22, #63, #nzCv",
-        "rmif x23, #62, #nZcv",
-        "mov w22, #0x1",
-        "eor w26, w21, #0x1",
-        "ldrb w21, [x28, #1026]",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "rmif x21, #63, #nzCv",
+        "rmif x22, #62, #nZcv",
+        "mov w21, #0x1",
+        "eor w22, w24, #0x1",
+        "mov x26, x22",
+        "ldrb w22, [x28, #1026]",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fcomip st7": {
-      "ExpectedInstructionCount": 51,
+      "ExpectedInstructionCount": 52,
       "Comment": [
         "0xdf 11b 0xf7 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -17788,19 +17977,20 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "rmif x22, #63, #nzCv",
-        "rmif x23, #62, #nZcv",
-        "mov w22, #0x1",
-        "eor w26, w21, #0x1",
-        "ldrb w21, [x28, #1026]",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "rmif x21, #63, #nzCv",
+        "rmif x22, #62, #nZcv",
+        "mov w21, #0x1",
+        "eor w22, w24, #0x1",
+        "mov x26, x22",
+        "ldrb w22, [x28, #1026]",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     }
diff --git a/unittests/InstructionCountCI/FlagM/x87_f64.json b/unittests/InstructionCountCI/FlagM/x87_f64.json
index 8a8148774d..4feb9ac59a 100644
--- a/unittests/InstructionCountCI/FlagM/x87_f64.json
+++ b/unittests/InstructionCountCI/FlagM/x87_f64.json
@@ -16,49 +16,52 @@
   },
   "Instructions": {
     "fadd dword [rax]": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xd8 !11b /0"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
-        "fcvt d2, s2",
+        "mov x21, x4",
+        "ldr s2, [x21]",
+        "fcvt d3, s2",
         "add x0, x28, x20, lsl #4",
-        "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
+        "ldr d2, [x0, #768]",
+        "fadd d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fmul dword [rax]": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xd8 !11b /1"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
-        "fcvt d2, s2",
+        "mov x21, x4",
+        "ldr s2, [x21]",
+        "fcvt d3, s2",
         "add x0, x28, x20, lsl #4",
-        "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
+        "ldr d2, [x0, #768]",
+        "fmul d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fcom dword [rax]": {
-      "ExpectedInstructionCount": 15,
+      "ExpectedInstructionCount": 16,
       "Comment": [
         "0xd8 !11b /2"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
-        "fcvt d2, s2",
+        "mov x21, x4",
+        "ldr s2, [x21]",
+        "fcvt d3, s2",
         "add x0, x28, x20, lsl #4",
-        "ldr d3, [x0, #768]",
-        "fcmp d3, d2",
+        "ldr d2, [x0, #768]",
+        "fcmp d2, d3",
         "mov w20, #0x0",
         "cset w21, vs",
         "axflag",
@@ -71,17 +74,18 @@
       ]
     },
     "fcomp dword [rax]": {
-      "ExpectedInstructionCount": 23,
+      "ExpectedInstructionCount": 24,
       "Comment": [
         "0xd8 !11b /3"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
-        "fcvt d2, s2",
+        "mov x21, x4",
+        "ldr s2, [x21]",
+        "fcvt d3, s2",
         "add x0, x28, x20, lsl #4",
-        "ldr d3, [x0, #768]",
-        "fcmp d3, d2",
+        "ldr d2, [x0, #768]",
+        "fcmp d2, d3",
         "mov w21, #0x1",
         "mov w22, #0x0",
         "cset w23, vs",
@@ -93,76 +97,80 @@
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fsub dword [rax]": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xd8 !11b /4"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
-        "fcvt d2, s2",
+        "mov x21, x4",
+        "ldr s2, [x21]",
+        "fcvt d3, s2",
         "add x0, x28, x20, lsl #4",
-        "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
+        "ldr d2, [x0, #768]",
+        "fsub d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr dword [rax]": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xd8 !11b /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
-        "fcvt d2, s2",
+        "mov x21, x4",
+        "ldr s2, [x21]",
+        "fcvt d3, s2",
         "add x0, x28, x20, lsl #4",
-        "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
+        "ldr d2, [x0, #768]",
+        "fsub d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv dword [rax]": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xd8 !11b /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
-        "fcvt d2, s2",
+        "mov x21, x4",
+        "ldr s2, [x21]",
+        "fcvt d3, s2",
         "add x0, x28, x20, lsl #4",
-        "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
+        "ldr d2, [x0, #768]",
+        "fdiv d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr dword [rax]": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xd8 !11b /7"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
-        "fcvt d2, s2",
+        "mov x21, x4",
+        "ldr s2, [x21]",
+        "fcvt d3, s2",
         "add x0, x28, x20, lsl #4",
-        "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
+        "ldr d2, [x0, #768]",
+        "fdiv d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fadd st0, st0": {
@@ -173,14 +181,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fadd d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fadd st0, st1": {
@@ -191,14 +199,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
+        "fadd d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fadd st0, st2": {
@@ -209,14 +217,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
+        "fadd d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fadd st0, st3": {
@@ -227,14 +235,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
+        "fadd d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fadd st0, st4": {
@@ -245,14 +253,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
+        "fadd d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fadd st0, st5": {
@@ -263,14 +271,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
+        "fadd d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fadd st0, st6": {
@@ -281,14 +289,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
+        "fadd d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fadd st0, st7": {
@@ -299,14 +307,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
+        "fadd d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fmul st0, st0": {
@@ -317,14 +325,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fmul d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fmul st0, st1": {
@@ -335,14 +343,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
+        "fmul d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fmul st0, st2": {
@@ -353,14 +361,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
+        "fmul d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fmul st0, st3": {
@@ -371,14 +379,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
+        "fmul d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fmul st0, st4": {
@@ -389,14 +397,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
+        "fmul d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fmul st0, st5": {
@@ -407,14 +415,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
+        "fmul d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fmul st0, st6": {
@@ -425,14 +433,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
+        "fmul d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fmul st0, st7": {
@@ -443,14 +451,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
+        "fmul d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fcom st0, st0": {
@@ -462,8 +470,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x0",
         "add w22, w20, #0x0 (0)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -486,8 +494,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -511,8 +519,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -536,8 +544,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -561,8 +569,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -586,8 +594,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -611,8 +619,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -636,8 +644,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -662,8 +670,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x0",
         "add w22, w20, #0x0 (0)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -678,11 +686,11 @@
         "strb w21, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w21, [x28, #1026]",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -695,8 +703,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -711,11 +719,11 @@
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -727,8 +735,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -744,11 +752,11 @@
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -760,8 +768,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -777,11 +785,11 @@
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -793,8 +801,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -810,11 +818,11 @@
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -826,8 +834,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -843,11 +851,11 @@
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -859,8 +867,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -876,11 +884,11 @@
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -892,8 +900,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -909,11 +917,11 @@
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -925,14 +933,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsub st0, st1": {
@@ -943,14 +951,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
+        "fsub d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fsub st0, st2": {
@@ -961,14 +969,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
+        "fsub d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fsub st0, st3": {
@@ -979,14 +987,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
+        "fsub d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fsub st0, st4": {
@@ -997,14 +1005,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
+        "fsub d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fsub st0, st5": {
@@ -1015,14 +1023,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
+        "fsub d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fsub st0, st6": {
@@ -1033,14 +1041,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
+        "fsub d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fsub st0, st7": {
@@ -1051,14 +1059,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
+        "fsub d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr st0, st0": {
@@ -1069,14 +1077,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr st0, st1": {
@@ -1087,14 +1095,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
+        "fsub d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr st0, st2": {
@@ -1105,14 +1113,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
+        "fsub d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr st0, st3": {
@@ -1123,14 +1131,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
+        "fsub d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr st0, st4": {
@@ -1141,14 +1149,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
+        "fsub d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr st0, st5": {
@@ -1159,14 +1167,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
+        "fsub d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr st0, st6": {
@@ -1177,14 +1185,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
+        "fsub d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr st0, st7": {
@@ -1195,14 +1203,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
+        "fsub d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv st0, st0": {
@@ -1213,14 +1221,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv st0, st1": {
@@ -1231,14 +1239,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
+        "fdiv d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv st0, st2": {
@@ -1249,14 +1257,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
+        "fdiv d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv st0, st3": {
@@ -1267,14 +1275,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
+        "fdiv d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv st0, st4": {
@@ -1285,14 +1293,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
+        "fdiv d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv st0, st5": {
@@ -1303,14 +1311,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
+        "fdiv d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv st0, st6": {
@@ -1321,14 +1329,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
+        "fdiv d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv st0, st7": {
@@ -1339,14 +1347,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
+        "fdiv d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr st0, st0": {
@@ -1357,14 +1365,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr st0, st1": {
@@ -1375,14 +1383,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
+        "fdiv d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr st0, st2": {
@@ -1393,14 +1401,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
+        "fdiv d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr st0, st3": {
@@ -1411,14 +1419,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
+        "fdiv d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr st0, st4": {
@@ -1429,14 +1437,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
+        "fdiv d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr st0, st5": {
@@ -1447,14 +1455,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
+        "fdiv d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr st0, st6": {
@@ -1465,14 +1473,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
+        "fdiv d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr st0, st7": {
@@ -1483,39 +1491,40 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
+        "fdiv d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fld dword [rax]": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xd9 !11b /0"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
-        "fcvt d2, s2",
+        "mov x21, x4",
+        "ldr s2, [x21]",
+        "fcvt d3, s2",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d3, [x0, #768]"
       ]
     },
     "fst dword [rax]": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0xd9 !11b /2"
       ],
@@ -1523,12 +1532,13 @@
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
-        "fcvt s2, d2",
-        "str s2, [x4]"
+        "fcvt s3, d2",
+        "mov x20, x4",
+        "str s3, [x20]"
       ]
     },
     "fstp dword [rax]": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xd9 !11b /3"
       ],
@@ -1536,98 +1546,101 @@
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
-        "fcvt s2, d2",
-        "str s2, [x4]",
+        "fcvt s3, d2",
+        "mov x21, x4",
+        "str s3, [x21]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fldenv [rax]": {
-      "ExpectedInstructionCount": 56,
+      "ExpectedInstructionCount": 57,
       "Comment": [
         "0xd9 !11b /4"
       ],
       "ExpectedArm64ASM": [
-        "ldrh w20, [x4]",
-        "ubfx w21, w20, #10, #3",
-        "rbit w1, w21",
+        "mov x20, x4",
+        "ldrh w21, [x20]",
+        "ubfx w22, w21, #10, #3",
+        "rbit w1, w22",
         "lsr w1, w1, #30",
         "mrs x0, fpcr",
         "bfi x0, x1, #22, #2",
-        "lsr x1, x21, #2",
+        "lsr x1, x22, #2",
         "bfi x0, x1, #24, #1",
         "msr fpcr, x0",
-        "strh w20, [x28, #1024]",
-        "ldr w20, [x4, #4]",
-        "ubfx w21, w20, #11, #3",
-        "strb w21, [x28, #747]",
-        "ubfx w21, w20, #8, #1",
-        "ubfx w22, w20, #9, #1",
-        "ubfx w23, w20, #10, #1",
-        "ubfx w20, w20, #14, #1",
-        "strb w21, [x28, #744]",
-        "strb w22, [x28, #745]",
-        "strb w23, [x28, #746]",
-        "strb w20, [x28, #750]",
-        "ldr w20, [x4, #8]",
-        "ubfx w21, w20, #0, #2",
+        "strh w21, [x28, #1024]",
+        "ldr w21, [x20, #4]",
+        "ubfx w22, w21, #11, #3",
+        "strb w22, [x28, #747]",
+        "ubfx w22, w21, #8, #1",
+        "ubfx w23, w21, #9, #1",
+        "ubfx w24, w21, #10, #1",
+        "ubfx w25, w21, #14, #1",
+        "strb w22, [x28, #744]",
+        "strb w23, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w25, [x28, #750]",
+        "ldr w21, [x20, #8]",
+        "ubfx w20, w21, #0, #2",
         "mrs x22, nzcv",
-        "cmp x21, #0x3 (3)",
-        "cset x21, ne",
-        "ubfx w23, w20, #2, #2",
-        "cmp x23, #0x3 (3)",
-        "cset x23, ne",
-        "orr w21, w21, w23, lsl #1",
-        "ubfx w23, w20, #4, #2",
-        "cmp x23, #0x3 (3)",
-        "cset x23, ne",
-        "orr w21, w21, w23, lsl #2",
-        "ubfx w23, w20, #6, #2",
-        "cmp x23, #0x3 (3)",
+        "cmp x20, #0x3 (3)",
         "cset x23, ne",
-        "orr w21, w21, w23, lsl #3",
-        "ubfx w23, w20, #8, #2",
+        "ubfx w20, w21, #2, #2",
+        "cmp x20, #0x3 (3)",
+        "cset x24, ne",
+        "orr w20, w23, w24, lsl #1",
+        "ubfx w23, w21, #4, #2",
         "cmp x23, #0x3 (3)",
-        "cset x23, ne",
-        "orr w21, w21, w23, lsl #4",
-        "ubfx w23, w20, #10, #2",
+        "cset x24, ne",
+        "orr w23, w20, w24, lsl #2",
+        "ubfx w20, w21, #6, #2",
+        "cmp x20, #0x3 (3)",
+        "cset x24, ne",
+        "orr w20, w23, w24, lsl #3",
+        "ubfx w23, w21, #8, #2",
         "cmp x23, #0x3 (3)",
-        "cset x23, ne",
-        "orr w21, w21, w23, lsl #5",
-        "ubfx w23, w20, #12, #2",
+        "cset x24, ne",
+        "orr w23, w20, w24, lsl #4",
+        "ubfx w20, w21, #10, #2",
+        "cmp x20, #0x3 (3)",
+        "cset x24, ne",
+        "orr w20, w23, w24, lsl #5",
+        "ubfx w23, w21, #12, #2",
         "cmp x23, #0x3 (3)",
-        "cset x23, ne",
-        "orr w21, w21, w23, lsl #6",
-        "ubfx w20, w20, #14, #2",
+        "cset x24, ne",
+        "orr w23, w20, w24, lsl #6",
+        "ubfx w20, w21, #14, #2",
         "cmp x20, #0x3 (3)",
-        "cset x20, ne",
-        "orr w20, w21, w20, lsl #7",
+        "cset x21, ne",
+        "orr w20, w23, w21, lsl #7",
         "strb w20, [x28, #1026]",
         "msr nzcv, x22"
       ]
     },
     "fldcw [rax]": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "0xd9 !11b /5"
       ],
       "ExpectedArm64ASM": [
-        "ldrh w20, [x4]",
-        "ubfx w21, w20, #10, #3",
-        "rbit w1, w21",
+        "mov x20, x4",
+        "ldrh w21, [x20]",
+        "ubfx w20, w21, #10, #3",
+        "rbit w1, w20",
         "lsr w1, w1, #30",
         "mrs x0, fpcr",
         "bfi x0, x1, #22, #2",
-        "lsr x1, x21, #2",
+        "lsr x1, x20, #2",
         "bfi x0, x1, #24, #1",
         "msr fpcr, x0",
-        "strh w20, [x28, #1024]"
+        "strh w21, [x28, #1024]"
       ]
     },
     "fnstenv [rax]": {
@@ -1636,80 +1649,81 @@
         "0xd9 !11b /6"
       ],
       "ExpectedArm64ASM": [
-        "ldrh w20, [x28, #1024]",
-        "str w20, [x4]",
-        "mov w20, #0x0",
-        "ldrb w21, [x28, #747]",
-        "mov x0, x20",
-        "bfi x0, x21, #11, #3",
-        "mov x21, x0",
+        "mov x20, x4",
+        "ldrh w21, [x28, #1024]",
+        "str w21, [x20]",
+        "mov w21, #0x0",
+        "ldrb w22, [x28, #747]",
+        "mov x23, x21",
+        "bfi x23, x22, #11, #3",
         "ldrb w22, [x28, #744]",
-        "ldrb w23, [x28, #745]",
-        "ldrb w24, [x28, #746]",
-        "ldrb w25, [x28, #750]",
-        "orr x21, x21, x22, lsl #8",
-        "orr x21, x21, x23, lsl #9",
-        "orr x21, x21, x24, lsl #10",
-        "orr x21, x21, x25, lsl #14",
-        "str w21, [x4, #4]",
-        "ldrb w21, [x28, #1026]",
-        "and w22, w21, #0x1",
-        "mov w23, #0x3",
-        "mrs x24, nzcv",
+        "ldrb w24, [x28, #745]",
+        "ldrb w25, [x28, #746]",
+        "ldrb w30, [x28, #750]",
+        "orr x18, x23, x22, lsl #8",
+        "orr x22, x18, x24, lsl #9",
+        "orr x23, x22, x25, lsl #10",
+        "orr x22, x23, x30, lsl #14",
+        "str w22, [x20, #4]",
+        "ldrb w22, [x28, #1026]",
+        "and w23, w22, #0x1",
+        "mov w24, #0x3",
+        "mrs x25, nzcv",
+        "cmp x23, #0x0 (0)",
+        "csel x30, x24, x21, eq",
+        "orr w23, w21, w30",
+        "lsr w30, w22, #1",
+        "and w18, w30, #0x1",
+        "cmp x18, #0x0 (0)",
+        "csel x30, x24, x21, eq",
+        "orr w18, w23, w30, lsl #2",
+        "lsr w23, w22, #2",
+        "and w30, w23, #0x1",
+        "cmp x30, #0x0 (0)",
+        "csel x23, x24, x21, eq",
+        "orr w30, w18, w23, lsl #4",
+        "lsr w23, w22, #3",
+        "and w18, w23, #0x1",
+        "cmp x18, #0x0 (0)",
+        "csel x23, x24, x21, eq",
+        "orr w18, w30, w23, lsl #6",
+        "lsr w23, w22, #4",
+        "and w30, w23, #0x1",
+        "cmp x30, #0x0 (0)",
+        "csel x23, x24, x21, eq",
+        "orr w30, w18, w23, lsl #8",
+        "lsr w23, w22, #5",
+        "and w18, w23, #0x1",
+        "cmp x18, #0x0 (0)",
+        "csel x23, x24, x21, eq",
+        "orr w18, w30, w23, lsl #10",
+        "lsr w23, w22, #6",
+        "and w30, w23, #0x1",
+        "cmp x30, #0x0 (0)",
+        "csel x23, x24, x21, eq",
+        "orr w30, w18, w23, lsl #12",
+        "lsr w23, w22, #7",
+        "and w22, w23, #0x1",
         "cmp x22, #0x0 (0)",
-        "csel x22, x23, x20, eq",
-        "orr w22, w20, w22",
-        "lsr w25, w21, #1",
-        "and w25, w25, #0x1",
-        "cmp x25, #0x0 (0)",
-        "csel x25, x23, x20, eq",
-        "orr w22, w22, w25, lsl #2",
-        "lsr w25, w21, #2",
-        "and w25, w25, #0x1",
-        "cmp x25, #0x0 (0)",
-        "csel x25, x23, x20, eq",
-        "orr w22, w22, w25, lsl #4",
-        "lsr w25, w21, #3",
-        "and w25, w25, #0x1",
-        "cmp x25, #0x0 (0)",
-        "csel x25, x23, x20, eq",
-        "orr w22, w22, w25, lsl #6",
-        "lsr w25, w21, #4",
-        "and w25, w25, #0x1",
-        "cmp x25, #0x0 (0)",
-        "csel x25, x23, x20, eq",
-        "orr w22, w22, w25, lsl #8",
-        "lsr w25, w21, #5",
-        "and w25, w25, #0x1",
-        "cmp x25, #0x0 (0)",
-        "csel x25, x23, x20, eq",
-        "orr w22, w22, w25, lsl #10",
-        "lsr w25, w21, #6",
-        "and w25, w25, #0x1",
-        "cmp x25, #0x0 (0)",
-        "csel x25, x23, x20, eq",
-        "orr w22, w22, w25, lsl #12",
-        "lsr w21, w21, #7",
-        "and w21, w21, #0x1",
-        "cmp x21, #0x0 (0)",
-        "csel x21, x23, x20, eq",
-        "orr w21, w22, w21, lsl #14",
-        "str w21, [x4, #8]",
-        "str w20, [x4, #12]",
-        "str w20, [x4, #16]",
-        "str w20, [x4, #20]",
-        "str w20, [x4, #24]",
-        "msr nzcv, x24"
+        "csel x23, x24, x21, eq",
+        "orr w22, w30, w23, lsl #14",
+        "str w22, [x20, #8]",
+        "str w21, [x20, #12]",
+        "str w21, [x20, #16]",
+        "str w21, [x20, #20]",
+        "str w21, [x20, #24]",
+        "msr nzcv, x25"
       ]
     },
     "fnstcw [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "0xd9 !11b /7"
       ],
       "ExpectedArm64ASM": [
         "ldrh w20, [x28, #1024]",
-        "strh w20, [x4]"
+        "mov x21, x4",
+        "strh w20, [x21]"
       ]
     },
     "fld st0": {
@@ -1720,15 +1734,15 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -1744,14 +1758,14 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -1766,15 +1780,15 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -1789,15 +1803,15 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -1812,15 +1826,15 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -1835,15 +1849,15 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -1858,15 +1872,15 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -1881,15 +1895,15 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -1904,14 +1918,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -1923,14 +1937,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -1942,14 +1956,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -1961,14 +1975,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -1980,14 +1994,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -1999,14 +2013,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -2018,14 +2032,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -2037,14 +2051,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -2064,9 +2078,9 @@
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
-        "fneg v2.2d, v2.2d",
+        "fneg v3.2d, v2.2d",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d3, [x0, #768]"
       ]
     },
     "fabs": {
@@ -2078,9 +2092,9 @@
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
-        "fabs d2, d2",
+        "fabs d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d3, [x0, #768]"
       ]
     },
     "ftst": {
@@ -2115,19 +2129,19 @@
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
         "mov x21, v2.d[0]",
-        "lsr x21, x21, #63",
-        "strb w21, [x28, #745]",
+        "lsr x22, x21, #63",
+        "strb w22, [x28, #745]",
         "ldrb w21, [x28, #1026]",
-        "lsr w20, w21, w20",
-        "mov w21, #0x1",
-        "and w20, w20, #0x1",
+        "lsr w22, w21, w20",
+        "mov w20, #0x1",
+        "and w21, w22, #0x1",
         "mov w22, #0x0",
         "mrs x23, nzcv",
-        "cmp x20, #0x1 (1)",
-        "csel x21, x22, x21, eq",
-        "strb w21, [x28, #744]",
-        "strb w20, [x28, #746]",
-        "strb w21, [x28, #750]",
+        "cmp x21, #0x1 (1)",
+        "csel x24, x22, x20, eq",
+        "strb w24, [x28, #744]",
+        "strb w21, [x28, #746]",
+        "strb w24, [x28, #750]",
         "msr nzcv, x23"
       ]
     },
@@ -2139,11 +2153,11 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "mov x21, #0x3ff0000000000000",
@@ -2160,11 +2174,11 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "mov x21, #0xa372",
@@ -2184,11 +2198,11 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "mov x21, #0x82fe",
@@ -2208,11 +2222,11 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "mov x21, #0x2d18",
@@ -2232,11 +2246,11 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "mov x21, #0x79ff",
@@ -2256,11 +2270,11 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "mov x21, #0x39ef",
@@ -2280,11 +2294,11 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "mov w21, #0x0",
@@ -2352,9 +2366,9 @@
         "ldp x19, x29, [x28, #120]",
         "ldr x26, [x28, #752]",
         "ldr x27, [x28, #760]",
-        "mov v2.8b, v0.8b",
+        "mov v3.8b, v0.8b",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d3, [x0, #768]"
       ]
     },
     "fyl2x": {
@@ -2366,15 +2380,15 @@
         "ldrb w20, [x28, #747]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "strb w21, [x28, #747]",
+        "and w22, w21, #0x7",
+        "strb w22, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr d3, [x0, #768]",
         "mov v0.8b, v2.8b",
         "mov v1.8b, v3.8b",
@@ -2427,9 +2441,9 @@
         "ldp x19, x29, [x28, #120]",
         "ldr x26, [x28, #752]",
         "ldr x27, [x28, #760]",
-        "mov v2.8b, v0.8b",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "mov v4.8b, v0.8b",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fptan": {
@@ -2441,12 +2455,12 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "sub w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w22",
-        "orr w21, w23, w21",
+        "and w23, w22, #0x7",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w23",
+        "orr w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "strb w22, [x28, #747]",
+        "strb w23, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
         "mrs x0, nzcv",
@@ -2499,15 +2513,15 @@
         "ldp x19, x29, [x28, #120]",
         "ldr x26, [x28, #752]",
         "ldr x27, [x28, #760]",
-        "mov v2.8b, v0.8b",
+        "mov v3.8b, v0.8b",
         "mov x21, #0x3ff0000000000000",
-        "fmov d3, x21",
+        "fmov d2, x21",
         "mov w21, #0x0",
         "strb w21, [x28, #746]",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
-        "str d3, [x0, #768]"
+        "str d3, [x0, #768]",
+        "add x0, x28, x23, lsl #4",
+        "str d2, [x0, #768]"
       ]
     },
     "fpatan": {
@@ -2519,15 +2533,15 @@
         "ldrb w20, [x28, #747]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "strb w21, [x28, #747]",
+        "and w22, w21, #0x7",
+        "strb w22, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr d3, [x0, #768]",
         "mov v0.8b, v3.8b",
         "mov v1.8b, v2.8b",
@@ -2580,9 +2594,9 @@
         "ldp x19, x29, [x28, #120]",
         "ldr x26, [x28, #752]",
         "ldr x27, [x28, #760]",
-        "mov v2.8b, v0.8b",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "mov v4.8b, v0.8b",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fxtract": {
@@ -2594,25 +2608,25 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "sub w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w22",
-        "orr w21, w23, w21",
+        "and w23, w22, #0x7",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w23",
+        "orr w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "strb w22, [x28, #747]",
+        "strb w23, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
         "mov x21, v2.d[0]",
-        "and x23, x21, #0x7ff0000000000000",
-        "lsr x23, x23, #52",
-        "sub x23, x23, #0x3ff (1023)",
-        "scvtf d2, x23",
-        "and x21, x21, #0x800fffffffffffff",
-        "orr x21, x21, #0x3ff0000000000000",
+        "and x22, x21, #0x7ff0000000000000",
+        "lsr x24, x22, #52",
+        "sub x22, x24, #0x3ff (1023)",
+        "scvtf d2, x22",
+        "and x22, x21, #0x800fffffffffffff",
+        "orr x21, x22, #0x3ff0000000000000",
         "fmov d3, x21",
         "add x0, x28, x20, lsl #4",
         "str d2, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "str d3, [x0, #768]"
       ]
     },
@@ -2624,10 +2638,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr d3, [x0, #768]",
         "mov v0.8b, v2.8b",
         "mov v1.8b, v3.8b",
@@ -2680,11 +2694,11 @@
         "ldp x19, x29, [x28, #120]",
         "ldr x26, [x28, #752]",
         "ldr x27, [x28, #760]",
-        "mov v2.8b, v0.8b",
+        "mov v4.8b, v0.8b",
         "mov w21, #0x0",
         "strb w21, [x28, #746]",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdecstp": {
@@ -2694,8 +2708,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -2706,8 +2720,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -2719,10 +2733,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr d3, [x0, #768]",
         "mov v0.8b, v2.8b",
         "mov v1.8b, v3.8b",
@@ -2775,11 +2789,11 @@
         "ldp x19, x29, [x28, #120]",
         "ldr x26, [x28, #752]",
         "ldr x27, [x28, #760]",
-        "mov v2.8b, v0.8b",
+        "mov v4.8b, v0.8b",
         "mov w21, #0x0",
         "strb w21, [x28, #746]",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fyl2xp1": {
@@ -2791,20 +2805,20 @@
         "ldrb w20, [x28, #747]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "strb w21, [x28, #747]",
+        "and w22, w21, #0x7",
+        "strb w22, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr d3, [x0, #768]",
         "mov x20, #0x3ff0000000000000",
         "fmov d4, x20",
-        "fadd d2, d2, d4",
-        "mov v0.8b, v2.8b",
+        "fadd d5, d2, d4",
+        "mov v0.8b, v5.8b",
         "mov v1.8b, v3.8b",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
@@ -2856,7 +2870,7 @@
         "ldr x26, [x28, #752]",
         "ldr x27, [x28, #760]",
         "mov v2.8b, v0.8b",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str d2, [x0, #768]"
       ]
     },
@@ -2869,9 +2883,9 @@
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
-        "fsqrt d2, d2",
+        "fsqrt d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d3, [x0, #768]"
       ]
     },
     "fsincos": {
@@ -2883,12 +2897,12 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "sub w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w22",
-        "orr w21, w23, w21",
+        "and w23, w22, #0x7",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w23",
+        "orr w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "strb w22, [x28, #747]",
+        "strb w23, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
         "mrs x0, nzcv",
@@ -2992,13 +3006,13 @@
         "ldp x19, x29, [x28, #120]",
         "ldr x26, [x28, #752]",
         "ldr x27, [x28, #760]",
-        "mov v2.8b, v0.8b",
+        "mov v4.8b, v0.8b",
         "mov w21, #0x0",
         "strb w21, [x28, #746]",
         "add x0, x28, x20, lsl #4",
         "str d3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x23, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "frndint": {
@@ -3010,9 +3024,9 @@
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
-        "frinti d2, d2",
+        "frinti d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d3, [x0, #768]"
       ]
     },
     "fscale": {
@@ -3023,10 +3037,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr d3, [x0, #768]",
         "mov v0.8b, v2.8b",
         "mov v1.8b, v3.8b",
@@ -3079,9 +3093,9 @@
         "ldp x19, x29, [x28, #120]",
         "ldr x26, [x28, #752]",
         "ldr x27, [x28, #760]",
-        "mov v2.8b, v0.8b",
+        "mov v4.8b, v0.8b",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fsin": {
@@ -3143,11 +3157,11 @@
         "ldp x19, x29, [x28, #120]",
         "ldr x26, [x28, #752]",
         "ldr x27, [x28, #760]",
-        "mov v2.8b, v0.8b",
+        "mov v3.8b, v0.8b",
         "mov w21, #0x0",
         "strb w21, [x28, #746]",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d3, [x0, #768]"
       ]
     },
     "fcos": {
@@ -3209,54 +3223,57 @@
         "ldp x19, x29, [x28, #120]",
         "ldr x26, [x28, #752]",
         "ldr x27, [x28, #760]",
-        "mov v2.8b, v0.8b",
+        "mov v3.8b, v0.8b",
         "mov w21, #0x0",
         "strb w21, [x28, #746]",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d3, [x0, #768]"
       ]
     },
     "fiadd dword [rax]": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xda !11b /0"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr w21, [x4]",
-        "scvtf d2, w21",
+        "mov x21, x4",
+        "ldr w22, [x21]",
+        "scvtf d2, w22",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
+        "fadd d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fimul dword [rax]": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xda !11b /1"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr w21, [x4]",
-        "scvtf d2, w21",
+        "mov x21, x4",
+        "ldr w22, [x21]",
+        "scvtf d2, w22",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
+        "fmul d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "ficom dword [rax]": {
-      "ExpectedInstructionCount": 15,
+      "ExpectedInstructionCount": 16,
       "Comment": [
         "0xda !11b /2"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr w21, [x4]",
-        "scvtf d2, w21",
+        "mov x21, x4",
+        "ldr w22, [x21]",
+        "scvtf d2, w22",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
@@ -3272,14 +3289,15 @@
       ]
     },
     "ficomp dword [rax]": {
-      "ExpectedInstructionCount": 23,
+      "ExpectedInstructionCount": 24,
       "Comment": [
         "0xda !11b /3"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr w21, [x4]",
-        "scvtf d2, w21",
+        "mov x21, x4",
+        "ldr w22, [x21]",
+        "scvtf d2, w22",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
@@ -3294,80 +3312,84 @@
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fisub dword [rax]": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xda !11b /4"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr w21, [x4]",
-        "scvtf d2, w21",
+        "mov x21, x4",
+        "ldr w22, [x21]",
+        "scvtf d2, w22",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
+        "fsub d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fisubr dword [rax]": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xda !11b /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr w21, [x4]",
-        "scvtf d2, w21",
+        "mov x21, x4",
+        "ldr w22, [x21]",
+        "scvtf d2, w22",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
+        "fsub d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fidiv dword [rax]": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xda !11b /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr w21, [x4]",
-        "scvtf d2, w21",
+        "mov x21, x4",
+        "ldr w22, [x21]",
+        "scvtf d2, w22",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
+        "fdiv d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fidivr dword [rax]": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xda !11b /7"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr w21, [x4]",
-        "scvtf d2, w21",
+        "mov x21, x4",
+        "ldr w22, [x21]",
+        "scvtf d2, w22",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
+        "fdiv d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fcmovb st0, st0": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc0 /0"
       ],
@@ -3376,18 +3398,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovb st0, st1": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc1 /0"
       ],
@@ -3396,18 +3419,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovb st0, st2": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc2 /0"
       ],
@@ -3416,18 +3440,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovb st0, st3": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc3 /0"
       ],
@@ -3436,18 +3461,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovb st0, st4": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc4 /0"
       ],
@@ -3456,18 +3482,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovb st0, st5": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc5 /0"
       ],
@@ -3476,18 +3503,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovb st0, st6": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc6 /0"
       ],
@@ -3496,18 +3524,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovb st0, st7": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc7 /0"
       ],
@@ -3516,18 +3545,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmove st0, st0": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc8 /1"
       ],
@@ -3536,18 +3566,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmove st0, st1": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc9 /1"
       ],
@@ -3556,18 +3587,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmove st0, st2": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xca /1"
       ],
@@ -3576,18 +3608,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmove st0, st3": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xcb /1"
       ],
@@ -3596,18 +3629,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmove st0, st4": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xcc /1"
       ],
@@ -3616,18 +3650,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmove st0, st5": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xcd /1"
       ],
@@ -3636,18 +3671,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmove st0, st6": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xce /1"
       ],
@@ -3656,18 +3692,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmove st0, st7": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xcf /1"
       ],
@@ -3676,398 +3713,423 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovbe st0, st0": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xda 11b 0xd0 /0"
       ],
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "csetm x21, hs",
-        "csel x20, x20, x21, eq",
-        "dup v2.2d, x20",
+        "csel x22, x20, x21, eq",
+        "dup v2.2d, x22",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovbe st0, st1": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xda 11b 0xd1 /0"
       ],
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "csetm x21, hs",
-        "csel x20, x20, x21, eq",
-        "dup v2.2d, x20",
+        "csel x22, x20, x21, eq",
+        "dup v2.2d, x22",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovbe st0, st2": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xda 11b 0xd2 /0"
       ],
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "csetm x21, hs",
-        "csel x20, x20, x21, eq",
-        "dup v2.2d, x20",
+        "csel x22, x20, x21, eq",
+        "dup v2.2d, x22",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovbe st0, st3": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xda 11b 0xd3 /0"
       ],
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "csetm x21, hs",
-        "csel x20, x20, x21, eq",
-        "dup v2.2d, x20",
+        "csel x22, x20, x21, eq",
+        "dup v2.2d, x22",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovbe st0, st4": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xda 11b 0xd4 /0"
       ],
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "csetm x21, hs",
-        "csel x20, x20, x21, eq",
-        "dup v2.2d, x20",
+        "csel x22, x20, x21, eq",
+        "dup v2.2d, x22",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovbe st0, st5": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xda 11b 0xd5 /0"
       ],
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "csetm x21, hs",
-        "csel x20, x20, x21, eq",
-        "dup v2.2d, x20",
+        "csel x22, x20, x21, eq",
+        "dup v2.2d, x22",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovbe st0, st6": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xda 11b 0xd6 /0"
       ],
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "csetm x21, hs",
-        "csel x20, x20, x21, eq",
-        "dup v2.2d, x20",
+        "csel x22, x20, x21, eq",
+        "dup v2.2d, x22",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovbe st0, st7": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xda 11b 0xd7 /0"
       ],
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "csetm x21, hs",
-        "csel x20, x20, x21, eq",
-        "dup v2.2d, x20",
+        "csel x22, x20, x21, eq",
+        "dup v2.2d, x22",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovu st0, st0": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xda 11b 0xd8 /1"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x0 (0)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x0 (0)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovu st0, st1": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xda 11b 0xd9 /1"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x1 (1)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovu st0, st2": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xda 11b 0xda /1"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x2 (2)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x2 (2)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovu st0, st3": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xda 11b 0xdb /1"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x3 (3)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x3 (3)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovu st0, st4": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xda 11b 0xdc /1"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x4 (4)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x4 (4)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovu st0, st5": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xda 11b 0xdd /1"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x5 (5)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x5 (5)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovu st0, st6": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xda 11b 0xde /1"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x6 (6)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x6 (6)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovu st0, st7": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xda 11b 0xdf /1"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x7 (7)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x7 (7)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fucompp": {
@@ -4079,8 +4141,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -4096,41 +4158,42 @@
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
         "lsl w23, w21, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "bic w24, w22, w23",
+        "strb w24, [x28, #1026]",
+        "add w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
+        "lsl w22, w21, w20",
+        "bic w21, w24, w22",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fild dword [rax]": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdf !11b /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
-        "ldr w21, [x4]",
-        "scvtf d2, w21",
+        "mov x21, x4",
+        "ldr w22, [x21]",
+        "scvtf d2, w22",
         "add x0, x28, x20, lsl #4",
         "str d2, [x0, #768]"
       ]
     },
     "fisttp dword [rax]": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb !11b /1"
       ],
@@ -4139,19 +4202,20 @@
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
         "fcvtzs w21, d2",
-        "str w21, [x4]",
+        "mov x22, x4",
+        "str w21, [x22]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fist dword [rax]": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0xdb !11b /2"
       ],
@@ -4161,11 +4225,12 @@
         "ldr d2, [x0, #768]",
         "frinti d0, d2",
         "fcvtzs w20, d0",
-        "str w20, [x4]"
+        "mov x21, x4",
+        "str w20, [x21]"
       ]
     },
     "fistp dword [rax]": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xdf !11b /7"
       ],
@@ -4175,25 +4240,27 @@
         "ldr d2, [x0, #768]",
         "frinti d0, d2",
         "fcvtzs w21, d0",
-        "str w21, [x4]",
+        "mov x22, x4",
+        "str w21, [x22]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fld tword [rax]": {
-      "ExpectedInstructionCount": 38,
+      "ExpectedInstructionCount": 39,
       "Comment": [
         "0xdb !11b /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr q2, [x4]",
+        "mov x21, x4",
+        "ldr q2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -4219,21 +4286,21 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "mov v2.8b, v0.8b",
+        "mov v3.8b, v0.8b",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d3, [x0, #768]"
       ]
     },
     "fstp tword [rax]": {
-      "ExpectedInstructionCount": 41,
+      "ExpectedInstructionCount": 42,
       "Comment": [
         "0xdb !11b /7"
       ],
@@ -4265,24 +4332,25 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "str d2, [x4]",
-        "mov x21, v2.d[1]",
-        "strh w21, [x4, #8]",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "mov x21, x4",
+        "str d3, [x21]",
+        "mov x22, v3.d[1]",
+        "strh w22, [x21, #8]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fcmovnb st0, st0": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc0 /0"
       ],
@@ -4291,18 +4359,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnb st0, st1": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc1 /0"
       ],
@@ -4311,18 +4380,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnb st0, st2": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc2 /0"
       ],
@@ -4331,18 +4401,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnb st0, st3": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc3 /0"
       ],
@@ -4351,18 +4422,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnb st0, st4": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc4 /0"
       ],
@@ -4371,18 +4443,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnb st0, st5": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc5 /0"
       ],
@@ -4391,18 +4464,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnb st0, st6": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc6 /0"
       ],
@@ -4411,18 +4485,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnb st0, st7": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc7 /0"
       ],
@@ -4431,18 +4506,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovne st0, st0": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc8 /1"
       ],
@@ -4451,18 +4527,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovne st0, st1": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc9 /1"
       ],
@@ -4471,18 +4548,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovne st0, st2": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xca /1"
       ],
@@ -4491,18 +4569,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovne st0, st3": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xcb /1"
       ],
@@ -4511,18 +4590,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovne st0, st4": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xcc /1"
       ],
@@ -4531,18 +4611,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovne st0, st5": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xcd /1"
       ],
@@ -4551,18 +4632,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovne st0, st6": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xce /1"
       ],
@@ -4571,18 +4653,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovne st0, st7": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xcf /1"
       ],
@@ -4591,390 +4674,415 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnbe st0, st0": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb 11b 0xd0 /2"
       ],
       "ExpectedArm64ASM": [
         "csetm x20, lo",
-        "csel x20, x20, xzr, ne",
-        "dup v2.2d, x20",
+        "csel x21, x20, xzr, ne",
+        "dup v2.2d, x21",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnbe st0, st1": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb 11b 0xd1 /2"
       ],
       "ExpectedArm64ASM": [
         "csetm x20, lo",
-        "csel x20, x20, xzr, ne",
-        "dup v2.2d, x20",
+        "csel x21, x20, xzr, ne",
+        "dup v2.2d, x21",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnbe st0, st2": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb 11b 0xd2 /2"
       ],
       "ExpectedArm64ASM": [
         "csetm x20, lo",
-        "csel x20, x20, xzr, ne",
-        "dup v2.2d, x20",
+        "csel x21, x20, xzr, ne",
+        "dup v2.2d, x21",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnbe st0, st3": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb 11b 0xd3 /2"
       ],
       "ExpectedArm64ASM": [
         "csetm x20, lo",
-        "csel x20, x20, xzr, ne",
-        "dup v2.2d, x20",
+        "csel x21, x20, xzr, ne",
+        "dup v2.2d, x21",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnbe st0, st4": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb 11b 0xd4 /2"
       ],
       "ExpectedArm64ASM": [
         "csetm x20, lo",
-        "csel x20, x20, xzr, ne",
-        "dup v2.2d, x20",
+        "csel x21, x20, xzr, ne",
+        "dup v2.2d, x21",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnbe st0, st5": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb 11b 0xd5 /2"
       ],
       "ExpectedArm64ASM": [
         "csetm x20, lo",
-        "csel x20, x20, xzr, ne",
-        "dup v2.2d, x20",
+        "csel x21, x20, xzr, ne",
+        "dup v2.2d, x21",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnbe st0, st6": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb 11b 0xd6 /2"
       ],
       "ExpectedArm64ASM": [
         "csetm x20, lo",
-        "csel x20, x20, xzr, ne",
-        "dup v2.2d, x20",
+        "csel x21, x20, xzr, ne",
+        "dup v2.2d, x21",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnbe st0, st7": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb 11b 0xd7 /2"
       ],
       "ExpectedArm64ASM": [
         "csetm x20, lo",
-        "csel x20, x20, xzr, ne",
-        "dup v2.2d, x20",
+        "csel x21, x20, xzr, ne",
+        "dup v2.2d, x21",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnu st0, st0": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdb 11b 0xd8 /3"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x0 (0)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x0 (0)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovnu st0, st1": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdb 11b 0xd9 /3"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x1 (1)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovnu st0, st2": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdb 11b 0xda /3"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x2 (2)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x2 (2)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovnu st0, st3": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdb 11b 0xdb /3"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x3 (3)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x3 (3)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovnu st0, st4": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdb 11b 0xdc /3"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x4 (4)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x4 (4)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovnu st0, st5": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdb 11b 0xdd /3"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x5 (5)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x5 (5)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovnu st0, st6": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdb 11b 0xde /3"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x6 (6)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x6 (6)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovnu st0, st7": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdb 11b 0xdf /3"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x7 (7)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x7 (7)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fnclex": {
@@ -5009,347 +5117,366 @@
       ]
     },
     "fucomi st0, st0": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 12,
       "Comment": [
         "0xdb 11b 0xe8 /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
-        "cset w26, vc",
+        "cset w20, vc",
+        "mov x26, x20",
         "axflag",
         "cfinv"
       ]
     },
     "fucomi st0, st1": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 12,
       "Comment": [
         "0xdb 11b 0xe9 /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
-        "cset w26, vc",
+        "cset w20, vc",
+        "mov x26, x20",
         "axflag",
         "cfinv"
       ]
     },
     "fucomi st0, st2": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 12,
       "Comment": [
         "0xdb 11b 0xea /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
-        "cset w26, vc",
+        "cset w20, vc",
+        "mov x26, x20",
         "axflag",
         "cfinv"
       ]
     },
     "fucomi st0, st3": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 12,
       "Comment": [
         "0xdb 11b 0xeb /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
-        "cset w26, vc",
+        "cset w20, vc",
+        "mov x26, x20",
         "axflag",
         "cfinv"
       ]
     },
     "fucomi st0, st4": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 12,
       "Comment": [
         "0xdb 11b 0xec /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
-        "cset w26, vc",
+        "cset w20, vc",
+        "mov x26, x20",
         "axflag",
         "cfinv"
       ]
     },
     "fucomi st0, st5": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 12,
       "Comment": [
         "0xdb 11b 0xed /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
-        "cset w26, vc",
+        "cset w20, vc",
+        "mov x26, x20",
         "axflag",
         "cfinv"
       ]
     },
     "fucomi st0, st6": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 12,
       "Comment": [
         "0xdb 11b 0xee /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
-        "cset w26, vc",
+        "cset w20, vc",
+        "mov x26, x20",
         "axflag",
         "cfinv"
       ]
     },
     "fucomi st0, st7": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 12,
       "Comment": [
         "0xdb 11b 0xef /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
-        "cset w26, vc",
+        "cset w20, vc",
+        "mov x26, x20",
         "axflag",
         "cfinv"
       ]
     },
     "fcomi st0, st0": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 12,
       "Comment": [
         "0xdb 11b 0xf0 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
-        "cset w26, vc",
+        "cset w20, vc",
+        "mov x26, x20",
         "axflag",
         "cfinv"
       ]
     },
     "fcomi st0, st1": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 12,
       "Comment": [
         "0xdb 11b 0xf1 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
-        "cset w26, vc",
+        "cset w20, vc",
+        "mov x26, x20",
         "axflag",
         "cfinv"
       ]
     },
     "fcomi st0, st2": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 12,
       "Comment": [
         "0xdb 11b 0xf2 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
-        "cset w26, vc",
+        "cset w20, vc",
+        "mov x26, x20",
         "axflag",
         "cfinv"
       ]
     },
     "fcomi st0, st3": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 12,
       "Comment": [
         "0xdb 11b 0xf3 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
-        "cset w26, vc",
+        "cset w20, vc",
+        "mov x26, x20",
         "axflag",
         "cfinv"
       ]
     },
     "fcomi st0, st4": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 12,
       "Comment": [
         "0xdb 11b 0xf4 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
-        "cset w26, vc",
+        "cset w20, vc",
+        "mov x26, x20",
         "axflag",
         "cfinv"
       ]
     },
     "fcomi st0, st5": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 12,
       "Comment": [
         "0xdb 11b 0xf5 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
-        "cset w26, vc",
+        "cset w20, vc",
+        "mov x26, x20",
         "axflag",
         "cfinv"
       ]
     },
     "fcomi st0, st6": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 12,
       "Comment": [
         "0xdb 11b 0xf6 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
-        "cset w26, vc",
+        "cset w20, vc",
+        "mov x26, x20",
         "axflag",
         "cfinv"
       ]
     },
     "fcomi st0, st7": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 12,
       "Comment": [
         "0xdb 11b 0xf7 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
-        "cset w26, vc",
+        "cset w20, vc",
+        "mov x26, x20",
         "axflag",
         "cfinv"
       ]
     },
     "fadd qword [rax]": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0xdc !11b /0"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
+        "fadd d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fmul qword [rax]": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0xdc !11b /1"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
+        "fmul d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fcom qword [rax]": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xdc !11b /2"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
@@ -5365,13 +5492,14 @@
       ]
     },
     "fcomp qword [rax]": {
-      "ExpectedInstructionCount": 22,
+      "ExpectedInstructionCount": 23,
       "Comment": [
         "0xdc !11b /3"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
@@ -5386,72 +5514,76 @@
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fsub qword [rax]": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0xdc !11b /4"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
+        "fsub d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr qword [rax]": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0xdc !11b /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
+        "fsub d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv qword [rax]": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0xdc !11b /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
+        "fdiv d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr qword [rax]": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0xdc !11b /7"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
+        "fdiv d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "db 0xdc, 0xc0": {
@@ -5464,14 +5596,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fadd d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fadd st1, st0": {
@@ -5482,14 +5614,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fadd d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fadd st2, st0": {
@@ -5500,14 +5632,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fadd d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fadd st3, st0": {
@@ -5518,14 +5650,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fadd d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fadd st4, st0": {
@@ -5536,14 +5668,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fadd d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fadd st5, st0": {
@@ -5554,14 +5686,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fadd d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fadd st6, st0": {
@@ -5572,14 +5704,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fadd d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fadd st7, st0": {
@@ -5590,14 +5722,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fadd d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "db 0xdc, 0xc8": {
@@ -5610,14 +5742,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fmul d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fmul st1, st0": {
@@ -5628,14 +5760,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fmul d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fmul st2, st0": {
@@ -5646,14 +5778,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fmul d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fmul st3, st0": {
@@ -5664,14 +5796,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fmul d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fmul st4, st0": {
@@ -5682,14 +5814,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fmul d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fmul st5, st0": {
@@ -5700,14 +5832,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fmul d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fmul st6, st0": {
@@ -5718,14 +5850,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fmul d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fmul st7, st0": {
@@ -5736,14 +5868,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fmul d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "db 0xdc, 0xe0": {
@@ -5756,14 +5888,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr st1, st0": {
@@ -5774,14 +5906,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr st2, st0": {
@@ -5792,14 +5924,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr st3, st0": {
@@ -5810,14 +5942,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr st4, st0": {
@@ -5828,14 +5960,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr st5, st0": {
@@ -5846,14 +5978,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr st6, st0": {
@@ -5864,14 +5996,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr st7, st0": {
@@ -5882,14 +6014,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "db 0xdc, 0xe8": {
@@ -5902,14 +6034,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsub st1, st0": {
@@ -5920,14 +6052,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsub st2, st0": {
@@ -5938,14 +6070,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsub st3, st0": {
@@ -5956,14 +6088,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsub st4, st0": {
@@ -5974,14 +6106,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsub st5, st0": {
@@ -5992,14 +6124,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsub st6, st0": {
@@ -6010,14 +6142,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsub st7, st0": {
@@ -6028,14 +6160,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "db 0xdc, 0xf0": {
@@ -6048,14 +6180,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr st1, st0": {
@@ -6066,14 +6198,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr st2, st0": {
@@ -6084,14 +6216,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr st3, st0": {
@@ -6102,14 +6234,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr st4, st0": {
@@ -6120,14 +6252,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr st5, st0": {
@@ -6138,14 +6270,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr st6, st0": {
@@ -6156,14 +6288,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr st7, st0": {
@@ -6174,14 +6306,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "db 0xdc, 0xf8": {
@@ -6194,14 +6326,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv st1, st0": {
@@ -6212,14 +6344,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv st2, st0": {
@@ -6230,14 +6362,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv st3, st0": {
@@ -6248,14 +6380,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv st4, st0": {
@@ -6266,14 +6398,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv st5, st0": {
@@ -6284,14 +6416,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv st6, st0": {
@@ -6302,14 +6434,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv st7, st0": {
@@ -6320,30 +6452,31 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fld qword [rax]": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdd !11b /0"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -6351,7 +6484,7 @@
       ]
     },
     "fisttp qword [rax]": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdd !11b /1"
       ],
@@ -6360,19 +6493,20 @@
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
         "fcvtzs x21, d2",
-        "str x21, [x4]",
+        "mov x22, x4",
+        "str x21, [x22]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fst qword [rax]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xdd !11b /2"
       ],
@@ -6380,11 +6514,12 @@
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
-        "str d2, [x4]"
+        "mov x20, x4",
+        "str d2, [x20]"
       ]
     },
     "fstp qword [rax]": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdd !11b /3"
       ],
@@ -6392,87 +6527,90 @@
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
-        "str d2, [x4]",
+        "mov x21, x4",
+        "str d2, [x21]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "frstor [rax]": {
-      "ExpectedInstructionCount": 325,
+      "ExpectedInstructionCount": 328,
       "Comment": [
         "0xdd !11b /4"
       ],
       "ExpectedArm64ASM": [
-        "ldrh w20, [x4]",
-        "lsr w21, w20, #10",
-        "and w21, w21, #0x3",
-        "rbit w1, w21",
+        "mov x20, x4",
+        "ldrh w21, [x20]",
+        "lsr w22, w21, #10",
+        "and w23, w22, #0x3",
+        "rbit w1, w23",
         "lsr w1, w1, #30",
         "mrs x0, fpcr",
         "bfi x0, x1, #22, #2",
-        "lsr x1, x21, #2",
+        "lsr x1, x23, #2",
         "bfi x0, x1, #24, #1",
         "msr fpcr, x0",
-        "strh w20, [x28, #1024]",
-        "strh w20, [x28, #1024]",
-        "ldr w20, [x4, #4]",
-        "ubfx w21, w20, #11, #3",
-        "strb w21, [x28, #747]",
-        "ubfx w22, w20, #8, #1",
-        "ubfx w23, w20, #9, #1",
-        "ubfx w24, w20, #10, #1",
-        "ubfx w20, w20, #14, #1",
-        "strb w22, [x28, #744]",
-        "strb w23, [x28, #745]",
-        "strb w24, [x28, #746]",
-        "strb w20, [x28, #750]",
-        "ldr w20, [x4, #8]",
-        "ubfx w22, w20, #0, #2",
-        "mrs x23, nzcv",
-        "cmp x22, #0x3 (3)",
-        "cset x22, ne",
-        "ubfx w24, w20, #2, #2",
-        "cmp x24, #0x3 (3)",
-        "cset x24, ne",
-        "orr w22, w22, w24, lsl #1",
-        "ubfx w24, w20, #4, #2",
-        "cmp x24, #0x3 (3)",
-        "cset x24, ne",
-        "orr w22, w22, w24, lsl #2",
-        "ubfx w24, w20, #6, #2",
-        "cmp x24, #0x3 (3)",
-        "cset x24, ne",
-        "orr w22, w22, w24, lsl #3",
-        "ubfx w24, w20, #8, #2",
-        "cmp x24, #0x3 (3)",
-        "cset x24, ne",
-        "orr w22, w22, w24, lsl #4",
-        "ubfx w24, w20, #10, #2",
-        "cmp x24, #0x3 (3)",
-        "cset x24, ne",
-        "orr w22, w22, w24, lsl #5",
-        "ubfx w24, w20, #12, #2",
-        "cmp x24, #0x3 (3)",
-        "cset x24, ne",
-        "orr w22, w22, w24, lsl #6",
-        "ubfx w20, w20, #14, #2",
-        "cmp x20, #0x3 (3)",
-        "cset x20, ne",
-        "orr w20, w22, w20, lsl #7",
-        "strb w20, [x28, #1026]",
-        "add x20, x4, #0x1c (28)",
-        "mov x22, #0xffffffffffffffff",
-        "mov w24, #0xffff",
-        "fmov d2, x22",
-        "mov v2.d[1], x24",
-        "ldur q3, [x4, #28]",
-        "and v3.16b, v3.16b, v2.16b",
+        "strh w21, [x28, #1024]",
+        "strh w21, [x28, #1024]",
+        "ldr w21, [x20, #4]",
+        "ubfx w22, w21, #11, #3",
+        "strb w22, [x28, #747]",
+        "ubfx w23, w21, #8, #1",
+        "ubfx w24, w21, #9, #1",
+        "ubfx w25, w21, #10, #1",
+        "ubfx w30, w21, #14, #1",
+        "strb w23, [x28, #744]",
+        "strb w24, [x28, #745]",
+        "strb w25, [x28, #746]",
+        "strb w30, [x28, #750]",
+        "ldr w21, [x20, #8]",
+        "ubfx w23, w21, #0, #2",
+        "mrs x24, nzcv",
+        "cmp x23, #0x3 (3)",
+        "cset x25, ne",
+        "ubfx w23, w21, #2, #2",
+        "cmp x23, #0x3 (3)",
+        "cset x30, ne",
+        "orr w23, w25, w30, lsl #1",
+        "ubfx w25, w21, #4, #2",
+        "cmp x25, #0x3 (3)",
+        "cset x30, ne",
+        "orr w25, w23, w30, lsl #2",
+        "ubfx w23, w21, #6, #2",
+        "cmp x23, #0x3 (3)",
+        "cset x30, ne",
+        "orr w23, w25, w30, lsl #3",
+        "ubfx w25, w21, #8, #2",
+        "cmp x25, #0x3 (3)",
+        "cset x30, ne",
+        "orr w25, w23, w30, lsl #4",
+        "ubfx w23, w21, #10, #2",
+        "cmp x23, #0x3 (3)",
+        "cset x30, ne",
+        "orr w23, w25, w30, lsl #5",
+        "ubfx w25, w21, #12, #2",
+        "cmp x25, #0x3 (3)",
+        "cset x30, ne",
+        "orr w25, w23, w30, lsl #6",
+        "ubfx w23, w21, #14, #2",
+        "cmp x23, #0x3 (3)",
+        "cset x21, ne",
+        "orr w23, w25, w21, lsl #7",
+        "strb w23, [x28, #1026]",
+        "add x21, x20, #0x1c (28)",
+        "mov x23, #0xffffffffffffffff",
+        "mov w25, #0xffff",
+        "fmov d2, x23",
+        "mov v3.16b, v2.16b",
+        "mov v3.d[1], x25",
+        "ldur q2, [x20, #28]",
+        "and v4.16b, v2.16b, v3.16b",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -6485,8 +6623,8 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
+        "mov x1, v4.d[0]",
+        "umov w2, v4.h[4]",
         "ldr x3, [x28, #1192]",
         "blr x3",
         "ldr w4, [x28, #728]",
@@ -6498,14 +6636,14 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "mov v3.8b, v0.8b",
-        "add x0, x28, x21, lsl #4",
-        "str d3, [x0, #768]",
-        "add x22, x20, #0xa (10)",
-        "add w21, w21, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "ldur q3, [x20, #10]",
-        "and v3.16b, v3.16b, v2.16b",
+        "mov v2.8b, v0.8b",
+        "add x0, x28, x22, lsl #4",
+        "str d2, [x0, #768]",
+        "add x20, x21, #0xa (10)",
+        "add w23, w22, #0x1 (1)",
+        "and w22, w23, #0x7",
+        "ldur q2, [x21, #10]",
+        "and v4.16b, v2.16b, v3.16b",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -6518,8 +6656,8 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
+        "mov x1, v4.d[0]",
+        "umov w2, v4.h[4]",
         "ldr x3, [x28, #1192]",
         "blr x3",
         "ldr w4, [x28, #728]",
@@ -6531,14 +6669,14 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "mov v3.8b, v0.8b",
-        "add x0, x28, x21, lsl #4",
-        "str d3, [x0, #768]",
-        "add x20, x22, #0xa (10)",
-        "add w21, w21, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "ldur q3, [x22, #10]",
-        "and v3.16b, v3.16b, v2.16b",
+        "mov v2.8b, v0.8b",
+        "add x0, x28, x22, lsl #4",
+        "str d2, [x0, #768]",
+        "add x21, x20, #0xa (10)",
+        "add w23, w22, #0x1 (1)",
+        "and w22, w23, #0x7",
+        "ldur q2, [x20, #10]",
+        "and v4.16b, v2.16b, v3.16b",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -6551,8 +6689,8 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
+        "mov x1, v4.d[0]",
+        "umov w2, v4.h[4]",
         "ldr x3, [x28, #1192]",
         "blr x3",
         "ldr w4, [x28, #728]",
@@ -6564,14 +6702,14 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "mov v3.8b, v0.8b",
-        "add x0, x28, x21, lsl #4",
-        "str d3, [x0, #768]",
-        "add x22, x20, #0xa (10)",
-        "add w21, w21, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "ldur q3, [x20, #10]",
-        "and v3.16b, v3.16b, v2.16b",
+        "mov v2.8b, v0.8b",
+        "add x0, x28, x22, lsl #4",
+        "str d2, [x0, #768]",
+        "add x20, x21, #0xa (10)",
+        "add w23, w22, #0x1 (1)",
+        "and w22, w23, #0x7",
+        "ldur q2, [x21, #10]",
+        "and v4.16b, v2.16b, v3.16b",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -6584,8 +6722,8 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
+        "mov x1, v4.d[0]",
+        "umov w2, v4.h[4]",
         "ldr x3, [x28, #1192]",
         "blr x3",
         "ldr w4, [x28, #728]",
@@ -6597,14 +6735,14 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "mov v3.8b, v0.8b",
-        "add x0, x28, x21, lsl #4",
-        "str d3, [x0, #768]",
-        "add x20, x22, #0xa (10)",
-        "add w21, w21, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "ldur q3, [x22, #10]",
-        "and v3.16b, v3.16b, v2.16b",
+        "mov v2.8b, v0.8b",
+        "add x0, x28, x22, lsl #4",
+        "str d2, [x0, #768]",
+        "add x21, x20, #0xa (10)",
+        "add w23, w22, #0x1 (1)",
+        "and w22, w23, #0x7",
+        "ldur q2, [x20, #10]",
+        "and v4.16b, v2.16b, v3.16b",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -6617,8 +6755,8 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
+        "mov x1, v4.d[0]",
+        "umov w2, v4.h[4]",
         "ldr x3, [x28, #1192]",
         "blr x3",
         "ldr w4, [x28, #728]",
@@ -6630,14 +6768,14 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "mov v3.8b, v0.8b",
-        "add x0, x28, x21, lsl #4",
-        "str d3, [x0, #768]",
-        "add x22, x20, #0xa (10)",
-        "add w21, w21, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "ldur q3, [x20, #10]",
-        "and v3.16b, v3.16b, v2.16b",
+        "mov v2.8b, v0.8b",
+        "add x0, x28, x22, lsl #4",
+        "str d2, [x0, #768]",
+        "add x20, x21, #0xa (10)",
+        "add w23, w22, #0x1 (1)",
+        "and w22, w23, #0x7",
+        "ldur q2, [x21, #10]",
+        "and v4.16b, v2.16b, v3.16b",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -6650,8 +6788,8 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
+        "mov x1, v4.d[0]",
+        "umov w2, v4.h[4]",
         "ldr x3, [x28, #1192]",
         "blr x3",
         "ldr w4, [x28, #728]",
@@ -6663,14 +6801,14 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "mov v3.8b, v0.8b",
-        "add x0, x28, x21, lsl #4",
-        "str d3, [x0, #768]",
-        "add x20, x22, #0xa (10)",
-        "add w21, w21, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "ldur q3, [x22, #10]",
-        "and v2.16b, v3.16b, v2.16b",
+        "mov v2.8b, v0.8b",
+        "add x0, x28, x22, lsl #4",
+        "str d2, [x0, #768]",
+        "add x21, x20, #0xa (10)",
+        "add w23, w22, #0x1 (1)",
+        "and w22, w23, #0x7",
+        "ldur q2, [x20, #10]",
+        "and v4.16b, v2.16b, v3.16b",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -6683,8 +6821,8 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v2.d[0]",
-        "umov w2, v2.h[4]",
+        "mov x1, v4.d[0]",
+        "umov w2, v4.h[4]",
         "ldr x3, [x28, #1192]",
         "blr x3",
         "ldr w4, [x28, #728]",
@@ -6697,14 +6835,15 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
         "mov v2.8b, v0.8b",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str d2, [x0, #768]",
-        "add x22, x20, #0xa (10)",
-        "add w21, w21, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "ldur d2, [x20, #10]",
-        "ldr h3, [x22, #8]",
-        "mov v2.h[4], v3.h[0]",
+        "add x20, x21, #0xa (10)",
+        "add w23, w22, #0x1 (1)",
+        "and w22, w23, #0x7",
+        "ldur d2, [x21, #10]",
+        "ldr h3, [x20, #8]",
+        "mov v4.16b, v2.16b",
+        "mov v4.h[4], v3.h[0]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -6717,8 +6856,8 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v2.d[0]",
-        "umov w2, v2.h[4]",
+        "mov x1, v4.d[0]",
+        "umov w2, v4.h[4]",
         "ldr x3, [x28, #1192]",
         "blr x3",
         "ldr w4, [x28, #728]",
@@ -6731,81 +6870,85 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
         "mov v2.8b, v0.8b",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str d2, [x0, #768]",
-        "msr nzcv, x23"
+        "msr nzcv, x24"
       ]
     },
     "fnsave [rax]": {
-      "ExpectedInstructionCount": 335,
+      "ExpectedInstructionCount": 340,
       "Comment": [
         "0xdd !11b /6"
       ],
       "ExpectedArm64ASM": [
-        "ldrb w20, [x28, #747]",
-        "ldrh w21, [x28, #1024]",
-        "str w21, [x4]",
-        "mov w21, #0x0",
-        "mov x22, x21",
-        "bfi x22, x20, #11, #3",
-        "ldrb w23, [x28, #744]",
-        "ldrb w24, [x28, #745]",
-        "ldrb w25, [x28, #746]",
-        "ldrb w30, [x28, #750]",
-        "orr x22, x22, x23, lsl #8",
-        "orr x22, x22, x24, lsl #9",
-        "orr x22, x22, x25, lsl #10",
-        "orr x22, x22, x30, lsl #14",
-        "str w22, [x4, #4]",
-        "ldrb w22, [x28, #1026]",
-        "and w23, w22, #0x1",
+        "sub sp, sp, #0x20 (32)",
+        "mov x20, x4",
+        "ldrb w21, [x28, #747]",
+        "ldrh w22, [x28, #1024]",
+        "str w22, [x20]",
+        "mov w22, #0x0",
+        "mov x23, x22",
+        "bfi x23, x21, #11, #3",
+        "ldrb w24, [x28, #744]",
+        "ldrb w25, [x28, #745]",
+        "ldrb w30, [x28, #746]",
+        "ldrb w18, [x28, #750]",
+        "strb w21, [sp]",
+        "orr x21, x23, x24, lsl #8",
+        "orr x23, x21, x25, lsl #9",
+        "orr x21, x23, x30, lsl #10",
+        "orr x23, x21, x18, lsl #14",
+        "str w23, [x20, #4]",
+        "ldrb w21, [x28, #1026]",
+        "and w23, w21, #0x1",
         "mov w24, #0x3",
         "mrs x25, nzcv",
         "cmp x23, #0x0 (0)",
-        "csel x23, x24, x21, eq",
-        "orr w23, w21, w23",
-        "lsr w30, w22, #1",
-        "and w30, w30, #0x1",
+        "csel x30, x24, x22, eq",
+        "orr w23, w22, w30",
+        "lsr w30, w21, #1",
+        "and w18, w30, #0x1",
+        "cmp x18, #0x0 (0)",
+        "csel x30, x24, x22, eq",
+        "orr w18, w23, w30, lsl #2",
+        "lsr w23, w21, #2",
+        "and w30, w23, #0x1",
         "cmp x30, #0x0 (0)",
-        "csel x30, x24, x21, eq",
-        "orr w23, w23, w30, lsl #2",
-        "lsr w30, w22, #2",
-        "and w30, w30, #0x1",
-        "cmp x30, #0x0 (0)",
-        "csel x30, x24, x21, eq",
-        "orr w23, w23, w30, lsl #4",
-        "lsr w30, w22, #3",
-        "and w30, w30, #0x1",
-        "cmp x30, #0x0 (0)",
-        "csel x30, x24, x21, eq",
-        "orr w23, w23, w30, lsl #6",
-        "lsr w30, w22, #4",
-        "and w30, w30, #0x1",
-        "cmp x30, #0x0 (0)",
-        "csel x30, x24, x21, eq",
-        "orr w23, w23, w30, lsl #8",
-        "lsr w30, w22, #5",
-        "and w30, w30, #0x1",
+        "csel x23, x24, x22, eq",
+        "orr w30, w18, w23, lsl #4",
+        "lsr w23, w21, #3",
+        "and w18, w23, #0x1",
+        "cmp x18, #0x0 (0)",
+        "csel x23, x24, x22, eq",
+        "orr w18, w30, w23, lsl #6",
+        "lsr w23, w21, #4",
+        "and w30, w23, #0x1",
         "cmp x30, #0x0 (0)",
-        "csel x30, x24, x21, eq",
-        "orr w23, w23, w30, lsl #10",
-        "lsr w30, w22, #6",
-        "and w30, w30, #0x1",
+        "csel x23, x24, x22, eq",
+        "orr w30, w18, w23, lsl #8",
+        "lsr w23, w21, #5",
+        "and w18, w23, #0x1",
+        "cmp x18, #0x0 (0)",
+        "csel x23, x24, x22, eq",
+        "orr w18, w30, w23, lsl #10",
+        "lsr w23, w21, #6",
+        "and w30, w23, #0x1",
         "cmp x30, #0x0 (0)",
-        "csel x30, x24, x21, eq",
-        "orr w23, w23, w30, lsl #12",
-        "lsr w22, w22, #7",
-        "and w22, w22, #0x1",
-        "cmp x22, #0x0 (0)",
-        "csel x22, x24, x21, eq",
-        "orr w22, w23, w22, lsl #14",
-        "str w22, [x4, #8]",
-        "str w21, [x4, #12]",
-        "str w21, [x4, #16]",
-        "str w21, [x4, #20]",
-        "str w21, [x4, #24]",
-        "add x22, x4, #0x1c (28)",
-        "add x0, x28, x20, lsl #4",
+        "csel x23, x24, x22, eq",
+        "orr w30, w18, w23, lsl #12",
+        "lsr w23, w21, #7",
+        "and w21, w23, #0x1",
+        "cmp x21, #0x0 (0)",
+        "csel x23, x24, x22, eq",
+        "orr w21, w30, w23, lsl #14",
+        "str w21, [x20, #8]",
+        "str w22, [x20, #12]",
+        "str w22, [x20, #16]",
+        "str w22, [x20, #20]",
+        "str w22, [x20, #24]",
+        "add x21, x20, #0x1c (28)",
+        "ldrb w23, [sp]",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
@@ -6831,14 +6974,14 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "stur q2, [x4, #28]",
-        "add x23, x22, #0xa (10)",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "stur q3, [x20, #28]",
+        "add x20, x21, #0xa (10)",
+        "add w24, w23, #0x1 (1)",
+        "and w23, w24, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
@@ -6864,14 +7007,14 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "stur q2, [x22, #10]",
-        "add x22, x23, #0xa (10)",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "stur q3, [x21, #10]",
+        "add x21, x20, #0xa (10)",
+        "add w24, w23, #0x1 (1)",
+        "and w23, w24, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
@@ -6897,14 +7040,14 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "stur q2, [x23, #10]",
-        "add x23, x22, #0xa (10)",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "stur q3, [x20, #10]",
+        "add x20, x21, #0xa (10)",
+        "add w24, w23, #0x1 (1)",
+        "and w23, w24, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
@@ -6930,14 +7073,14 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "stur q2, [x22, #10]",
-        "add x22, x23, #0xa (10)",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "stur q3, [x21, #10]",
+        "add x21, x20, #0xa (10)",
+        "add w24, w23, #0x1 (1)",
+        "and w23, w24, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
@@ -6963,14 +7106,14 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "stur q2, [x23, #10]",
-        "add x23, x22, #0xa (10)",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "stur q3, [x20, #10]",
+        "add x20, x21, #0xa (10)",
+        "add w24, w23, #0x1 (1)",
+        "and w23, w24, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
@@ -6996,14 +7139,14 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "stur q2, [x22, #10]",
-        "add x22, x23, #0xa (10)",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "stur q3, [x21, #10]",
+        "add x21, x20, #0xa (10)",
+        "add w24, w23, #0x1 (1)",
+        "and w23, w24, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
@@ -7029,14 +7172,14 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "stur q2, [x23, #10]",
-        "add x23, x22, #0xa (10)",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "stur q3, [x20, #10]",
+        "add x20, x21, #0xa (10)",
+        "add w24, w23, #0x1 (1)",
+        "and w23, w24, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
@@ -7062,41 +7205,44 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "stur d2, [x22, #10]",
-        "dup v2.8h, v2.h[4]",
-        "str h2, [x23, #8]",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "stur d3, [x21, #10]",
+        "dup v2.8h, v3.h[4]",
+        "str h2, [x20, #8]",
         "mov w20, #0x37f",
         "strh w20, [x28, #1024]",
-        "strb w21, [x28, #747]",
-        "strb w21, [x28, #744]",
-        "strb w21, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w21, [x28, #750]",
-        "strb w21, [x28, #1026]",
-        "msr nzcv, x25"
+        "strb w22, [x28, #747]",
+        "strb w22, [x28, #744]",
+        "strb w22, [x28, #745]",
+        "strb w22, [x28, #746]",
+        "strb w22, [x28, #750]",
+        "strb w22, [x28, #1026]",
+        "msr nzcv, x25",
+        "add sp, sp, #0x20 (32)"
       ]
     },
     "fnstsw [rax]": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdd !11b /7"
       ],
       "ExpectedArm64ASM": [
         "mov w20, #0x0",
         "ldrb w21, [x28, #747]",
-        "bfi x20, x21, #11, #3",
-        "ldrb w21, [x28, #744]",
-        "ldrb w22, [x28, #745]",
+        "mov x22, x20",
+        "bfi x22, x21, #11, #3",
+        "ldrb w20, [x28, #744]",
+        "ldrb w21, [x28, #745]",
         "ldrb w23, [x28, #746]",
         "ldrb w24, [x28, #750]",
-        "orr x20, x20, x21, lsl #8",
-        "orr x20, x20, x22, lsl #9",
-        "orr x20, x20, x23, lsl #10",
-        "orr x20, x20, x24, lsl #14",
-        "strh w20, [x4]"
+        "orr x25, x22, x20, lsl #8",
+        "orr x20, x25, x21, lsl #9",
+        "orr x21, x20, x23, lsl #10",
+        "orr x20, x21, x24, lsl #14",
+        "mov x21, x4",
+        "strh w20, [x21]"
       ]
     },
     "ffree st0": {
@@ -7106,12 +7252,12 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x0 (0)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x0 (0)",
+        "and w20, w21, #0x7",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w20, w22, w20",
-        "bic w20, w21, w20",
+        "lsl w23, w22, w20",
+        "bic w20, w21, w23",
         "strb w20, [x28, #1026]"
       ]
     },
@@ -7123,11 +7269,11 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w20, w21, w20",
-        "bic w20, w22, w20",
+        "lsl w23, w21, w20",
+        "bic w20, w22, w23",
         "strb w20, [x28, #1026]"
       ]
     },
@@ -7138,12 +7284,12 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x2 (2)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x2 (2)",
+        "and w20, w21, #0x7",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w20, w22, w20",
-        "bic w20, w21, w20",
+        "lsl w23, w22, w20",
+        "bic w20, w21, w23",
         "strb w20, [x28, #1026]"
       ]
     },
@@ -7154,12 +7300,12 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x3 (3)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x3 (3)",
+        "and w20, w21, #0x7",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w20, w22, w20",
-        "bic w20, w21, w20",
+        "lsl w23, w22, w20",
+        "bic w20, w21, w23",
         "strb w20, [x28, #1026]"
       ]
     },
@@ -7170,12 +7316,12 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x4 (4)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x4 (4)",
+        "and w20, w21, #0x7",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w20, w22, w20",
-        "bic w20, w21, w20",
+        "lsl w23, w22, w20",
+        "bic w20, w21, w23",
         "strb w20, [x28, #1026]"
       ]
     },
@@ -7186,12 +7332,12 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x5 (5)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x5 (5)",
+        "and w20, w21, #0x7",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w20, w22, w20",
-        "bic w20, w21, w20",
+        "lsl w23, w22, w20",
+        "bic w20, w21, w23",
         "strb w20, [x28, #1026]"
       ]
     },
@@ -7202,12 +7348,12 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x6 (6)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x6 (6)",
+        "and w20, w21, #0x7",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w20, w22, w20",
-        "bic w20, w21, w20",
+        "lsl w23, w22, w20",
+        "bic w20, w21, w23",
         "strb w20, [x28, #1026]"
       ]
     },
@@ -7218,12 +7364,12 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x7 (7)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x7 (7)",
+        "and w20, w21, #0x7",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w20, w22, w20",
-        "bic w20, w21, w20",
+        "lsl w23, w22, w20",
+        "bic w20, w21, w23",
         "strb w20, [x28, #1026]"
       ]
     },
@@ -7235,10 +7381,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -7250,10 +7396,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -7265,10 +7411,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -7280,10 +7426,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -7295,10 +7441,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -7310,10 +7456,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -7325,10 +7471,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -7340,10 +7486,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -7355,18 +7501,18 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -7379,17 +7525,17 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
+        "and w23, w22, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "str q2, [x0, #768]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -7401,18 +7547,18 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -7424,18 +7570,18 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -7447,19 +7593,19 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "strb w20, [x28, #747]"
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
+        "strb w20, [x28, #747]"
       ]
     },
     "fstp st5": {
@@ -7470,18 +7616,18 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -7493,18 +7639,18 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -7516,18 +7662,18 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -7540,8 +7686,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x0",
         "add w22, w20, #0x0 (0)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -7564,8 +7710,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -7589,8 +7735,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -7614,8 +7760,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -7639,8 +7785,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -7664,8 +7810,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -7689,8 +7835,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -7714,8 +7860,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -7740,8 +7886,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x0",
         "add w22, w20, #0x0 (0)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -7756,11 +7902,11 @@
         "strb w21, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w21, [x28, #1026]",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -7773,8 +7919,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -7789,11 +7935,11 @@
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -7805,8 +7951,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -7822,11 +7968,11 @@
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -7838,8 +7984,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -7855,11 +8001,11 @@
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -7871,8 +8017,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -7888,11 +8034,11 @@
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -7904,8 +8050,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -7921,11 +8067,11 @@
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -7937,8 +8083,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -7954,11 +8100,11 @@
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -7970,8 +8116,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -7987,57 +8133,60 @@
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fiadd word [rax]": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "0xde !11b /0"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldrh w21, [x4]",
-        "sxth x21, w21",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
+        "sxth x21, w22",
         "scvtf d2, w21",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
+        "fadd d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fimul word [rax]": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "0xde !11b /1"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldrh w21, [x4]",
-        "sxth x21, w21",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
+        "sxth x21, w22",
         "scvtf d2, w21",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
+        "fmul d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "ficom word [rax]": {
-      "ExpectedInstructionCount": 16,
+      "ExpectedInstructionCount": 17,
       "Comment": [
         "0xde !11b /2"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldrh w21, [x4]",
-        "sxth x21, w21",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
+        "sxth x21, w22",
         "scvtf d2, w21",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -8054,14 +8203,15 @@
       ]
     },
     "ficomp word [rax]": {
-      "ExpectedInstructionCount": 24,
+      "ExpectedInstructionCount": 25,
       "Comment": [
         "0xde !11b /3"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldrh w21, [x4]",
-        "sxth x21, w21",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
+        "sxth x21, w22",
         "scvtf d2, w21",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -8077,80 +8227,84 @@
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fisub word [rax]": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "0xde !11b /4"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldrh w21, [x4]",
-        "sxth x21, w21",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
+        "sxth x21, w22",
         "scvtf d2, w21",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
+        "fsub d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fisubr word [rax]": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "0xde !11b /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldrh w21, [x4]",
-        "sxth x21, w21",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
+        "sxth x21, w22",
         "scvtf d2, w21",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
+        "fsub d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fidiv word [rax]": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "0xde !11b /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldrh w21, [x4]",
-        "sxth x21, w21",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
+        "sxth x21, w22",
         "scvtf d2, w21",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
+        "fdiv d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fidivr word [rax]": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "0xde !11b /7"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldrh w21, [x4]",
-        "sxth x21, w21",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
+        "sxth x21, w22",
         "scvtf d2, w21",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
+        "fdiv d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "faddp st0": {
@@ -8161,22 +8315,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fadd d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "faddp st1": {
@@ -8188,21 +8342,21 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "fadd d4, d3, d2",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w20",
+        "bic w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x22, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x23, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "faddp st2": {
@@ -8213,22 +8367,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fadd d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "faddp st3": {
@@ -8239,22 +8393,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fadd d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "faddp st4": {
@@ -8265,22 +8419,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fadd d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "faddp st5": {
@@ -8291,22 +8445,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fadd d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "faddp st6": {
@@ -8317,22 +8471,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fadd d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "faddp st7": {
@@ -8343,22 +8497,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fadd d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fmulp st0": {
@@ -8369,22 +8523,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fmul d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fmulp st1": {
@@ -8396,21 +8550,21 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "fmul d4, d3, d2",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w20",
+        "bic w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x22, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x23, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fmulp st2": {
@@ -8421,22 +8575,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fmul d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fmulp st3": {
@@ -8447,22 +8601,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fmul d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fmulp st4": {
@@ -8473,22 +8627,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fmul d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fmulp st5": {
@@ -8499,22 +8653,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fmul d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fmulp st6": {
@@ -8525,22 +8679,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fmul d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fmulp st7": {
@@ -8551,22 +8705,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fmul d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fcompp": {
@@ -8578,8 +8732,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -8595,15 +8749,15 @@
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
         "lsl w23, w21, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "bic w24, w22, w23",
+        "strb w24, [x28, #1026]",
+        "add w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
+        "lsl w22, w21, w20",
+        "bic w21, w24, w22",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -8617,22 +8771,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fsub d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubrp st1, st0": {
@@ -8644,21 +8798,21 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "fsub d4, d3, d2",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w20",
+        "bic w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x22, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x23, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubrp st2, st0": {
@@ -8669,22 +8823,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fsub d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubrp st3, st0": {
@@ -8695,22 +8849,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fsub d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubrp st4, st0": {
@@ -8721,22 +8875,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fsub d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubrp st5, st0": {
@@ -8747,22 +8901,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fsub d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubrp st6, st0": {
@@ -8773,22 +8927,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fsub d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubrp st7, st0": {
@@ -8799,22 +8953,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fsub d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "db 0xde, 0xe8": {
@@ -8827,22 +8981,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "ldrb w22, [x28, #1026]",
+        "fsub d4, d2, d3",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubp st1, st0": {
@@ -8854,21 +9008,21 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "fsub d4, d2, d3",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w20",
+        "bic w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x22, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x23, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubp st2, st0": {
@@ -8879,22 +9033,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "ldrb w22, [x28, #1026]",
+        "fsub d4, d2, d3",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubp st3, st0": {
@@ -8905,22 +9059,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "ldrb w22, [x28, #1026]",
+        "fsub d4, d2, d3",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubp st4, st0": {
@@ -8931,22 +9085,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "ldrb w22, [x28, #1026]",
+        "fsub d4, d2, d3",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubp st5, st0": {
@@ -8957,22 +9111,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "ldrb w22, [x28, #1026]",
+        "fsub d4, d2, d3",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubp st6, st0": {
@@ -8983,22 +9137,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "ldrb w22, [x28, #1026]",
+        "fsub d4, d2, d3",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubp st7, st0": {
@@ -9009,22 +9163,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "ldrb w22, [x28, #1026]",
+        "fsub d4, d2, d3",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "db 0xde, 0xf0": {
@@ -9037,22 +9191,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fdiv d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivrp st1, st0": {
@@ -9064,21 +9218,21 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "fdiv d4, d3, d2",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w20",
+        "bic w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x22, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x23, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivrp st2, st0": {
@@ -9089,22 +9243,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fdiv d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivrp st3, st0": {
@@ -9115,22 +9269,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fdiv d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivrp st4, st0": {
@@ -9141,22 +9295,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fdiv d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivrp st5, st0": {
@@ -9167,22 +9321,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fdiv d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivrp st6, st0": {
@@ -9193,22 +9347,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fdiv d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivrp st7, st0": {
@@ -9219,22 +9373,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fdiv d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "db 0xde, 0xf8": {
@@ -9247,22 +9401,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "ldrb w22, [x28, #1026]",
+        "fdiv d4, d2, d3",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivp st1, st0": {
@@ -9274,21 +9428,21 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "fdiv d4, d2, d3",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w20",
+        "bic w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x22, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x23, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivp st2, st0": {
@@ -9299,22 +9453,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "ldrb w22, [x28, #1026]",
+        "fdiv d4, d2, d3",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivp st3, st0": {
@@ -9325,22 +9479,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "ldrb w22, [x28, #1026]",
+        "fdiv d4, d2, d3",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivp st4, st0": {
@@ -9351,22 +9505,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "ldrb w22, [x28, #1026]",
+        "fdiv d4, d2, d3",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivp st5, st0": {
@@ -9377,22 +9531,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "ldrb w22, [x28, #1026]",
+        "fdiv d4, d2, d3",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivp st6, st0": {
@@ -9403,22 +9557,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "ldrb w22, [x28, #1026]",
+        "fdiv d4, d2, d3",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivp st7, st0": {
@@ -9429,48 +9583,49 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "ldrb w22, [x28, #1026]",
+        "fdiv d4, d2, d3",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fild word [rax]": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xdf !11b /0"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
-        "ldrh w21, [x4]",
-        "sxth x21, w21",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
+        "sxth x21, w22",
         "scvtf d2, x21",
         "add x0, x28, x20, lsl #4",
         "str d2, [x0, #768]"
       ]
     },
     "fisttp word [rax]": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdf !11b /1"
       ],
@@ -9479,19 +9634,20 @@
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
         "fcvtzs x21, d2",
-        "strh w21, [x4]",
+        "mov x22, x4",
+        "strh w21, [x22]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fist word [rax]": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0xdf !11b /2"
       ],
@@ -9501,11 +9657,12 @@
         "ldr d2, [x0, #768]",
         "frinti d0, d2",
         "fcvtzs x20, d0",
-        "strh w20, [x4]"
+        "mov x21, x4",
+        "strh w20, [x21]"
       ]
     },
     "fistp word [rax]": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xdf !11b /3"
       ],
@@ -9515,33 +9672,35 @@
         "ldr d2, [x0, #768]",
         "frinti d0, d2",
         "fcvtzs x21, d0",
-        "strh w21, [x4]",
+        "mov x22, x4",
+        "strh w21, [x22]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fbld tword [rax]": {
-      "ExpectedInstructionCount": 66,
+      "ExpectedInstructionCount": 67,
       "Comment": [
         "0xdf !11b /4"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
-        "ldr q2, [x4]",
+        "mov x21, x4",
+        "ldr q2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -9567,9 +9726,9 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -9582,8 +9741,8 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v2.d[0]",
-        "umov w2, v2.h[4]",
+        "mov x1, v3.d[0]",
+        "umov w2, v3.h[4]",
         "ldr x3, [x28, #1192]",
         "blr x3",
         "ldr w4, [x28, #728]",
@@ -9601,7 +9760,7 @@
       ]
     },
     "fbstp tword [rax]": {
-      "ExpectedInstructionCount": 69,
+      "ExpectedInstructionCount": 70,
       "Comment": [
         "0xdf !11b /6"
       ],
@@ -9633,9 +9792,9 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -9648,8 +9807,8 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v2.d[0]",
-        "umov w2, v2.h[4]",
+        "mov x1, v3.d[0]",
+        "umov w2, v3.h[4]",
         "ldr x3, [x28, #1392]",
         "blr x3",
         "ldr w4, [x28, #728]",
@@ -9664,16 +9823,17 @@
         "eor v2.16b, v2.16b, v2.16b",
         "mov v2.d[0], x0",
         "mov v2.h[4], w1",
-        "str d2, [x4]",
-        "mov x21, v2.d[1]",
-        "strh w21, [x4, #8]",
+        "mov x21, x4",
+        "str d2, [x21]",
+        "mov x22, v2.d[1]",
+        "strh w22, [x21, #8]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -9684,8 +9844,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -9696,8 +9856,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -9708,8 +9868,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -9720,8 +9880,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -9732,8 +9892,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -9744,8 +9904,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -9756,8 +9916,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -9768,60 +9928,65 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fnstsw ax": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 16,
       "Comment": [
         "0xdf 11b 0xe0 /4"
       ],
       "ExpectedArm64ASM": [
         "mov w20, #0x0",
         "ldrb w21, [x28, #747]",
-        "bfi x20, x21, #11, #3",
-        "ldrb w21, [x28, #744]",
-        "ldrb w22, [x28, #745]",
+        "mov x22, x20",
+        "bfi x22, x21, #11, #3",
+        "ldrb w20, [x28, #744]",
+        "ldrb w21, [x28, #745]",
         "ldrb w23, [x28, #746]",
         "ldrb w24, [x28, #750]",
-        "orr x20, x20, x21, lsl #8",
-        "orr x20, x20, x22, lsl #9",
-        "orr x20, x20, x23, lsl #10",
-        "orr x20, x20, x24, lsl #14",
-        "bfxil x4, x20, #0, #16"
+        "orr x25, x22, x20, lsl #8",
+        "orr x20, x25, x21, lsl #9",
+        "orr x21, x20, x23, lsl #10",
+        "orr x20, x21, x24, lsl #14",
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x4, x22"
       ]
     },
     "fucomip st0": {
-      "ExpectedInstructionCount": 19,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdf 11b 0xe8 /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
         "mov w21, #0x1",
-        "cset w26, vc",
+        "cset w22, vc",
+        "mov x26, x22",
         "axflag",
         "cfinv",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fucomip st1": {
-      "ExpectedInstructionCount": 19,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdf 11b 0xe9 /5"
       ],
@@ -9829,215 +9994,223 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
-        "cset w26, vc",
+        "cset w22, vc",
+        "mov x26, x22",
         "axflag",
         "cfinv",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fucomip st2": {
-      "ExpectedInstructionCount": 19,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdf 11b 0xea /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
         "mov w21, #0x1",
-        "cset w26, vc",
+        "cset w22, vc",
+        "mov x26, x22",
         "axflag",
         "cfinv",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fucomip st3": {
-      "ExpectedInstructionCount": 19,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdf 11b 0xeb /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
         "mov w21, #0x1",
-        "cset w26, vc",
+        "cset w22, vc",
+        "mov x26, x22",
         "axflag",
         "cfinv",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fucomip st4": {
-      "ExpectedInstructionCount": 19,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdf 11b 0xec /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
         "mov w21, #0x1",
-        "cset w26, vc",
+        "cset w22, vc",
+        "mov x26, x22",
         "axflag",
         "cfinv",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fucomip st5": {
-      "ExpectedInstructionCount": 19,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdf 11b 0xed /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
         "mov w21, #0x1",
-        "cset w26, vc",
+        "cset w22, vc",
+        "mov x26, x22",
         "axflag",
         "cfinv",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fucomip st6": {
-      "ExpectedInstructionCount": 19,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdf 11b 0xee /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
         "mov w21, #0x1",
-        "cset w26, vc",
+        "cset w22, vc",
+        "mov x26, x22",
         "axflag",
         "cfinv",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fucomip st7": {
-      "ExpectedInstructionCount": 19,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdf 11b 0xef /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
         "mov w21, #0x1",
-        "cset w26, vc",
+        "cset w22, vc",
+        "mov x26, x22",
         "axflag",
         "cfinv",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fcomip st0": {
-      "ExpectedInstructionCount": 19,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdf 11b 0xf0 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
         "mov w21, #0x1",
-        "cset w26, vc",
+        "cset w22, vc",
+        "mov x26, x22",
         "axflag",
         "cfinv",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fcomip st1": {
-      "ExpectedInstructionCount": 19,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdf 11b 0xf1 /6"
       ],
@@ -10045,183 +10218,190 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
-        "cset w26, vc",
+        "cset w22, vc",
+        "mov x26, x22",
         "axflag",
         "cfinv",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fcomip st2": {
-      "ExpectedInstructionCount": 19,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdf 11b 0xf2 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
         "mov w21, #0x1",
-        "cset w26, vc",
+        "cset w22, vc",
+        "mov x26, x22",
         "axflag",
         "cfinv",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fcomip st3": {
-      "ExpectedInstructionCount": 19,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdf 11b 0xf3 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
         "mov w21, #0x1",
-        "cset w26, vc",
+        "cset w22, vc",
+        "mov x26, x22",
         "axflag",
         "cfinv",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fcomip st4": {
-      "ExpectedInstructionCount": 19,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdf 11b 0xf4 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
         "mov w21, #0x1",
-        "cset w26, vc",
+        "cset w22, vc",
+        "mov x26, x22",
         "axflag",
         "cfinv",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fcomip st5": {
-      "ExpectedInstructionCount": 19,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdf 11b 0xf5 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
         "mov w21, #0x1",
-        "cset w26, vc",
+        "cset w22, vc",
+        "mov x26, x22",
         "axflag",
         "cfinv",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fcomip st6": {
-      "ExpectedInstructionCount": 19,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdf 11b 0xf6 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
         "mov w21, #0x1",
-        "cset w26, vc",
+        "cset w22, vc",
+        "mov x26, x22",
         "axflag",
         "cfinv",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fcomip st7": {
-      "ExpectedInstructionCount": 19,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdf 11b 0xf7 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
         "mov w21, #0x1",
-        "cset w26, vc",
+        "cset w22, vc",
+        "mov x26, x22",
         "axflag",
         "cfinv",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     }
diff --git a/unittests/InstructionCountCI/H0F38.json b/unittests/InstructionCountCI/H0F38.json
index 2fb8062559..3219dca384 100644
--- a/unittests/InstructionCountCI/H0F38.json
+++ b/unittests/InstructionCountCI/H0F38.json
@@ -20,20 +20,23 @@
         "ldr d2, [x28, #768]",
         "ldr d3, [x28, #784]",
         "movi v4.16b, #0x87",
-        "and v3.16b, v3.16b, v4.16b",
-        "tbl v2.8b, {v2.16b}, v3.8b",
-        "str d2, [x28, #768]"
+        "and v5.16b, v3.16b, v4.16b",
+        "tbl v3.8b, {v2.16b}, v5.8b",
+        "str d3, [x28, #768]"
       ]
     },
     "pshufb xmm0, xmm1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0x66 0x0f 0x38 0x00"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.16b, #0x8f",
-        "and v2.16b, v17.16b, v2.16b",
-        "tbl v16.16b, {v16.16b}, v2.16b"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "movi v4.16b, #0x8f",
+        "and v5.16b, v3.16b, v4.16b",
+        "tbl v3.16b, {v2.16b}, v5.16b",
+        "mov v16.16b, v3.16b"
       ]
     },
     "phaddw mm0, mm1": {
@@ -44,17 +47,20 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "addp v2.4h, v3.4h, v2.4h",
-        "str d2, [x28, #768]"
+        "addp v4.4h, v3.4h, v2.4h",
+        "str d4, [x28, #768]"
       ]
     },
     "phaddw xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "0x66 0x0f 0x38 0x01"
       ],
       "ExpectedArm64ASM": [
-        "addp v16.8h, v16.8h, v17.8h"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "addp v4.8h, v3.8h, v2.8h",
+        "mov v16.16b, v4.16b"
       ]
     },
     "phaddd mm0, mm1": {
@@ -65,17 +71,20 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "addp v2.2s, v3.2s, v2.2s",
-        "str d2, [x28, #768]"
+        "addp v4.2s, v3.2s, v2.2s",
+        "str d4, [x28, #768]"
       ]
     },
     "phaddd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "0x66 0x0f 0x38 0x02"
       ],
       "ExpectedArm64ASM": [
-        "addp v16.4s, v16.4s, v17.4s"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "addp v4.4s, v3.4s, v2.4s",
+        "mov v16.16b, v4.16b"
       ]
     },
     "phaddsw mm0, mm1": {
@@ -87,20 +96,23 @@
         "ldr d2, [x28, #768]",
         "ldr d3, [x28, #784]",
         "uzp1 v4.4h, v2.4h, v3.4h",
-        "uzp2 v2.4h, v2.4h, v3.4h",
-        "sqadd v2.8h, v4.8h, v2.8h",
+        "uzp2 v5.4h, v2.4h, v3.4h",
+        "sqadd v2.8h, v4.8h, v5.8h",
         "str d2, [x28, #768]"
       ]
     },
     "phaddsw xmm0, xmm1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0x66 0x0f 0x38 0x03"
       ],
       "ExpectedArm64ASM": [
-        "uzp1 v2.8h, v16.8h, v17.8h",
-        "uzp2 v3.8h, v16.8h, v17.8h",
-        "sqadd v16.8h, v2.8h, v3.8h"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "uzp1 v4.8h, v2.8h, v3.8h",
+        "uzp2 v5.8h, v2.8h, v3.8h",
+        "sqadd v2.8h, v4.8h, v5.8h",
+        "mov v16.16b, v2.16b"
       ]
     },
     "pmaddubsw mm0, mm1": {
@@ -111,17 +123,17 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
         "ldr d3, [x28, #784]",
-        "uxtl v2.8h, v2.8b",
-        "sxtl v3.8h, v3.8b",
-        "smull v4.4s, v2.4h, v3.4h",
-        "smull2 v2.4s, v2.8h, v3.8h",
-        "addp v2.4s, v4.4s, v2.4s",
-        "sqxtn v2.4h, v2.4s",
-        "str d2, [x28, #768]"
+        "uxtl v4.8h, v2.8b",
+        "sxtl v2.8h, v3.8b",
+        "smull v3.4s, v4.4h, v2.4h",
+        "smull2 v5.4s, v4.8h, v2.8h",
+        "addp v2.4s, v3.4s, v5.4s",
+        "sqxtn v3.4h, v2.4s",
+        "str d3, [x28, #768]"
       ]
     },
     "pmaddubsw xmm0, xmm1": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 12,
       "Comment": [
         "{u,s}xtl{,2} and uzp{1,2} can be more optimal",
         "Up-front zero extend and sign extend the elements in place",
@@ -130,15 +142,18 @@
         "0x66 0x0f 0x38 0x04"
       ],
       "ExpectedArm64ASM": [
-        "uxtl v2.8h, v16.8b",
-        "sxtl v3.8h, v17.8b",
-        "mul v2.8h, v2.8h, v3.8h",
-        "uxtl2 v3.8h, v16.16b",
-        "sxtl2 v4.8h, v17.16b",
-        "mul v3.8h, v3.8h, v4.8h",
-        "uzp1 v4.8h, v2.8h, v3.8h",
-        "uzp2 v2.8h, v2.8h, v3.8h",
-        "sqadd v16.8h, v4.8h, v2.8h"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "uxtl v4.8h, v2.8b",
+        "sxtl v5.8h, v3.8b",
+        "mul v6.8h, v4.8h, v5.8h",
+        "uxtl2 v4.8h, v2.16b",
+        "sxtl2 v2.8h, v3.16b",
+        "mul v3.8h, v4.8h, v2.8h",
+        "uzp1 v2.8h, v6.8h, v3.8h",
+        "uzp2 v4.8h, v6.8h, v3.8h",
+        "sqadd v3.8h, v2.8h, v4.8h",
+        "mov v16.16b, v3.16b"
       ]
     },
     "phsubw mm0, mm1": {
@@ -150,20 +165,23 @@
         "ldr d2, [x28, #768]",
         "ldr d3, [x28, #784]",
         "uzp1 v4.4h, v2.4h, v3.4h",
-        "uzp2 v2.4h, v2.4h, v3.4h",
-        "sub v2.8h, v4.8h, v2.8h",
+        "uzp2 v5.4h, v2.4h, v3.4h",
+        "sub v2.8h, v4.8h, v5.8h",
         "str d2, [x28, #768]"
       ]
     },
     "phsubw xmm0, xmm1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0x66 0x0f 0x38 0x05"
       ],
       "ExpectedArm64ASM": [
-        "uzp1 v2.8h, v16.8h, v17.8h",
-        "uzp2 v3.8h, v16.8h, v17.8h",
-        "sub v16.8h, v2.8h, v3.8h"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "uzp1 v4.8h, v2.8h, v3.8h",
+        "uzp2 v5.8h, v2.8h, v3.8h",
+        "sub v2.8h, v4.8h, v5.8h",
+        "mov v16.16b, v2.16b"
       ]
     },
     "phsubd mm0, mm1": {
@@ -175,20 +193,23 @@
         "ldr d2, [x28, #768]",
         "ldr d3, [x28, #784]",
         "uzp1 v4.2s, v2.2s, v3.2s",
-        "uzp2 v2.2s, v2.2s, v3.2s",
-        "sub v2.4s, v4.4s, v2.4s",
+        "uzp2 v5.2s, v2.2s, v3.2s",
+        "sub v2.4s, v4.4s, v5.4s",
         "str d2, [x28, #768]"
       ]
     },
     "phsubd xmm0, xmm1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0x66 0x0f 0x38 0x06"
       ],
       "ExpectedArm64ASM": [
-        "uzp1 v2.4s, v16.4s, v17.4s",
-        "uzp2 v3.4s, v16.4s, v17.4s",
-        "sub v16.4s, v2.4s, v3.4s"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "uzp1 v4.4s, v2.4s, v3.4s",
+        "uzp2 v5.4s, v2.4s, v3.4s",
+        "sub v2.4s, v4.4s, v5.4s",
+        "mov v16.16b, v2.16b"
       ]
     },
     "phsubsw mm0, mm1": {
@@ -200,20 +221,23 @@
         "ldr d2, [x28, #768]",
         "ldr d3, [x28, #784]",
         "uzp1 v4.4h, v2.4h, v3.4h",
-        "uzp2 v2.4h, v2.4h, v3.4h",
-        "sqsub v2.8h, v4.8h, v2.8h",
+        "uzp2 v5.4h, v2.4h, v3.4h",
+        "sqsub v2.8h, v4.8h, v5.8h",
         "str d2, [x28, #768]"
       ]
     },
     "phsubsw xmm0, xmm1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0x66 0x0f 0x38 0x07"
       ],
       "ExpectedArm64ASM": [
-        "uzp1 v2.8h, v16.8h, v17.8h",
-        "uzp2 v3.8h, v16.8h, v17.8h",
-        "sqsub v16.8h, v2.8h, v3.8h"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "uzp1 v4.8h, v2.8h, v3.8h",
+        "uzp2 v5.8h, v2.8h, v3.8h",
+        "sqsub v2.8h, v4.8h, v5.8h",
+        "mov v16.16b, v2.16b"
       ]
     },
     "psignb mm0, mm1": {
@@ -224,21 +248,24 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "sqshl v2.8b, v2.8b, #7",
-        "srshr v2.8b, v2.8b, #7",
-        "mul v2.16b, v3.16b, v2.16b",
-        "str d2, [x28, #768]"
+        "sqshl v4.8b, v2.8b, #7",
+        "srshr v2.8b, v4.8b, #7",
+        "mul v4.16b, v3.16b, v2.16b",
+        "str d4, [x28, #768]"
       ]
     },
     "psignb xmm0, xmm1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0x66 0x0f 0x38 0x08"
       ],
       "ExpectedArm64ASM": [
-        "sqshl v2.16b, v17.16b, #7",
-        "srshr v2.16b, v2.16b, #7",
-        "mul v16.16b, v16.16b, v2.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "sqshl v4.16b, v2.16b, #7",
+        "srshr v2.16b, v4.16b, #7",
+        "mul v4.16b, v3.16b, v2.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "psignw mm0, mm1": {
@@ -249,21 +276,24 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "sqshl v2.4h, v2.4h, #15",
-        "srshr v2.4h, v2.4h, #15",
-        "mul v2.8h, v3.8h, v2.8h",
-        "str d2, [x28, #768]"
+        "sqshl v4.4h, v2.4h, #15",
+        "srshr v2.4h, v4.4h, #15",
+        "mul v4.8h, v3.8h, v2.8h",
+        "str d4, [x28, #768]"
       ]
     },
     "psignw xmm0, xmm1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0x66 0x0f 0x38 0x09"
       ],
       "ExpectedArm64ASM": [
-        "sqshl v2.8h, v17.8h, #15",
-        "srshr v2.8h, v2.8h, #15",
-        "mul v16.8h, v16.8h, v2.8h"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "sqshl v4.8h, v2.8h, #15",
+        "srshr v2.8h, v4.8h, #15",
+        "mul v4.8h, v3.8h, v2.8h",
+        "mov v16.16b, v4.16b"
       ]
     },
     "psignd mm0, mm1": {
@@ -274,21 +304,24 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "sqshl v2.2s, v2.2s, #31",
-        "srshr v2.2s, v2.2s, #31",
-        "mul v2.4s, v3.4s, v2.4s",
-        "str d2, [x28, #768]"
+        "sqshl v4.2s, v2.2s, #31",
+        "srshr v2.2s, v4.2s, #31",
+        "mul v4.4s, v3.4s, v2.4s",
+        "str d4, [x28, #768]"
       ]
     },
     "psignd xmm0, xmm1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0x66 0x0f 0x38 0x0a"
       ],
       "ExpectedArm64ASM": [
-        "sqshl v2.4s, v17.4s, #31",
-        "srshr v2.4s, v2.4s, #31",
-        "mul v16.4s, v16.4s, v2.4s"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "sqshl v4.4s, v2.4s, #31",
+        "srshr v2.4s, v4.4s, #31",
+        "mul v4.4s, v3.4s, v2.4s",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pmulhrsw mm0, mm1": {
@@ -300,114 +333,148 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
         "ldr d3, [x28, #784]",
-        "smull v2.4s, v2.4h, v3.4h",
-        "sshr v2.4s, v2.4s, #14",
+        "smull v4.4s, v2.4h, v3.4h",
+        "sshr v2.4s, v4.4s, #14",
         "movi v3.4s, #0x1, lsl #0",
-        "add v2.4s, v2.4s, v3.4s",
-        "shrn v2.4h, v2.4s, #1",
+        "add v4.4s, v2.4s, v3.4s",
+        "shrn v2.4h, v4.4s, #1",
         "str d2, [x28, #768]"
       ]
     },
     "pmulhrsw xmm0, xmm1": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "Might be able to use sqdmulh",
         "0x66 0x0f 0x38 0x0b"
       ],
       "ExpectedArm64ASM": [
-        "smull v2.4s, v16.4h, v17.4h",
-        "smull2 v3.4s, v16.8h, v17.8h",
-        "sshr v2.4s, v2.4s, #14",
-        "sshr v3.4s, v3.4s, #14",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "smull v4.4s, v2.4h, v3.4h",
+        "smull2 v5.4s, v2.8h, v3.8h",
+        "sshr v2.4s, v4.4s, #14",
+        "sshr v3.4s, v5.4s, #14",
         "movi v4.4s, #0x1, lsl #0",
-        "add v2.4s, v2.4s, v4.4s",
-        "add v3.4s, v3.4s, v4.4s",
-        "shrn v2.4h, v2.4s, #1",
-        "mov v0.16b, v2.16b",
-        "shrn2 v0.8h, v3.4s, #1",
-        "mov v16.16b, v0.16b"
+        "add v5.4s, v2.4s, v4.4s",
+        "add v2.4s, v3.4s, v4.4s",
+        "shrn v3.4h, v5.4s, #1",
+        "mov v0.16b, v3.16b",
+        "shrn2 v0.8h, v2.4s, #1",
+        "mov v4.16b, v0.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pblendvb xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0x66 0x0f 0x38 0x10"
       ],
       "ExpectedArm64ASM": [
-        "sshr v2.16b, v16.16b, #7",
-        "bit v16.16b, v17.16b, v2.16b"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "sshr v4.16b, v2.16b, #7",
+        "mov v5.16b, v4.16b",
+        "bsl v5.16b, v3.16b, v2.16b",
+        "mov v16.16b, v5.16b"
       ]
     },
     "blendvps xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0x66 0x0f 0x38 0x14"
       ],
       "ExpectedArm64ASM": [
-        "sshr v2.4s, v16.4s, #31",
-        "bit v16.16b, v17.16b, v2.16b"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "sshr v4.4s, v2.4s, #31",
+        "mov v5.16b, v4.16b",
+        "bsl v5.16b, v3.16b, v2.16b",
+        "mov v16.16b, v5.16b"
       ]
     },
     "blendvpd xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0x66 0x0f 0x38 0x15"
       ],
       "ExpectedArm64ASM": [
-        "sshr v2.2d, v16.2d, #63",
-        "bit v16.16b, v17.16b, v2.16b"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "sshr v4.2d, v2.2d, #63",
+        "mov v5.16b, v4.16b",
+        "bsl v5.16b, v3.16b, v2.16b",
+        "mov v16.16b, v5.16b"
       ]
     },
     "pblendvb xmm1, xmm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0x66 0x0f 0x38 0x10"
       ],
       "ExpectedArm64ASM": [
-        "sshr v2.16b, v16.16b, #7",
-        "bit v17.16b, v18.16b, v2.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v18.16b",
+        "mov v4.16b, v16.16b",
+        "sshr v5.16b, v4.16b, #7",
+        "mov v4.16b, v5.16b",
+        "bsl v4.16b, v3.16b, v2.16b",
+        "mov v17.16b, v4.16b"
       ]
     },
     "blendvps xmm1, xmm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0x66 0x0f 0x38 0x14"
       ],
       "ExpectedArm64ASM": [
-        "sshr v2.4s, v16.4s, #31",
-        "bit v17.16b, v18.16b, v2.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v18.16b",
+        "mov v4.16b, v16.16b",
+        "sshr v5.4s, v4.4s, #31",
+        "mov v4.16b, v5.16b",
+        "bsl v4.16b, v3.16b, v2.16b",
+        "mov v17.16b, v4.16b"
       ]
     },
     "blendvpd xmm1, xmm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0x66 0x0f 0x38 0x15"
       ],
       "ExpectedArm64ASM": [
-        "sshr v2.2d, v16.2d, #63",
-        "bit v17.16b, v18.16b, v2.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v18.16b",
+        "mov v4.16b, v16.16b",
+        "sshr v5.2d, v4.2d, #63",
+        "mov v4.16b, v5.16b",
+        "bsl v4.16b, v3.16b, v2.16b",
+        "mov v17.16b, v4.16b"
       ]
     },
     "ptest xmm0, xmm1": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 18,
       "Comment": [
         "0x66 0x0f 0x38 0x17"
       ],
       "ExpectedArm64ASM": [
-        "and v2.16b, v16.16b, v17.16b",
-        "bic v3.16b, v17.16b, v16.16b",
-        "umaxv h2, v2.8h",
-        "umaxv h3, v3.8h",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "and v4.16b, v2.16b, v3.16b",
+        "bic v5.16b, v3.16b, v2.16b",
+        "umaxv h2, v4.8h",
+        "umaxv h3, v5.8h",
         "umov w20, v2.h[0]",
         "umov w21, v3.h[0]",
-        "mov w27, #0x0",
-        "mov w26, #0x1",
+        "mov w22, #0x0",
+        "mov w23, #0x1",
         "cmp x21, #0x0 (0)",
-        "cset x21, eq",
+        "cset x24, eq",
         "tst w20, w20",
         "mrs x20, nzcv",
-        "orr w20, w20, w21, lsl #29",
-        "msr nzcv, x20"
+        "orr w21, w20, w24, lsl #29",
+        "mov x26, x23",
+        "mov x27, x22",
+        "msr nzcv, x21"
       ]
     },
     "pabsb mm0, mm1": {
@@ -417,17 +484,19 @@
       ],
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
-        "abs v2.16b, v2.16b",
-        "str d2, [x28, #768]"
+        "abs v3.16b, v2.16b",
+        "str d3, [x28, #768]"
       ]
     },
     "pabsb xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "0x66 0x0f 0x38 0x1c"
       ],
       "ExpectedArm64ASM": [
-        "abs v16.16b, v17.16b"
+        "mov v2.16b, v17.16b",
+        "abs v3.16b, v2.16b",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pabsw mm0, mm1": {
@@ -437,17 +506,19 @@
       ],
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
-        "abs v2.8h, v2.8h",
-        "str d2, [x28, #768]"
+        "abs v3.8h, v2.8h",
+        "str d3, [x28, #768]"
       ]
     },
     "pabsw xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "0x66 0x0f 0x38 0x1d"
       ],
       "ExpectedArm64ASM": [
-        "abs v16.8h, v17.8h"
+        "mov v2.16b, v17.16b",
+        "abs v3.8h, v2.8h",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pabsd mm0, mm1": {
@@ -457,498 +528,583 @@
       ],
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
-        "abs v2.4s, v2.4s",
-        "str d2, [x28, #768]"
+        "abs v3.4s, v2.4s",
+        "str d3, [x28, #768]"
       ]
     },
     "pabsd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "0x66 0x0f 0x38 0x1e"
       ],
       "ExpectedArm64ASM": [
-        "abs v16.4s, v17.4s"
+        "mov v2.16b, v17.16b",
+        "abs v3.4s, v2.4s",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pmovzxbw xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "0x66 0x0f 0x38 0x30"
       ],
       "ExpectedArm64ASM": [
-        "uxtl v16.8h, v17.8b"
+        "mov v2.16b, v17.16b",
+        "uxtl v3.8h, v2.8b",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pmovzxbd xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "0x66 0x0f 0x38 0x31"
       ],
       "ExpectedArm64ASM": [
-        "uxtl v2.8h, v17.8b",
-        "uxtl v16.4s, v2.4h"
+        "mov v2.16b, v17.16b",
+        "uxtl v3.8h, v2.8b",
+        "uxtl v2.4s, v3.4h",
+        "mov v16.16b, v2.16b"
       ]
     },
     "pmovzxbq xmm0, xmm1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x38 0x32"
       ],
       "ExpectedArm64ASM": [
-        "uxtl v2.8h, v17.8b",
-        "uxtl v2.4s, v2.4h",
-        "uxtl v16.2d, v2.2s"
+        "mov v2.16b, v17.16b",
+        "uxtl v3.8h, v2.8b",
+        "uxtl v2.4s, v3.4h",
+        "uxtl v3.2d, v2.2s",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pmovzxwd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "0x66 0x0f 0x38 0x33"
       ],
       "ExpectedArm64ASM": [
-        "uxtl v16.4s, v17.4h"
+        "mov v2.16b, v17.16b",
+        "uxtl v3.4s, v2.4h",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pmovzxwq xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "0x66 0x0f 0x38 0x34"
       ],
       "ExpectedArm64ASM": [
-        "uxtl v2.4s, v17.4h",
-        "uxtl v16.2d, v2.2s"
+        "mov v2.16b, v17.16b",
+        "uxtl v3.4s, v2.4h",
+        "uxtl v2.2d, v3.2s",
+        "mov v16.16b, v2.16b"
       ]
     },
     "pmovzxdq xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "0x66 0x0f 0x38 0x35"
       ],
       "ExpectedArm64ASM": [
-        "uxtl v16.2d, v17.2s"
+        "mov v2.16b, v17.16b",
+        "uxtl v3.2d, v2.2s",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pcmpgtq xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "0x66 0x0f 0x38 0x37"
       ],
       "ExpectedArm64ASM": [
-        "cmgt v16.2d, v16.2d, v17.2d"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "cmgt v4.2d, v3.2d, v2.2d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pminsb xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "0x66 0x0f 0x38 0x38"
       ],
       "ExpectedArm64ASM": [
-        "smin v16.16b, v16.16b, v17.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "smin v4.16b, v3.16b, v2.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pminsd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "0x66 0x0f 0x38 0x39"
       ],
       "ExpectedArm64ASM": [
-        "smin v16.4s, v16.4s, v17.4s"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "smin v4.4s, v3.4s, v2.4s",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pminuw xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "0x66 0x0f 0x38 0x3a"
       ],
       "ExpectedArm64ASM": [
-        "umin v16.8h, v16.8h, v17.8h"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "umin v4.8h, v3.8h, v2.8h",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pminud xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "0x66 0x0f 0x38 0x3b"
       ],
       "ExpectedArm64ASM": [
-        "umin v16.4s, v16.4s, v17.4s"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "umin v4.4s, v3.4s, v2.4s",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pmaxsb xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "0x66 0x0f 0x38 0x3c"
       ],
       "ExpectedArm64ASM": [
-        "smax v16.16b, v16.16b, v17.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "smax v4.16b, v3.16b, v2.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pmaxsd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "0x66 0x0f 0x38 0x3d"
       ],
       "ExpectedArm64ASM": [
-        "smax v16.4s, v16.4s, v17.4s"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "smax v4.4s, v3.4s, v2.4s",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pmaxuw xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "0x66 0x0f 0x38 0x3e"
       ],
       "ExpectedArm64ASM": [
-        "umax v16.8h, v16.8h, v17.8h"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "umax v4.8h, v3.8h, v2.8h",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pmaxud xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "0x66 0x0f 0x38 0x3f"
       ],
       "ExpectedArm64ASM": [
-        "umax v16.4s, v16.4s, v17.4s"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "umax v4.4s, v3.4s, v2.4s",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pmulld xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "0x66 0x0f 0x38 0x40"
       ],
       "ExpectedArm64ASM": [
-        "mul v16.4s, v16.4s, v17.4s"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "mul v4.4s, v3.4s, v2.4s",
+        "mov v16.16b, v4.16b"
       ]
     },
     "phminposuw xmm0, xmm1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0x66 0x0f 0x38 0x41"
       ],
       "ExpectedArm64ASM": [
-        "ldr q2, [x28, #2048]",
-        "zip1 v3.8h, v2.8h, v17.8h",
-        "zip2 v2.8h, v2.8h, v17.8h",
-        "umin v2.4s, v3.4s, v2.4s",
-        "uminv s2, v2.4s",
-        "rev32 v16.8h, v2.8h"
+        "mov v2.16b, v17.16b",
+        "ldr q3, [x28, #2048]",
+        "zip1 v4.8h, v3.8h, v2.8h",
+        "zip2 v5.8h, v3.8h, v2.8h",
+        "umin v2.4s, v4.4s, v5.4s",
+        "uminv s3, v2.4s",
+        "rev32 v2.8h, v3.8h",
+        "mov v16.16b, v2.16b"
       ]
     },
     "sha1nexte xmm0, xmm1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0x66 0x0f 0x38 0xc8"
       ],
       "ExpectedArm64ASM": [
-        "shl v2.4s, v16.4s, #30",
-        "usra v2.4s, v16.4s, #2",
-        "add v2.4s, v17.4s, v2.4s",
-        "mov v16.16b, v17.16b",
-        "mov v16.s[3], v2.s[3]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "shl v4.4s, v2.4s, #30",
+        "mov v5.16b, v4.16b",
+        "usra v5.4s, v2.4s, #2",
+        "add v2.4s, v3.4s, v5.4s",
+        "mov v4.16b, v3.16b",
+        "mov v4.s[3], v2.s[3]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "sha1msg1 xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x38 0xc9"
       ],
       "ExpectedArm64ASM": [
-        "ext v2.16b, v17.16b, v16.16b, #8",
-        "eor v16.16b, v16.16b, v2.16b"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "ext v4.16b, v3.16b, v2.16b, #8",
+        "eor v3.16b, v2.16b, v4.16b",
+        "mov v16.16b, v3.16b"
       ]
     },
     "sha1msg2 xmm0, xmm1": {
-      "ExpectedInstructionCount": 15,
+      "ExpectedInstructionCount": 16,
       "Comment": [
         "0x66 0x0f 0x38 0xca"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "ext v2.16b, v2.16b, v17.16b, #12",
-        "eor v2.16b, v16.16b, v2.16b",
-        "shl v3.4s, v2.4s, #1",
-        "mov v0.16b, v3.16b",
-        "usra v0.4s, v2.4s, #31",
-        "mov v2.16b, v0.16b",
-        "dup v3.4s, v2.s[3]",
-        "eor v3.16b, v16.16b, v3.16b",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "movi v4.2d, #0x0",
+        "ext v5.16b, v4.16b, v3.16b, #12",
+        "eor v3.16b, v2.16b, v5.16b",
         "shl v4.4s, v3.4s, #1",
-        "mov v0.16b, v4.16b",
-        "usra v0.4s, v3.4s, #31",
-        "mov v3.16b, v0.16b",
-        "mov v16.16b, v2.16b",
-        "mov v16.s[0], v3.s[0]"
+        "mov v5.16b, v4.16b",
+        "usra v5.4s, v3.4s, #31",
+        "dup v3.4s, v5.s[3]",
+        "eor v4.16b, v2.16b, v3.16b",
+        "shl v2.4s, v4.4s, #1",
+        "mov v3.16b, v2.16b",
+        "usra v3.4s, v4.4s, #31",
+        "mov v2.16b, v5.16b",
+        "mov v2.s[0], v3.s[0]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "sha256rnds2 xmm0, xmm1": {
-      "ExpectedInstructionCount": 56,
+      "ExpectedInstructionCount": 61,
       "Comment": [
         "0x66 0x0f 0x38 0xcb"
       ],
       "ExpectedArm64ASM": [
-        "mov w20, v17.s[1]",
-        "mov w21, v17.s[0]",
-        "mov w22, v16.s[1]",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov w20, v3.s[1]",
+        "mov w21, v3.s[0]",
+        "mov w22, v2.s[1]",
         "and w23, w20, w21",
-        "bic w22, w22, w20",
-        "eor w22, w23, w22",
+        "bic w24, w22, w20",
+        "eor w22, w23, w24",
         "ror w23, w20, #6",
-        "eor w23, w23, w20, ror #11",
-        "eor w23, w23, w20, ror #25",
-        "add w22, w22, w23",
-        "mov w23, v16.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v16.s[0]",
-        "add w22, w22, w23",
-        "mov w23, v17.s[3]",
-        "mov w24, v17.s[2]",
-        "mov w25, v16.s[3]",
-        "and w30, w24, w25",
-        "orr w25, w24, w25",
-        "and w25, w23, w25",
-        "orr w25, w25, w30",
-        "add w25, w22, w25",
-        "ror w30, w23, #2",
-        "eor w30, w30, w23, ror #13",
-        "eor w30, w30, w23, ror #22",
-        "add w25, w25, w30",
-        "mov w30, v16.s[2]",
-        "add w22, w22, w30",
-        "and w20, w22, w20",
-        "bic w21, w21, w22",
-        "eor w20, w20, w21",
-        "ror w21, w22, #6",
-        "eor w21, w21, w22, ror #11",
-        "eor w21, w21, w22, ror #25",
-        "add w20, w20, w21",
-        "mov w21, v16.s[1]",
-        "add w20, w20, w21",
-        "mov w21, v16.s[1]",
-        "add w20, w20, w21",
-        "and w21, w23, w24",
-        "orr w23, w23, w24",
-        "and w23, w25, w23",
-        "orr w21, w23, w21",
-        "add w21, w20, w21",
-        "ror w23, w25, #2",
-        "eor w23, w23, w25, ror #13",
-        "eor w23, w23, w25, ror #22",
-        "add w21, w21, w23",
-        "mov w23, v16.s[3]",
-        "add w20, w20, w23",
-        "mov v2.16b, v16.16b",
-        "mov v2.s[3], w21",
-        "mov v2.s[2], w25",
-        "mov v2.s[1], w20",
-        "mov v16.16b, v2.16b",
-        "mov v16.s[0], w22"
+        "eor w24, w23, w20, ror #11",
+        "eor w23, w24, w20, ror #25",
+        "add w24, w22, w23",
+        "mov w22, v2.s[0]",
+        "add w23, w24, w22",
+        "mov w22, v2.s[0]",
+        "add w24, w23, w22",
+        "mov w22, v3.s[3]",
+        "mov w23, v3.s[2]",
+        "mov w25, v2.s[3]",
+        "and w30, w23, w25",
+        "orr w18, w23, w25",
+        "and w25, w22, w18",
+        "orr w18, w25, w30",
+        "add w25, w24, w18",
+        "ror w30, w22, #2",
+        "eor w18, w30, w22, ror #13",
+        "eor w30, w18, w22, ror #22",
+        "add w18, w25, w30",
+        "mov w25, v2.s[2]",
+        "add w30, w24, w25",
+        "and w24, w30, w20",
+        "bic w20, w21, w30",
+        "eor w21, w24, w20",
+        "ror w20, w30, #6",
+        "eor w24, w20, w30, ror #11",
+        "eor w20, w24, w30, ror #25",
+        "add w24, w21, w20",
+        "mov w20, v2.s[1]",
+        "add w21, w24, w20",
+        "mov w20, v2.s[1]",
+        "add w24, w21, w20",
+        "and w20, w22, w23",
+        "orr w21, w22, w23",
+        "and w22, w18, w21",
+        "orr w21, w22, w20",
+        "add w20, w24, w21",
+        "ror w21, w18, #2",
+        "eor w22, w21, w18, ror #13",
+        "eor w21, w22, w18, ror #22",
+        "add w22, w20, w21",
+        "mov w20, v2.s[3]",
+        "add w21, w24, w20",
+        "mov v3.16b, v2.16b",
+        "mov v3.s[3], w22",
+        "mov v2.16b, v3.16b",
+        "mov v2.s[2], w18",
+        "mov v3.16b, v2.16b",
+        "mov v3.s[1], w21",
+        "mov v2.16b, v3.16b",
+        "mov v2.s[0], w30",
+        "mov v16.16b, v2.16b"
       ]
     },
     "sha256msg1 xmm0, xmm1": {
-      "ExpectedInstructionCount": 35,
+      "ExpectedInstructionCount": 40,
       "Comment": [
         "0x66 0x0f 0x38 0xcc"
       ],
       "ExpectedArm64ASM": [
-        "mov w20, v17.s[0]",
-        "mov w21, v16.s[3]",
-        "mov w22, v16.s[2]",
-        "mov w23, v16.s[1]",
-        "mov w24, v16.s[0]",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov w20, v3.s[0]",
+        "mov w21, v2.s[3]",
+        "mov w22, v2.s[2]",
+        "mov w23, v2.s[1]",
+        "mov w24, v2.s[0]",
         "ror w25, w20, #7",
         "ror w30, w20, #18",
-        "eor w25, w25, w30",
-        "lsr w20, w20, #3",
-        "eor w20, w25, w20",
-        "add w20, w21, w20",
-        "ror w25, w21, #7",
+        "eor w18, w25, w30",
+        "lsr w25, w20, #3",
+        "eor w20, w18, w25",
+        "add w25, w21, w20",
+        "ror w20, w21, #7",
         "ror w30, w21, #18",
-        "eor w25, w25, w30",
-        "lsr w21, w21, #3",
-        "eor w21, w25, w21",
-        "add w21, w22, w21",
-        "ror w25, w22, #7",
+        "eor w18, w20, w30",
+        "lsr w20, w21, #3",
+        "eor w21, w18, w20",
+        "add w20, w22, w21",
+        "ror w21, w22, #7",
         "ror w30, w22, #18",
-        "eor w25, w25, w30",
-        "lsr w22, w22, #3",
-        "eor w22, w25, w22",
-        "add w22, w23, w22",
-        "ror w25, w23, #7",
+        "eor w18, w21, w30",
+        "lsr w21, w22, #3",
+        "eor w22, w18, w21",
+        "add w21, w23, w22",
+        "ror w22, w23, #7",
         "ror w30, w23, #18",
-        "eor w25, w25, w30",
-        "lsr w23, w23, #3",
-        "eor w23, w25, w23",
-        "add w23, w24, w23",
-        "mov v2.16b, v16.16b",
-        "mov v2.s[3], w20",
-        "mov v2.s[2], w21",
-        "mov v2.s[1], w22",
-        "mov v16.16b, v2.16b",
-        "mov v16.s[0], w23"
+        "eor w18, w22, w30",
+        "lsr w22, w23, #3",
+        "eor w23, w18, w22",
+        "add w22, w24, w23",
+        "mov v3.16b, v2.16b",
+        "mov v3.s[3], w25",
+        "mov v2.16b, v3.16b",
+        "mov v2.s[2], w20",
+        "mov v3.16b, v2.16b",
+        "mov v3.s[1], w21",
+        "mov v2.16b, v3.16b",
+        "mov v2.s[0], w22",
+        "mov v16.16b, v2.16b"
       ]
     },
     "sha256msg2 xmm0, xmm1": {
-      "ExpectedInstructionCount": 36,
+      "ExpectedInstructionCount": 41,
       "Comment": [
         "0x66 0x0f 0x38 0xcd"
       ],
       "ExpectedArm64ASM": [
-        "mov w20, v17.s[2]",
-        "mov w21, v17.s[3]",
-        "mov w22, v16.s[0]",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov w20, v3.s[2]",
+        "mov w21, v3.s[3]",
+        "mov w22, v2.s[0]",
         "ror w23, w20, #17",
         "ror w24, w20, #19",
-        "eor w23, w23, w24",
-        "lsr w20, w20, #10",
-        "eor w20, w23, w20",
-        "add w20, w22, w20",
-        "mov w22, v16.s[1]",
-        "ror w23, w21, #17",
+        "eor w25, w23, w24",
+        "lsr w23, w20, #10",
+        "eor w20, w25, w23",
+        "add w23, w22, w20",
+        "mov w20, v2.s[1]",
+        "ror w22, w21, #17",
         "ror w24, w21, #19",
-        "eor w23, w23, w24",
-        "lsr w21, w21, #10",
-        "eor w21, w23, w21",
-        "add w21, w22, w21",
-        "mov w22, v16.s[2]",
-        "ror w23, w20, #17",
-        "ror w24, w20, #19",
-        "eor w23, w23, w24",
-        "lsr w24, w20, #10",
-        "eor w23, w23, w24",
-        "add w22, w22, w23",
-        "mov w23, v16.s[3]",
-        "ror w24, w21, #17",
-        "ror w25, w21, #19",
-        "eor w24, w24, w25",
-        "lsr w25, w21, #10",
-        "eor w24, w24, w25",
-        "add w23, w23, w24",
-        "mov v2.16b, v16.16b",
-        "mov v2.s[3], w23",
-        "mov v2.s[2], w22",
-        "mov v2.s[1], w21",
-        "mov v16.16b, v2.16b",
-        "mov v16.s[0], w20"
+        "eor w25, w22, w24",
+        "lsr w22, w21, #10",
+        "eor w21, w25, w22",
+        "add w22, w20, w21",
+        "mov w20, v2.s[2]",
+        "ror w21, w23, #17",
+        "ror w24, w23, #19",
+        "eor w25, w21, w24",
+        "lsr w21, w23, #10",
+        "eor w24, w25, w21",
+        "add w21, w20, w24",
+        "mov w20, v2.s[3]",
+        "ror w24, w22, #17",
+        "ror w25, w22, #19",
+        "eor w30, w24, w25",
+        "lsr w24, w22, #10",
+        "eor w25, w30, w24",
+        "add w24, w20, w25",
+        "mov v3.16b, v2.16b",
+        "mov v3.s[3], w24",
+        "mov v2.16b, v3.16b",
+        "mov v2.s[2], w21",
+        "mov v3.16b, v2.16b",
+        "mov v3.s[1], w22",
+        "mov v2.16b, v3.16b",
+        "mov v2.s[0], w23",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movbe ax, word [rbx]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0x66 0x0f 0x38 0xf0"
       ],
       "ExpectedArm64ASM": [
-        "ldrh w20, [x7]",
-        "rev w20, w20",
-        "bfxil x4, x20, #16, #16"
+        "mov x20, x7",
+        "ldrh w21, [x20]",
+        "rev w20, w21",
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #16, #16",
+        "mov x4, x22"
       ]
     },
     "movbe eax, dword [rbx]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "0x66 0x0f 0x38 0xf0"
       ],
       "ExpectedArm64ASM": [
-        "ldr w20, [x7]",
-        "rev w4, w20"
+        "mov x20, x7",
+        "ldr w21, [x20]",
+        "rev w20, w21",
+        "mov x4, x20"
       ]
     },
     "movbe rax, qword [rbx]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "REX.W 0x66 0x0f 0x38 0xf0"
       ],
       "ExpectedArm64ASM": [
-        "ldr x20, [x7]",
-        "rev x4, x20"
+        "mov x20, x7",
+        "ldr x21, [x20]",
+        "rev x20, x21",
+        "mov x4, x20"
       ]
     },
     "adcx eax, ebx": {
-      "ExpectedInstructionCount": 16,
+      "ExpectedInstructionCount": 18,
       "Comment": [
         "0x66 0x0f 0x38 0xf6"
       ],
       "ExpectedArm64ASM": [
         "cset w20, hs",
-        "mov w21, w7",
-        "mov w22, w4",
-        "add w23, w21, w20",
-        "add w4, w22, w23",
-        "mrs x22, nzcv",
-        "cmp w4, w21",
+        "mov x21, x7",
+        "mov w22, w21",
+        "mov x21, x4",
+        "mov w23, w21",
+        "add w21, w22, w20",
+        "add w24, w23, w21",
+        "mov x4, x24",
+        "mrs x21, nzcv",
+        "cmp w24, w22",
         "cset x23, lo",
-        "cmp w4, w21",
-        "cset x21, ls",
+        "cmp w24, w22",
+        "cset x25, ls",
         "cmp x20, #0x1 (1)",
-        "csel x20, x21, x23, eq",
-        "mov w0, w22",
-        "bfi w0, w20, #29, #1",
-        "mov w20, w0",
+        "csel x22, x25, x23, eq",
+        "mov w20, w21",
+        "bfi w20, w22, #29, #1",
         "msr nzcv, x20"
       ]
     },
     "adcx rax, rbx": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 16,
       "Comment": [
         "0x66 REX.W 0x0f 0x38 0xf6"
       ],
       "ExpectedArm64ASM": [
         "cset w20, hs",
-        "add x21, x7, x20",
-        "add x4, x4, x21",
-        "mrs x21, nzcv",
-        "cmp x4, x7",
-        "cset x22, lo",
-        "cmp x4, x7",
-        "cset x23, ls",
+        "mov x21, x7",
+        "mov x22, x4",
+        "add x23, x21, x20",
+        "add x24, x22, x23",
+        "mov x4, x24",
+        "mrs x22, nzcv",
+        "cmp x24, x21",
+        "cset x23, lo",
+        "cmp x24, x21",
+        "cset x25, ls",
         "cmp x20, #0x1 (1)",
-        "csel x20, x23, x22, eq",
-        "mov w0, w21",
-        "bfi w0, w20, #29, #1",
-        "mov w20, w0",
+        "csel x21, x25, x23, eq",
+        "mov w20, w22",
+        "bfi w20, w21, #29, #1",
         "msr nzcv, x20"
       ]
     },
     "adox eax, ebx": {
-      "ExpectedInstructionCount": 16,
+      "ExpectedInstructionCount": 18,
       "Comment": [
         "0xf3 0x0f 0x38 0xf6"
       ],
       "ExpectedArm64ASM": [
         "cset w20, vs",
-        "mov w21, w7",
-        "mov w22, w4",
-        "add w23, w21, w20",
-        "add w4, w22, w23",
-        "mrs x22, nzcv",
-        "cmp w4, w21",
+        "mov x21, x7",
+        "mov w22, w21",
+        "mov x21, x4",
+        "mov w23, w21",
+        "add w21, w22, w20",
+        "add w24, w23, w21",
+        "mov x4, x24",
+        "mrs x21, nzcv",
+        "cmp w24, w22",
         "cset x23, lo",
-        "cmp w4, w21",
-        "cset x21, ls",
+        "cmp w24, w22",
+        "cset x25, ls",
         "cmp x20, #0x1 (1)",
-        "csel x20, x21, x23, eq",
-        "mov w0, w22",
-        "bfi w0, w20, #28, #1",
-        "mov w20, w0",
+        "csel x22, x25, x23, eq",
+        "mov w20, w21",
+        "bfi w20, w22, #28, #1",
         "msr nzcv, x20"
       ]
     },
     "adox rax, rbx": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 16,
       "Comment": [
         "0xf3 REX.W 0x0f 0x38 0xf6"
       ],
       "ExpectedArm64ASM": [
         "cset w20, vs",
-        "add x21, x7, x20",
-        "add x4, x4, x21",
-        "mrs x21, nzcv",
-        "cmp x4, x7",
-        "cset x22, lo",
-        "cmp x4, x7",
-        "cset x23, ls",
+        "mov x21, x7",
+        "mov x22, x4",
+        "add x23, x21, x20",
+        "add x24, x22, x23",
+        "mov x4, x24",
+        "mrs x22, nzcv",
+        "cmp x24, x21",
+        "cset x23, lo",
+        "cmp x24, x21",
+        "cset x25, ls",
         "cmp x20, #0x1 (1)",
-        "csel x20, x23, x22, eq",
-        "mov w0, w21",
-        "bfi w0, w20, #28, #1",
-        "mov w20, w0",
+        "csel x21, x25, x23, eq",
+        "mov w20, w22",
+        "bfi w20, w21, #28, #1",
         "msr nzcv, x20"
       ]
     }
diff --git a/unittests/InstructionCountCI/H0F3A.json b/unittests/InstructionCountCI/H0F3A.json
index 752bc3d179..82b7a8b193 100644
--- a/unittests/InstructionCountCI/H0F3A.json
+++ b/unittests/InstructionCountCI/H0F3A.json
@@ -32,8 +32,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "ext v2.8b, v2.8b, v3.8b, #1",
-        "str d2, [x28, #768]"
+        "ext v4.8b, v2.8b, v3.8b, #1",
+        "str d4, [x28, #768]"
       ]
     },
     "palignr mm0, mm1, 255": {
@@ -47,213 +47,273 @@
       ]
     },
     "roundps xmm0, xmm1, 00000000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Nearest rounding",
         "0x66 0x0f 0x3a 0x08"
       ],
       "ExpectedArm64ASM": [
-        "frintn v16.4s, v17.4s"
+        "mov v2.16b, v17.16b",
+        "frintn v3.4s, v2.4s",
+        "mov v16.16b, v3.16b"
       ]
     },
     "roundps xmm0, xmm1, 00000001b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "-inf rounding",
         "0x66 0x0f 0x3a 0x08"
       ],
       "ExpectedArm64ASM": [
-        "frintm v16.4s, v17.4s"
+        "mov v2.16b, v17.16b",
+        "frintm v3.4s, v2.4s",
+        "mov v16.16b, v3.16b"
       ]
     },
     "roundps xmm0, xmm1, 00000010b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "+inf rounding",
         "0x66 0x0f 0x3a 0x08"
       ],
       "ExpectedArm64ASM": [
-        "frintp v16.4s, v17.4s"
+        "mov v2.16b, v17.16b",
+        "frintp v3.4s, v2.4s",
+        "mov v16.16b, v3.16b"
       ]
     },
     "roundps xmm0, xmm1, 00000011b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "truncate rounding",
         "0x66 0x0f 0x3a 0x08"
       ],
       "ExpectedArm64ASM": [
-        "frintz v16.4s, v17.4s"
+        "mov v2.16b, v17.16b",
+        "frintz v3.4s, v2.4s",
+        "mov v16.16b, v3.16b"
       ]
     },
     "roundps xmm0, xmm1, 00000100b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "host rounding mode rounding",
         "0x66 0x0f 0x3a 0x08"
       ],
       "ExpectedArm64ASM": [
-        "frinti v16.4s, v17.4s"
+        "mov v2.16b, v17.16b",
+        "frinti v3.4s, v2.4s",
+        "mov v16.16b, v3.16b"
       ]
     },
     "roundpd xmm0, xmm1, 00000000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Nearest rounding",
         "0x66 0x0f 0x3a 0x09"
       ],
       "ExpectedArm64ASM": [
-        "frintn v16.2d, v17.2d"
+        "mov v2.16b, v17.16b",
+        "frintn v3.2d, v2.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "roundpd xmm0, xmm1, 00000001b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "-inf rounding",
         "0x66 0x0f 0x3a 0x09"
       ],
       "ExpectedArm64ASM": [
-        "frintm v16.2d, v17.2d"
+        "mov v2.16b, v17.16b",
+        "frintm v3.2d, v2.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "roundpd xmm0, xmm1, 00000010b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "+inf rounding",
         "0x66 0x0f 0x3a 0x09"
       ],
       "ExpectedArm64ASM": [
-        "frintp v16.2d, v17.2d"
+        "mov v2.16b, v17.16b",
+        "frintp v3.2d, v2.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "roundpd xmm0, xmm1, 00000011b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "truncate rounding",
         "0x66 0x0f 0x3a 0x09"
       ],
       "ExpectedArm64ASM": [
-        "frintz v16.2d, v17.2d"
+        "mov v2.16b, v17.16b",
+        "frintz v3.2d, v2.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "roundpd xmm0, xmm1, 00000100b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "host rounding mode rounding",
         "0x66 0x0f 0x3a 0x09"
       ],
       "ExpectedArm64ASM": [
-        "frinti v16.2d, v17.2d"
+        "mov v2.16b, v17.16b",
+        "frinti v3.2d, v2.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "roundss xmm0, xmm1, 00000000b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Nearest rounding",
         "0x66 0x0f 0x3a 0x0a"
       ],
       "ExpectedArm64ASM": [
-        "frintn s0, s17",
-        "mov v16.s[0], v0.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "frintn s0, s3",
+        "mov v4.s[0], v0.s[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "roundss xmm0, xmm1, 00000001b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "-inf rounding",
         "0x66 0x0f 0x3a 0x0a"
       ],
       "ExpectedArm64ASM": [
-        "frintm s0, s17",
-        "mov v16.s[0], v0.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "frintm s0, s3",
+        "mov v4.s[0], v0.s[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "roundss xmm0, xmm1, 00000010b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "+inf rounding",
         "0x66 0x0f 0x3a 0x0a"
       ],
       "ExpectedArm64ASM": [
-        "frintp s0, s17",
-        "mov v16.s[0], v0.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "frintp s0, s3",
+        "mov v4.s[0], v0.s[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "roundss xmm0, xmm1, 00000011b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "truncate rounding",
         "0x66 0x0f 0x3a 0x0a"
       ],
       "ExpectedArm64ASM": [
-        "frintz s0, s17",
-        "mov v16.s[0], v0.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "frintz s0, s3",
+        "mov v4.s[0], v0.s[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "roundss xmm0, xmm1, 00000100b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "host rounding mode rounding",
         "0x66 0x0f 0x3a 0x0a"
       ],
       "ExpectedArm64ASM": [
-        "frinti s0, s17",
-        "mov v16.s[0], v0.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "frinti s0, s3",
+        "mov v4.s[0], v0.s[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "roundsd xmm0, xmm1, 00000000b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Nearest rounding",
         "0x66 0x0f 0x3a 0x0b"
       ],
       "ExpectedArm64ASM": [
-        "frintn d0, d17",
-        "mov v16.d[0], v0.d[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "frintn d0, d3",
+        "mov v4.d[0], v0.d[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "roundsd xmm0, xmm1, 00000001b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "-inf rounding",
         "0x66 0x0f 0x3a 0x0b"
       ],
       "ExpectedArm64ASM": [
-        "frintm d0, d17",
-        "mov v16.d[0], v0.d[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "frintm d0, d3",
+        "mov v4.d[0], v0.d[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "roundsd xmm0, xmm1, 00000010b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "+inf rounding",
         "0x66 0x0f 0x3a 0x0b"
       ],
       "ExpectedArm64ASM": [
-        "frintp d0, d17",
-        "mov v16.d[0], v0.d[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "frintp d0, d3",
+        "mov v4.d[0], v0.d[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "roundsd xmm0, xmm1, 00000011b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "truncate rounding",
         "0x66 0x0f 0x3a 0x0b"
       ],
       "ExpectedArm64ASM": [
-        "frintz d0, d17",
-        "mov v16.d[0], v0.d[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "frintz d0, d3",
+        "mov v4.d[0], v0.d[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "roundsd xmm0, xmm1, 00000100b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "host rounding mode rounding",
         "0x66 0x0f 0x3a 0x0b"
       ],
       "ExpectedArm64ASM": [
-        "frinti d0, d17",
-        "mov v16.d[0], v0.d[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "frinti d0, d3",
+        "mov v4.d[0], v0.d[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "blendps xmm0, xmm1, 0000b": {
@@ -264,146 +324,207 @@
       "ExpectedArm64ASM": []
     },
     "blendps xmm0, xmm1, 0001b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x3a 0x0c"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.s[0], v17.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.s[0], v3.s[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "blendps xmm0, xmm1, 0010b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x3a 0x0c"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.s[1], v17.s[1]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.s[1], v3.s[1]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "blendps xmm0, xmm1, 0011b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x3a 0x0c"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.d[0], v17.d[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.d[0], v3.d[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "blendps xmm0, xmm1, 0100b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x3a 0x0c"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.s[2], v17.s[2]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.s[2], v3.s[2]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "blendps xmm0, xmm1, 0101b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x3a 0x0c"
       ],
       "ExpectedArm64ASM": [
-        "rev64 v2.4s, v17.4s",
-        "trn2 v16.4s, v2.4s, v16.4s"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "rev64 v4.4s, v3.4s",
+        "trn2 v3.4s, v4.4s, v2.4s",
+        "mov v16.16b, v3.16b"
       ]
     },
     "blendps xmm0, xmm1, 0110b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0x66 0x0f 0x3a 0x0c"
       ],
       "ExpectedArm64ASM": [
-        "ldr q2, [x28, #2176]",
-        "tbx v16.16b, {v17.16b}, v2.16b"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "ldr q4, [x28, #2176]",
+        "mov v0.16b, v2.16b",
+        "tbx v0.16b, {v3.16b}, v4.16b",
+        "mov v5.16b, v0.16b",
+        "mov v16.16b, v5.16b"
       ]
     },
     "blendps xmm0, xmm1, 0111b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0x66 0x0f 0x3a 0x0c"
       ],
       "ExpectedArm64ASM": [
-        "ldr q2, [x28, #2192]",
-        "tbx v16.16b, {v17.16b}, v2.16b"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "ldr q4, [x28, #2192]",
+        "mov v0.16b, v2.16b",
+        "tbx v0.16b, {v3.16b}, v4.16b",
+        "mov v5.16b, v0.16b",
+        "mov v16.16b, v5.16b"
       ]
     },
     "blendps xmm0, xmm1, 1000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x3a 0x0c"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.s[3], v17.s[3]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.s[3], v3.s[3]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "blendps xmm0, xmm1, 1001b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0x66 0x0f 0x3a 0x0c"
       ],
       "ExpectedArm64ASM": [
-        "ldr q2, [x28, #2208]",
-        "tbx v16.16b, {v17.16b}, v2.16b"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "ldr q4, [x28, #2208]",
+        "mov v0.16b, v2.16b",
+        "tbx v0.16b, {v3.16b}, v4.16b",
+        "mov v5.16b, v0.16b",
+        "mov v16.16b, v5.16b"
       ]
     },
     "blendps xmm0, xmm1, 1010b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x3a 0x0c"
       ],
       "ExpectedArm64ASM": [
-        "rev64 v2.4s, v16.4s",
-        "trn2 v16.4s, v2.4s, v17.4s"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "rev64 v4.4s, v2.4s",
+        "trn2 v2.4s, v4.4s, v3.4s",
+        "mov v16.16b, v2.16b"
       ]
     },
     "blendps xmm0, xmm1, 1011b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0x66 0x0f 0x3a 0x0c"
       ],
       "ExpectedArm64ASM": [
-        "ldr q2, [x28, #2224]",
-        "tbx v16.16b, {v17.16b}, v2.16b"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "ldr q4, [x28, #2224]",
+        "mov v0.16b, v2.16b",
+        "tbx v0.16b, {v3.16b}, v4.16b",
+        "mov v5.16b, v0.16b",
+        "mov v16.16b, v5.16b"
       ]
     },
     "blendps xmm0, xmm1, 1100b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x3a 0x0c"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.d[1], v17.d[1]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.d[1], v3.d[1]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "blendps xmm0, xmm1, 1101b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0x66 0x0f 0x3a 0x0c"
       ],
       "ExpectedArm64ASM": [
-        "ldr q2, [x28, #2240]",
-        "tbx v16.16b, {v17.16b}, v2.16b"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "ldr q4, [x28, #2240]",
+        "mov v0.16b, v2.16b",
+        "tbx v0.16b, {v3.16b}, v4.16b",
+        "mov v5.16b, v0.16b",
+        "mov v16.16b, v5.16b"
       ]
     },
     "blendps xmm0, xmm1, 1110b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0x66 0x0f 0x3a 0x0c"
       ],
       "ExpectedArm64ASM": [
-        "ldr q2, [x28, #2256]",
-        "tbx v16.16b, {v17.16b}, v2.16b"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "ldr q4, [x28, #2256]",
+        "mov v0.16b, v2.16b",
+        "tbx v0.16b, {v3.16b}, v4.16b",
+        "mov v5.16b, v0.16b",
+        "mov v16.16b, v5.16b"
       ]
     },
     "blendps xmm0, xmm1, 1111b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "0x66 0x0f 0x3a 0x0c"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b"
+        "mov v2.16b, v17.16b",
+        "mov v16.16b, v2.16b"
       ]
     },
     "blendpd xmm0, xmm1, 00b": {
@@ -414,30 +535,39 @@
       "ExpectedArm64ASM": []
     },
     "blendpd xmm0, xmm1, 01b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x3a 0x0d"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.d[0], v17.d[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.d[0], v3.d[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "blendpd xmm0, xmm1, 10b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x3a 0x0d"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.d[1], v17.d[1]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.d[1], v3.d[1]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "blendpd xmm0, xmm1, 11b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "0x66 0x0f 0x3a 0x0d"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b"
+        "mov v2.16b, v17.16b",
+        "mov v16.16b, v2.16b"
       ]
     },
     "pblendw xmm0, xmm1, 00000000b": {
@@ -448,1149 +578,1405 @@
       "ExpectedArm64ASM": []
     },
     "pblendw xmm0, xmm1, 00000001b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x3a 0x0e"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.h[0], v17.h[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.h[0], v3.h[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pblendw xmm0, xmm1, 11010111b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0x66 0x0f 0x3a 0x0e"
       ],
       "ExpectedArm64ASM": [
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
         "ldr x0, [x28, #1792]",
-        "ldr q2, [x0, #3440]",
-        "tbx v16.16b, {v17.16b}, v2.16b"
+        "ldr q4, [x0, #3440]",
+        "mov v0.16b, v2.16b",
+        "tbx v0.16b, {v3.16b}, v4.16b",
+        "mov v5.16b, v0.16b",
+        "mov v16.16b, v5.16b"
       ]
     },
     "pblendw xmm0, xmm1, 00000011b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x3a 0x0e"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.s[0], v17.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.s[0], v3.s[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pblendw xmm0, xmm1, 00001100b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x3a 0x0e"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.s[1], v17.s[1]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.s[1], v3.s[1]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pblendw xmm0, xmm1, 00110000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x3a 0x0e"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.s[2], v17.s[2]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.s[2], v3.s[2]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pblendw xmm0, xmm1, 11000000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x3a 0x0e"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.s[3], v17.s[3]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.s[3], v3.s[3]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pblendw xmm0, xmm1, 00001111b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x3a 0x0e"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.d[0], v17.d[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.d[0], v3.d[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pblendw xmm0, xmm1, 11110000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x3a 0x0e"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.d[1], v17.d[1]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.d[1], v3.d[1]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pblendw xmm0, xmm1, 11111111b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "0x66 0x0f 0x3a 0x0e"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b"
+        "mov v2.16b, v17.16b",
+        "mov v16.16b, v2.16b"
       ]
     },
     "palignr xmm0, xmm1, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "0x66 0x0f 0x3a 0x0f"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b"
+        "mov v2.16b, v17.16b",
+        "mov v16.16b, v2.16b"
       ]
     },
     "palignr xmm0, xmm1, 1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "0x66 0x0f 0x3a 0x0f"
       ],
       "ExpectedArm64ASM": [
-        "ext v16.16b, v17.16b, v16.16b, #1"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "ext v4.16b, v2.16b, v3.16b, #1",
+        "mov v16.16b, v4.16b"
       ]
     },
     "palignr xmm0, xmm1, 255": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "0x66 0x0f 0x3a 0x0f"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "movi v2.2d, #0x0",
+        "mov v16.16b, v2.16b"
       ]
     },
     "pextrb eax, xmm0, 0000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "0x66 0x0f 0x3a 0x14"
       ],
       "ExpectedArm64ASM": [
-        "umov w4, v16.b[0]"
+        "mov v2.16b, v16.16b",
+        "umov w20, v2.b[0]",
+        "mov x4, x20"
       ]
     },
     "pextrb eax, xmm0, 1111b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "0x66 0x0f 0x3a 0x14"
       ],
       "ExpectedArm64ASM": [
-        "umov w4, v16.b[15]"
+        "mov v2.16b, v16.16b",
+        "umov w20, v2.b[15]",
+        "mov x4, x20"
       ]
     },
     "pextrw eax, xmm0, 000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "0x66 0x0f 0x3a 0x15"
       ],
       "ExpectedArm64ASM": [
-        "umov w4, v16.h[0]"
+        "mov v2.16b, v16.16b",
+        "umov w20, v2.h[0]",
+        "mov x4, x20"
       ]
     },
     "pextrw eax, xmm0, 111b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "0x66 0x0f 0x3a 0x15"
       ],
       "ExpectedArm64ASM": [
-        "umov w4, v16.h[7]"
+        "mov v2.16b, v16.16b",
+        "umov w20, v2.h[7]",
+        "mov x4, x20"
       ]
     },
     "pextrd eax, xmm0, 00b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "0x66 0x0f 0x3a 0x16"
       ],
       "ExpectedArm64ASM": [
-        "mov w4, v16.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov w20, v2.s[0]",
+        "mov x4, x20"
       ]
     },
     "pextrd eax, xmm0, 11b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "0x66 0x0f 0x3a 0x16"
       ],
       "ExpectedArm64ASM": [
-        "mov w4, v16.s[3]"
+        "mov v2.16b, v16.16b",
+        "mov w20, v2.s[3]",
+        "mov x4, x20"
       ]
     },
     "pextrq rax, xmm0, 0b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "0x66 REX.W 0x0f 0x3a 0x16"
       ],
       "ExpectedArm64ASM": [
-        "mov x4, v16.d[0]"
+        "mov v2.16b, v16.16b",
+        "mov x20, v2.d[0]",
+        "mov x4, x20"
       ]
     },
     "pextrq rax, xmm0, 1b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "0x66 REX.W 0x0f 0x3a 0x16"
       ],
       "ExpectedArm64ASM": [
-        "mov x4, v16.d[1]"
+        "mov v2.16b, v16.16b",
+        "mov x20, v2.d[1]",
+        "mov x4, x20"
       ]
     },
     "pextrb [rax], xmm0, 0000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "0x66 0x0f 0x3a 0x14"
       ],
       "ExpectedArm64ASM": [
-        "st1 {v16.b}[0], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "st1 {v2.b}[0], [x20]"
       ]
     },
     "pextrb [rax], xmm0, 1111b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "0x66 0x0f 0x3a 0x14"
       ],
       "ExpectedArm64ASM": [
-        "st1 {v16.b}[15], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "st1 {v2.b}[15], [x20]"
       ]
     },
     "pextrw [rax], xmm0, 000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "0x66 0x0f 0x3a 0x15"
       ],
       "ExpectedArm64ASM": [
-        "st1 {v16.h}[0], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "st1 {v2.h}[0], [x20]"
       ]
     },
     "pextrw [rax], xmm0, 111b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "0x66 0x0f 0x3a 0x15"
       ],
       "ExpectedArm64ASM": [
-        "st1 {v16.h}[7], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "st1 {v2.h}[7], [x20]"
       ]
     },
     "pextrd [rax], xmm0, 00b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "0x66 0x0f 0x3a 0x16"
       ],
       "ExpectedArm64ASM": [
-        "st1 {v16.s}[0], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "st1 {v2.s}[0], [x20]"
       ]
     },
     "pextrd [rax], xmm0, 11b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "0x66 0x0f 0x3a 0x16"
       ],
       "ExpectedArm64ASM": [
-        "st1 {v16.s}[3], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "st1 {v2.s}[3], [x20]"
       ]
     },
     "pextrq [rax], xmm0, 0b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "0x66 REX.W 0x0f 0x3a 0x16"
       ],
       "ExpectedArm64ASM": [
-        "st1 {v16.d}[0], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "st1 {v2.d}[0], [x20]"
       ]
     },
     "pextrq [rax], xmm0, 1b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "0x66 REX.W 0x0f 0x3a 0x16"
       ],
       "ExpectedArm64ASM": [
-        "st1 {v16.d}[1], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "st1 {v2.d}[1], [x20]"
       ]
     },
     "extractps eax, xmm0, 00b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "0x66 0x0f 0x3a 0x17"
       ],
       "ExpectedArm64ASM": [
-        "mov w4, v16.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov w20, v2.s[0]",
+        "mov x4, x20"
       ]
     },
     "extractps eax, xmm0, 11b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "0x66 0x0f 0x3a 0x17"
       ],
       "ExpectedArm64ASM": [
-        "mov w4, v16.s[3]"
+        "mov v2.16b, v16.16b",
+        "mov w20, v2.s[3]",
+        "mov x4, x20"
       ]
     },
     "pinsrb xmm0, eax, 0000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x3a 0x20"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.b[0], w4"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.b[0], w20",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pinsrb xmm0, eax, 0001b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x3a 0x20"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.b[1], w4"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.b[1], w20",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pinsrb xmm0, eax, 1111b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x3a 0x20"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.b[15], w4"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.b[15], w20",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pinsrb xmm0, [rax], 0000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x3a 0x20"
       ],
       "ExpectedArm64ASM": [
-        "ld1 {v16.b}[0], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "ld1 {v3.b}[0], [x20]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pinsrb xmm0, [rax], 0001b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x3a 0x20"
       ],
       "ExpectedArm64ASM": [
-        "ld1 {v16.b}[1], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "ld1 {v3.b}[1], [x20]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pinsrb xmm0, [rax], 1111b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x3a 0x20"
       ],
       "ExpectedArm64ASM": [
-        "ld1 {v16.b}[15], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "ld1 {v3.b}[15], [x20]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "insertps xmm0, xmm1, 00000000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x3a 0x21"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.s[0], v17.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.s[0], v3.s[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "insertps xmm0, xmm1, 00001111b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "0x66 0x0f 0x3a 0x21"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "movi v2.2d, #0x0",
+        "mov v16.16b, v2.16b"
       ]
     },
     "insertps xmm0, xmm1, 00010000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x3a 0x21"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.s[1], v17.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.s[1], v3.s[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pinsrd xmm0, eax, 00b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x3a 0x22"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.s[0], w4"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.s[0], w20",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pinsrd xmm0, eax, 01b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x3a 0x22"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.s[1], w4"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.s[1], w20",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pinsrd xmm0, eax, 11b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x3a 0x22"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.s[3], w4"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.s[3], w20",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pinsrq xmm0, rax, 0b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 REX.W 0x0f 0x3a 0x22"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.d[0], x4"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.d[0], x20",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pinsrq xmm0, rax, 1b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 REX.W 0x0f 0x3a 0x22"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.d[1], x4"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.d[1], x20",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pinsrd xmm0, [rax], 00b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x3a 0x22"
       ],
       "ExpectedArm64ASM": [
-        "ld1 {v16.s}[0], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "ld1 {v3.s}[0], [x20]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pinsrd xmm0, [rax], 01b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x3a 0x22"
       ],
       "ExpectedArm64ASM": [
-        "ld1 {v16.s}[1], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "ld1 {v3.s}[1], [x20]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pinsrd xmm0, [rax], 11b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 0x0f 0x3a 0x22"
       ],
       "ExpectedArm64ASM": [
-        "ld1 {v16.s}[3], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "ld1 {v3.s}[3], [x20]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pinsrq xmm0, [rax], 0b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 REX.W 0x0f 0x3a 0x22"
       ],
       "ExpectedArm64ASM": [
-        "ld1 {v16.d}[0], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "ld1 {v3.d}[0], [x20]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pinsrq xmm0, [rax], 1b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x66 REX.W 0x0f 0x3a 0x22"
       ],
       "ExpectedArm64ASM": [
-        "ld1 {v16.d}[1], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "ld1 {v3.d}[1], [x20]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "dpps xmm0, xmm1, 00000000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "movi v2.2d, #0x0",
+        "mov v16.16b, v2.16b"
       ]
     },
     "dpps xmm0, xmm1, 00001111b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "movi v2.2d, #0x0",
+        "mov v16.16b, v2.16b"
       ]
     },
     "dpps xmm0, xmm1, 11110000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "movi v2.2d, #0x0",
+        "mov v16.16b, v2.16b"
       ]
     },
     "dpps xmm0, xmm1, 11110001b": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
         "movi v2.2d, #0x0",
-        "fmul v3.4s, v16.4s, v17.4s",
-        "faddp v3.4s, v3.4s, v3.4s",
+        "mov v3.16b, v16.16b",
+        "mov v4.16b, v17.16b",
+        "fmul v5.4s, v3.4s, v4.4s",
+        "faddp v3.4s, v5.4s, v5.4s",
         "faddp s3, v3.2s",
-        "zip1 v16.4s, v3.4s, v2.4s"
+        "zip1 v4.4s, v3.4s, v2.4s",
+        "mov v16.16b, v4.16b"
       ]
     },
     "dpps xmm0, xmm1, 11110010b": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
         "movi v2.2d, #0x0",
-        "fmul v3.4s, v16.4s, v17.4s",
-        "faddp v3.4s, v3.4s, v3.4s",
+        "mov v3.16b, v16.16b",
+        "mov v4.16b, v17.16b",
+        "fmul v5.4s, v3.4s, v4.4s",
+        "faddp v3.4s, v5.4s, v5.4s",
         "faddp s3, v3.2s",
-        "zip1 v16.2s, v2.2s, v3.2s"
+        "zip1 v4.2s, v2.2s, v3.2s",
+        "mov v16.16b, v4.16b"
       ]
     },
     "dpps xmm0, xmm1, 11110011b": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
-        "fmul v2.4s, v16.4s, v17.4s",
-        "faddp v2.4s, v2.4s, v2.4s",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "fmul v4.4s, v2.4s, v3.4s",
+        "faddp v2.4s, v4.4s, v4.4s",
         "faddp s2, v2.2s",
-        "dup v16.2s, v2.s[0]"
+        "dup v3.2s, v2.s[0]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "dpps xmm0, xmm1, 11110100b": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
         "movi v2.2d, #0x0",
-        "fmul v3.4s, v16.4s, v17.4s",
-        "faddp v3.4s, v3.4s, v3.4s",
+        "mov v3.16b, v16.16b",
+        "mov v4.16b, v17.16b",
+        "fmul v5.4s, v3.4s, v4.4s",
+        "faddp v3.4s, v5.4s, v5.4s",
         "faddp s3, v3.2s",
-        "zip1 v16.2d, v2.2d, v3.2d"
+        "zip1 v4.2d, v2.2d, v3.2d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "dpps xmm0, xmm1, 11110101b": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
-        "fmul v2.4s, v16.4s, v17.4s",
-        "faddp v2.4s, v2.4s, v2.4s",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "fmul v4.4s, v2.4s, v3.4s",
+        "faddp v2.4s, v4.4s, v4.4s",
         "faddp s2, v2.2s",
-        "zip1 v16.2d, v2.2d, v2.2d"
+        "zip1 v3.2d, v2.2d, v2.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "dpps xmm0, xmm1, 11110110b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
         "movi v2.2d, #0x0",
-        "fmul v3.4s, v16.4s, v17.4s",
-        "faddp v3.4s, v3.4s, v3.4s",
+        "mov v3.16b, v16.16b",
+        "mov v4.16b, v17.16b",
+        "fmul v5.4s, v3.4s, v4.4s",
+        "faddp v3.4s, v5.4s, v5.4s",
         "faddp s3, v3.2s",
-        "mov v2.s[1], v3.s[0]",
-        "mov v16.16b, v2.16b",
-        "mov v16.s[2], v3.s[0]"
+        "mov v4.16b, v2.16b",
+        "mov v4.s[1], v3.s[0]",
+        "mov v2.16b, v4.16b",
+        "mov v2.s[2], v3.s[0]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "dpps xmm0, xmm1, 11110111b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
         "movi v2.2d, #0x0",
-        "fmul v3.4s, v16.4s, v17.4s",
-        "faddp v3.4s, v3.4s, v3.4s",
+        "mov v3.16b, v16.16b",
+        "mov v4.16b, v17.16b",
+        "fmul v5.4s, v3.4s, v4.4s",
+        "faddp v3.4s, v5.4s, v5.4s",
         "faddp s3, v3.2s",
-        "dup v3.4s, v3.s[0]",
-        "mov v16.16b, v3.16b",
-        "mov v16.s[3], v2.s[0]"
+        "dup v4.4s, v3.s[0]",
+        "mov v3.16b, v4.16b",
+        "mov v3.s[3], v2.s[0]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "dpps xmm0, xmm1, 11111000b": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
         "movi v2.2d, #0x0",
-        "fmul v3.4s, v16.4s, v17.4s",
-        "faddp v3.4s, v3.4s, v3.4s",
+        "mov v3.16b, v16.16b",
+        "mov v4.16b, v17.16b",
+        "fmul v5.4s, v3.4s, v4.4s",
+        "faddp v3.4s, v5.4s, v5.4s",
         "faddp s3, v3.2s",
-        "ext v16.16b, v2.16b, v3.16b, #4"
+        "ext v4.16b, v2.16b, v3.16b, #4",
+        "mov v16.16b, v4.16b"
       ]
     },
     "dpps xmm0, xmm1, 11111001b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
         "movi v2.2d, #0x0",
-        "fmul v3.4s, v16.4s, v17.4s",
-        "faddp v3.4s, v3.4s, v3.4s",
+        "mov v3.16b, v16.16b",
+        "mov v4.16b, v17.16b",
+        "fmul v5.4s, v3.4s, v4.4s",
+        "faddp v3.4s, v5.4s, v5.4s",
         "faddp s3, v3.2s",
-        "mov v2.s[0], v3.s[0]",
-        "mov v16.16b, v2.16b",
-        "mov v16.s[3], v3.s[0]"
+        "mov v4.16b, v2.16b",
+        "mov v4.s[0], v3.s[0]",
+        "mov v2.16b, v4.16b",
+        "mov v2.s[3], v3.s[0]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "dpps xmm0, xmm1, 11111010b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
         "movi v2.2d, #0x0",
-        "fmul v3.4s, v16.4s, v17.4s",
-        "faddp v3.4s, v3.4s, v3.4s",
+        "mov v3.16b, v16.16b",
+        "mov v4.16b, v17.16b",
+        "fmul v5.4s, v3.4s, v4.4s",
+        "faddp v3.4s, v5.4s, v5.4s",
         "faddp s3, v3.2s",
-        "dup v3.4s, v3.s[0]",
-        "zip1 v16.4s, v2.4s, v3.4s"
+        "dup v4.4s, v3.s[0]",
+        "zip1 v3.4s, v2.4s, v4.4s",
+        "mov v16.16b, v3.16b"
       ]
     },
     "dpps xmm0, xmm1, 11111011b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
         "movi v2.2d, #0x0",
-        "fmul v3.4s, v16.4s, v17.4s",
-        "faddp v3.4s, v3.4s, v3.4s",
+        "mov v3.16b, v16.16b",
+        "mov v4.16b, v17.16b",
+        "fmul v5.4s, v3.4s, v4.4s",
+        "faddp v3.4s, v5.4s, v5.4s",
         "faddp s3, v3.2s",
-        "dup v3.4s, v3.s[0]",
-        "mov v16.16b, v3.16b",
-        "mov v16.s[2], v2.s[0]"
+        "dup v4.4s, v3.s[0]",
+        "mov v3.16b, v4.16b",
+        "mov v3.s[2], v2.s[0]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "dpps xmm0, xmm1, 11111100b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
         "movi v2.2d, #0x0",
-        "fmul v3.4s, v16.4s, v17.4s",
-        "faddp v3.4s, v3.4s, v3.4s",
+        "mov v3.16b, v16.16b",
+        "mov v4.16b, v17.16b",
+        "fmul v5.4s, v3.4s, v4.4s",
+        "faddp v3.4s, v5.4s, v5.4s",
         "faddp s3, v3.2s",
-        "dup v3.4s, v3.s[0]",
-        "zip1 v16.2d, v2.2d, v3.2d"
+        "dup v4.4s, v3.s[0]",
+        "zip1 v3.2d, v2.2d, v4.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "dpps xmm0, xmm1, 11111101b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
         "movi v2.2d, #0x0",
-        "fmul v3.4s, v16.4s, v17.4s",
-        "faddp v3.4s, v3.4s, v3.4s",
+        "mov v3.16b, v16.16b",
+        "mov v4.16b, v17.16b",
+        "fmul v5.4s, v3.4s, v4.4s",
+        "faddp v3.4s, v5.4s, v5.4s",
         "faddp s3, v3.2s",
-        "dup v3.4s, v3.s[0]",
-        "mov v16.16b, v3.16b",
-        "mov v16.s[1], v2.s[0]"
+        "dup v4.4s, v3.s[0]",
+        "mov v3.16b, v4.16b",
+        "mov v3.s[1], v2.s[0]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "dpps xmm0, xmm1, 11111110b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
         "movi v2.2d, #0x0",
-        "fmul v3.4s, v16.4s, v17.4s",
-        "faddp v3.4s, v3.4s, v3.4s",
+        "mov v3.16b, v16.16b",
+        "mov v4.16b, v17.16b",
+        "fmul v5.4s, v3.4s, v4.4s",
+        "faddp v3.4s, v5.4s, v5.4s",
         "faddp s3, v3.2s",
-        "dup v3.4s, v3.s[0]",
-        "mov v16.16b, v3.16b",
-        "mov v16.s[0], v2.s[0]"
+        "dup v4.4s, v3.s[0]",
+        "mov v3.16b, v4.16b",
+        "mov v3.s[0], v2.s[0]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "dpps xmm0, xmm1, 11111111b": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
-        "fmul v2.4s, v16.4s, v17.4s",
-        "faddp v2.4s, v2.4s, v2.4s",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "fmul v4.4s, v2.4s, v3.4s",
+        "faddp v2.4s, v4.4s, v4.4s",
         "faddp s2, v2.2s",
-        "dup v16.4s, v2.s[0]"
+        "dup v3.4s, v2.s[0]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "dppd xmm0, xmm1, 00000000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "0x66 0x0f 0x3a 0x41"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "movi v2.2d, #0x0",
+        "mov v16.16b, v2.16b"
       ]
     },
     "dppd xmm0, xmm1, 00001111b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "0x66 0x0f 0x3a 0x41"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "movi v2.2d, #0x0",
+        "mov v16.16b, v2.16b"
       ]
     },
     "dppd xmm0, xmm1, 11110000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "0x66 0x0f 0x3a 0x41"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "movi v2.2d, #0x0",
+        "mov v16.16b, v2.16b"
       ]
     },
     "dppd xmm0, xmm1, 11111111b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0x66 0x0f 0x3a 0x41"
       ],
       "ExpectedArm64ASM": [
-        "fmul v2.2d, v16.2d, v17.2d",
-        "faddp d2, v2.2d",
-        "dup v16.2d, v2.d[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "fmul v4.2d, v2.2d, v3.2d",
+        "faddp d2, v4.2d",
+        "dup v3.2d, v2.d[0]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "mpsadbw xmm0, xmm1, 000b": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 17,
       "Comment": [
         "0x66 0x0f 0x3a 0x42"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v17.s[0]",
-        "ext v3.16b, v16.16b, v16.16b, #0",
-        "ext v4.16b, v16.16b, v16.16b, #1",
-        "ext v5.16b, v16.16b, v16.16b, #2",
-        "ext v6.16b, v16.16b, v16.16b, #3",
-        "uabdl v3.8h, v3.8b, v2.8b",
-        "uabdl v4.8h, v4.8b, v2.8b",
-        "uabdl v5.8h, v5.8b, v2.8b",
-        "uabdl v2.8h, v6.8b, v2.8b",
-        "addp v3.8h, v3.8h, v5.8h",
-        "addp v2.8h, v4.8h, v2.8h",
-        "trn1 v4.4s, v3.4s, v2.4s",
-        "trn2 v2.4s, v3.4s, v2.4s",
-        "addp v16.8h, v4.8h, v2.8h"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v3.s[0]",
+        "ext v3.16b, v2.16b, v2.16b, #0",
+        "ext v5.16b, v2.16b, v2.16b, #1",
+        "ext v6.16b, v2.16b, v2.16b, #2",
+        "ext v7.16b, v2.16b, v2.16b, #3",
+        "uabdl v2.8h, v3.8b, v4.8b",
+        "uabdl v3.8h, v5.8b, v4.8b",
+        "uabdl v5.8h, v6.8b, v4.8b",
+        "uabdl v6.8h, v7.8b, v4.8b",
+        "addp v4.8h, v2.8h, v5.8h",
+        "addp v2.8h, v3.8h, v6.8h",
+        "trn1 v3.4s, v4.4s, v2.4s",
+        "trn2 v5.4s, v4.4s, v2.4s",
+        "addp v2.8h, v3.8h, v5.8h",
+        "mov v16.16b, v2.16b"
       ]
     },
     "mpsadbw xmm0, xmm1, 001b": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 17,
       "Comment": [
         "0x66 0x0f 0x3a 0x42"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v17.s[1]",
-        "ext v3.16b, v16.16b, v16.16b, #0",
-        "ext v4.16b, v16.16b, v16.16b, #1",
-        "ext v5.16b, v16.16b, v16.16b, #2",
-        "ext v6.16b, v16.16b, v16.16b, #3",
-        "uabdl v3.8h, v3.8b, v2.8b",
-        "uabdl v4.8h, v4.8b, v2.8b",
-        "uabdl v5.8h, v5.8b, v2.8b",
-        "uabdl v2.8h, v6.8b, v2.8b",
-        "addp v3.8h, v3.8h, v5.8h",
-        "addp v2.8h, v4.8h, v2.8h",
-        "trn1 v4.4s, v3.4s, v2.4s",
-        "trn2 v2.4s, v3.4s, v2.4s",
-        "addp v16.8h, v4.8h, v2.8h"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v3.s[1]",
+        "ext v3.16b, v2.16b, v2.16b, #0",
+        "ext v5.16b, v2.16b, v2.16b, #1",
+        "ext v6.16b, v2.16b, v2.16b, #2",
+        "ext v7.16b, v2.16b, v2.16b, #3",
+        "uabdl v2.8h, v3.8b, v4.8b",
+        "uabdl v3.8h, v5.8b, v4.8b",
+        "uabdl v5.8h, v6.8b, v4.8b",
+        "uabdl v6.8h, v7.8b, v4.8b",
+        "addp v4.8h, v2.8h, v5.8h",
+        "addp v2.8h, v3.8h, v6.8h",
+        "trn1 v3.4s, v4.4s, v2.4s",
+        "trn2 v5.4s, v4.4s, v2.4s",
+        "addp v2.8h, v3.8h, v5.8h",
+        "mov v16.16b, v2.16b"
       ]
     },
     "mpsadbw xmm0, xmm1, 010b": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 17,
       "Comment": [
         "0x66 0x0f 0x3a 0x42"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v17.s[2]",
-        "ext v3.16b, v16.16b, v16.16b, #0",
-        "ext v4.16b, v16.16b, v16.16b, #1",
-        "ext v5.16b, v16.16b, v16.16b, #2",
-        "ext v6.16b, v16.16b, v16.16b, #3",
-        "uabdl v3.8h, v3.8b, v2.8b",
-        "uabdl v4.8h, v4.8b, v2.8b",
-        "uabdl v5.8h, v5.8b, v2.8b",
-        "uabdl v2.8h, v6.8b, v2.8b",
-        "addp v3.8h, v3.8h, v5.8h",
-        "addp v2.8h, v4.8h, v2.8h",
-        "trn1 v4.4s, v3.4s, v2.4s",
-        "trn2 v2.4s, v3.4s, v2.4s",
-        "addp v16.8h, v4.8h, v2.8h"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v3.s[2]",
+        "ext v3.16b, v2.16b, v2.16b, #0",
+        "ext v5.16b, v2.16b, v2.16b, #1",
+        "ext v6.16b, v2.16b, v2.16b, #2",
+        "ext v7.16b, v2.16b, v2.16b, #3",
+        "uabdl v2.8h, v3.8b, v4.8b",
+        "uabdl v3.8h, v5.8b, v4.8b",
+        "uabdl v5.8h, v6.8b, v4.8b",
+        "uabdl v6.8h, v7.8b, v4.8b",
+        "addp v4.8h, v2.8h, v5.8h",
+        "addp v2.8h, v3.8h, v6.8h",
+        "trn1 v3.4s, v4.4s, v2.4s",
+        "trn2 v5.4s, v4.4s, v2.4s",
+        "addp v2.8h, v3.8h, v5.8h",
+        "mov v16.16b, v2.16b"
       ]
     },
     "mpsadbw xmm0, xmm1, 011b": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 17,
       "Comment": [
         "0x66 0x0f 0x3a 0x42"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v17.s[3]",
-        "ext v3.16b, v16.16b, v16.16b, #0",
-        "ext v4.16b, v16.16b, v16.16b, #1",
-        "ext v5.16b, v16.16b, v16.16b, #2",
-        "ext v6.16b, v16.16b, v16.16b, #3",
-        "uabdl v3.8h, v3.8b, v2.8b",
-        "uabdl v4.8h, v4.8b, v2.8b",
-        "uabdl v5.8h, v5.8b, v2.8b",
-        "uabdl v2.8h, v6.8b, v2.8b",
-        "addp v3.8h, v3.8h, v5.8h",
-        "addp v2.8h, v4.8h, v2.8h",
-        "trn1 v4.4s, v3.4s, v2.4s",
-        "trn2 v2.4s, v3.4s, v2.4s",
-        "addp v16.8h, v4.8h, v2.8h"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v3.s[3]",
+        "ext v3.16b, v2.16b, v2.16b, #0",
+        "ext v5.16b, v2.16b, v2.16b, #1",
+        "ext v6.16b, v2.16b, v2.16b, #2",
+        "ext v7.16b, v2.16b, v2.16b, #3",
+        "uabdl v2.8h, v3.8b, v4.8b",
+        "uabdl v3.8h, v5.8b, v4.8b",
+        "uabdl v5.8h, v6.8b, v4.8b",
+        "uabdl v6.8h, v7.8b, v4.8b",
+        "addp v4.8h, v2.8h, v5.8h",
+        "addp v2.8h, v3.8h, v6.8h",
+        "trn1 v3.4s, v4.4s, v2.4s",
+        "trn2 v5.4s, v4.4s, v2.4s",
+        "addp v2.8h, v3.8h, v5.8h",
+        "mov v16.16b, v2.16b"
       ]
     },
     "mpsadbw xmm0, xmm1, 100b": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 17,
       "Comment": [
         "0x66 0x0f 0x3a 0x42"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v17.s[0]",
-        "ext v3.16b, v16.16b, v16.16b, #4",
-        "ext v4.16b, v16.16b, v16.16b, #5",
-        "ext v5.16b, v16.16b, v16.16b, #6",
-        "ext v6.16b, v16.16b, v16.16b, #7",
-        "uabdl v3.8h, v3.8b, v2.8b",
-        "uabdl v4.8h, v4.8b, v2.8b",
-        "uabdl v5.8h, v5.8b, v2.8b",
-        "uabdl v2.8h, v6.8b, v2.8b",
-        "addp v3.8h, v3.8h, v5.8h",
-        "addp v2.8h, v4.8h, v2.8h",
-        "trn1 v4.4s, v3.4s, v2.4s",
-        "trn2 v2.4s, v3.4s, v2.4s",
-        "addp v16.8h, v4.8h, v2.8h"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v3.s[0]",
+        "ext v3.16b, v2.16b, v2.16b, #4",
+        "ext v5.16b, v2.16b, v2.16b, #5",
+        "ext v6.16b, v2.16b, v2.16b, #6",
+        "ext v7.16b, v2.16b, v2.16b, #7",
+        "uabdl v2.8h, v3.8b, v4.8b",
+        "uabdl v3.8h, v5.8b, v4.8b",
+        "uabdl v5.8h, v6.8b, v4.8b",
+        "uabdl v6.8h, v7.8b, v4.8b",
+        "addp v4.8h, v2.8h, v5.8h",
+        "addp v2.8h, v3.8h, v6.8h",
+        "trn1 v3.4s, v4.4s, v2.4s",
+        "trn2 v5.4s, v4.4s, v2.4s",
+        "addp v2.8h, v3.8h, v5.8h",
+        "mov v16.16b, v2.16b"
       ]
     },
     "mpsadbw xmm0, xmm1, 101b": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 17,
       "Comment": [
         "0x66 0x0f 0x3a 0x42"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v17.s[1]",
-        "ext v3.16b, v16.16b, v16.16b, #4",
-        "ext v4.16b, v16.16b, v16.16b, #5",
-        "ext v5.16b, v16.16b, v16.16b, #6",
-        "ext v6.16b, v16.16b, v16.16b, #7",
-        "uabdl v3.8h, v3.8b, v2.8b",
-        "uabdl v4.8h, v4.8b, v2.8b",
-        "uabdl v5.8h, v5.8b, v2.8b",
-        "uabdl v2.8h, v6.8b, v2.8b",
-        "addp v3.8h, v3.8h, v5.8h",
-        "addp v2.8h, v4.8h, v2.8h",
-        "trn1 v4.4s, v3.4s, v2.4s",
-        "trn2 v2.4s, v3.4s, v2.4s",
-        "addp v16.8h, v4.8h, v2.8h"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v3.s[1]",
+        "ext v3.16b, v2.16b, v2.16b, #4",
+        "ext v5.16b, v2.16b, v2.16b, #5",
+        "ext v6.16b, v2.16b, v2.16b, #6",
+        "ext v7.16b, v2.16b, v2.16b, #7",
+        "uabdl v2.8h, v3.8b, v4.8b",
+        "uabdl v3.8h, v5.8b, v4.8b",
+        "uabdl v5.8h, v6.8b, v4.8b",
+        "uabdl v6.8h, v7.8b, v4.8b",
+        "addp v4.8h, v2.8h, v5.8h",
+        "addp v2.8h, v3.8h, v6.8h",
+        "trn1 v3.4s, v4.4s, v2.4s",
+        "trn2 v5.4s, v4.4s, v2.4s",
+        "addp v2.8h, v3.8h, v5.8h",
+        "mov v16.16b, v2.16b"
       ]
     },
     "mpsadbw xmm0, xmm1, 110b": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 17,
       "Comment": [
         "0x66 0x0f 0x3a 0x42"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v17.s[2]",
-        "ext v3.16b, v16.16b, v16.16b, #4",
-        "ext v4.16b, v16.16b, v16.16b, #5",
-        "ext v5.16b, v16.16b, v16.16b, #6",
-        "ext v6.16b, v16.16b, v16.16b, #7",
-        "uabdl v3.8h, v3.8b, v2.8b",
-        "uabdl v4.8h, v4.8b, v2.8b",
-        "uabdl v5.8h, v5.8b, v2.8b",
-        "uabdl v2.8h, v6.8b, v2.8b",
-        "addp v3.8h, v3.8h, v5.8h",
-        "addp v2.8h, v4.8h, v2.8h",
-        "trn1 v4.4s, v3.4s, v2.4s",
-        "trn2 v2.4s, v3.4s, v2.4s",
-        "addp v16.8h, v4.8h, v2.8h"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v3.s[2]",
+        "ext v3.16b, v2.16b, v2.16b, #4",
+        "ext v5.16b, v2.16b, v2.16b, #5",
+        "ext v6.16b, v2.16b, v2.16b, #6",
+        "ext v7.16b, v2.16b, v2.16b, #7",
+        "uabdl v2.8h, v3.8b, v4.8b",
+        "uabdl v3.8h, v5.8b, v4.8b",
+        "uabdl v5.8h, v6.8b, v4.8b",
+        "uabdl v6.8h, v7.8b, v4.8b",
+        "addp v4.8h, v2.8h, v5.8h",
+        "addp v2.8h, v3.8h, v6.8h",
+        "trn1 v3.4s, v4.4s, v2.4s",
+        "trn2 v5.4s, v4.4s, v2.4s",
+        "addp v2.8h, v3.8h, v5.8h",
+        "mov v16.16b, v2.16b"
       ]
     },
     "mpsadbw xmm0, xmm1, 111b": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 17,
       "Comment": [
         "0x66 0x0f 0x3a 0x42"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v17.s[3]",
-        "ext v3.16b, v16.16b, v16.16b, #4",
-        "ext v4.16b, v16.16b, v16.16b, #5",
-        "ext v5.16b, v16.16b, v16.16b, #6",
-        "ext v6.16b, v16.16b, v16.16b, #7",
-        "uabdl v3.8h, v3.8b, v2.8b",
-        "uabdl v4.8h, v4.8b, v2.8b",
-        "uabdl v5.8h, v5.8b, v2.8b",
-        "uabdl v2.8h, v6.8b, v2.8b",
-        "addp v3.8h, v3.8h, v5.8h",
-        "addp v2.8h, v4.8h, v2.8h",
-        "trn1 v4.4s, v3.4s, v2.4s",
-        "trn2 v2.4s, v3.4s, v2.4s",
-        "addp v16.8h, v4.8h, v2.8h"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v3.s[3]",
+        "ext v3.16b, v2.16b, v2.16b, #4",
+        "ext v5.16b, v2.16b, v2.16b, #5",
+        "ext v6.16b, v2.16b, v2.16b, #6",
+        "ext v7.16b, v2.16b, v2.16b, #7",
+        "uabdl v2.8h, v3.8b, v4.8b",
+        "uabdl v3.8h, v5.8b, v4.8b",
+        "uabdl v5.8h, v6.8b, v4.8b",
+        "uabdl v6.8h, v7.8b, v4.8b",
+        "addp v4.8h, v2.8h, v5.8h",
+        "addp v2.8h, v3.8h, v6.8h",
+        "trn1 v3.4s, v4.4s, v2.4s",
+        "trn2 v5.4s, v4.4s, v2.4s",
+        "addp v2.8h, v3.8h, v5.8h",
+        "mov v16.16b, v2.16b"
       ]
     },
     "sha1rnds4 xmm0, xmm1, 00b": {
-      "ExpectedInstructionCount": 57,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0x66 0x0f 0x3a 0xcc"
       ],
       "ExpectedArm64ASM": [
+        "sub sp, sp, #0x60 (96)",
         "mov w20, #0x7999",
         "movk w20, #0x5a82, lsl #16",
-        "mov w20, v17.s[3]",
-        "mov w21, v16.s[3]",
-        "mov w22, v16.s[2]",
-        "mov w23, v16.s[1]",
-        "mov w24, v16.s[0]",
-        "and w25, w22, w23",
-        "bic w30, w24, w22",
-        "eor w25, w25, w30",
-        "ror w30, w21, #27",
-        "add w25, w25, w30",
-        "add w20, w25, w20",
-        "mov w25, #0x7999",
-        "movk w25, #0x5a82, lsl #16",
-        "add w20, w20, w25",
-        "ror w22, w22, #2",
-        "mov w25, v17.s[2]",
-        "add w24, w25, w24",
-        "and w25, w21, w22",
-        "bic w30, w23, w21",
-        "eor w25, w25, w30",
-        "ror w30, w20, #27",
-        "add w25, w25, w30",
-        "add w24, w25, w24",
-        "mov w25, #0x7999",
-        "movk w25, #0x5a82, lsl #16",
-        "add w24, w24, w25",
-        "ror w21, w21, #2",
-        "mov w25, v17.s[1]",
-        "add w23, w25, w23",
-        "and w25, w20, w21",
-        "bic w30, w22, w20",
-        "eor w25, w25, w30",
-        "ror w30, w24, #27",
-        "add w25, w25, w30",
-        "add w23, w25, w23",
-        "mov w25, #0x7999",
-        "movk w25, #0x5a82, lsl #16",
-        "add w23, w23, w25",
-        "ror w20, w20, #2",
-        "mov w30, v17.s[0]",
-        "add w22, w30, w22",
-        "and w30, w24, w20",
-        "bic w21, w21, w24",
-        "eor w21, w30, w21",
-        "ror w30, w23, #27",
-        "add w21, w21, w30",
-        "add w21, w21, w22",
-        "add w21, w21, w25",
-        "ror w22, w24, #2",
-        "mov v2.16b, v16.16b",
-        "mov v2.s[3], w21",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov w21, v3.s[3]",
+        "mov w22, v2.s[3]",
+        "mov w23, v2.s[2]",
+        "mov w24, v2.s[1]",
+        "mov w25, v2.s[0]",
+        "and w30, w23, w24",
+        "bic w18, w25, w23",
+        "str w24, [sp]",
+        "eor w24, w30, w18",
+        "ror w30, w22, #27",
+        "add w18, w24, w30",
+        "add w24, w18, w21",
+        "add w21, w24, w20",
+        "ror w24, w23, #2",
+        "mov w23, v3.s[2]",
+        "add w30, w23, w25",
+        "and w23, w22, w24",
+        "ldr w25, [sp]",
+        "bic w18, w25, w22",
+        "str w24, [sp, #32]",
+        "eor w24, w23, w18",
+        "ror w23, w21, #27",
+        "add w18, w24, w23",
+        "add w23, w18, w30",
+        "add w24, w23, w20",
+        "ror w23, w22, #2",
+        "mov w22, v3.s[1]",
+        "add w30, w22, w25",
+        "and w22, w21, w23",
+        "ldr w25, [sp, #32]",
+        "bic w18, w25, w21",
+        "str w23, [sp, #64]",
+        "eor w23, w22, w18",
+        "ror w22, w24, #27",
+        "add w18, w23, w22",
+        "add w22, w18, w30",
+        "add w23, w22, w20",
+        "ror w22, w21, #2",
+        "mov w21, v3.s[0]",
+        "add w30, w21, w25",
+        "and w21, w24, w22",
+        "ldr w25, [sp, #64]",
+        "bic w18, w25, w24",
+        "eor w25, w21, w18",
+        "ror w21, w23, #27",
+        "add w18, w25, w21",
+        "add w21, w18, w30",
+        "add w25, w21, w20",
+        "ror w20, w24, #2",
+        "mov v3.16b, v2.16b",
+        "mov v3.s[3], w25",
+        "mov v2.16b, v3.16b",
         "mov v2.s[2], w23",
-        "mov v2.s[1], w22",
+        "mov v3.16b, v2.16b",
+        "mov v3.s[1], w20",
+        "mov v2.16b, v3.16b",
+        "mov v2.s[0], w22",
         "mov v16.16b, v2.16b",
-        "mov v16.s[0], w20"
+        "add sp, sp, #0x60 (96)"
       ]
     },
     "sha1rnds4 xmm0, xmm1, 01b": {
-      "ExpectedInstructionCount": 53,
+      "ExpectedInstructionCount": 60,
       "Comment": [
         "0x66 0x0f 0x3a 0xcc"
       ],
       "ExpectedArm64ASM": [
+        "sub sp, sp, #0x60 (96)",
         "mov w20, #0xeba1",
         "movk w20, #0x6ed9, lsl #16",
-        "mov w20, v17.s[3]",
-        "mov w21, v16.s[3]",
-        "mov w22, v16.s[2]",
-        "mov w23, v16.s[1]",
-        "mov w24, v16.s[0]",
-        "eor w25, w22, w23",
-        "eor w25, w25, w24",
-        "ror w30, w21, #27",
-        "add w25, w25, w30",
-        "add w20, w25, w20",
-        "mov w25, #0xeba1",
-        "movk w25, #0x6ed9, lsl #16",
-        "add w20, w20, w25",
-        "ror w22, w22, #2",
-        "mov w25, v17.s[2]",
-        "add w24, w25, w24",
-        "eor w25, w21, w22",
-        "eor w25, w25, w23",
-        "ror w30, w20, #27",
-        "add w25, w25, w30",
-        "add w24, w25, w24",
-        "mov w25, #0xeba1",
-        "movk w25, #0x6ed9, lsl #16",
-        "add w24, w24, w25",
-        "ror w21, w21, #2",
-        "mov w25, v17.s[1]",
-        "add w23, w25, w23",
-        "eor w25, w20, w21",
-        "eor w25, w25, w22",
-        "ror w30, w24, #27",
-        "add w25, w25, w30",
-        "add w23, w25, w23",
-        "mov w25, #0xeba1",
-        "movk w25, #0x6ed9, lsl #16",
-        "add w23, w23, w25",
-        "ror w20, w20, #2",
-        "mov w30, v17.s[0]",
-        "add w22, w30, w22",
-        "eor w30, w24, w20",
-        "eor w21, w30, w21",
-        "ror w30, w23, #27",
-        "add w21, w21, w30",
-        "add w21, w21, w22",
-        "add w21, w21, w25",
-        "ror w22, w24, #2",
-        "mov v2.16b, v16.16b",
-        "mov v2.s[3], w21",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov w21, v3.s[3]",
+        "mov w22, v2.s[3]",
+        "mov w23, v2.s[2]",
+        "mov w24, v2.s[1]",
+        "mov w25, v2.s[0]",
+        "eor w30, w23, w24",
+        "eor w18, w30, w25",
+        "ror w30, w22, #27",
+        "str w24, [sp]",
+        "add w24, w18, w30",
+        "add w30, w24, w21",
+        "add w21, w30, w20",
+        "ror w24, w23, #2",
+        "mov w23, v3.s[2]",
+        "add w30, w23, w25",
+        "eor w23, w22, w24",
+        "ldr w25, [sp]",
+        "eor w18, w23, w25",
+        "ror w23, w21, #27",
+        "str w24, [sp, #32]",
+        "add w24, w18, w23",
+        "add w23, w24, w30",
+        "add w24, w23, w20",
+        "ror w23, w22, #2",
+        "mov w22, v3.s[1]",
+        "add w30, w22, w25",
+        "eor w22, w21, w23",
+        "ldr w25, [sp, #32]",
+        "eor w18, w22, w25",
+        "ror w22, w24, #27",
+        "str w23, [sp, #64]",
+        "add w23, w18, w22",
+        "add w22, w23, w30",
+        "add w23, w22, w20",
+        "ror w22, w21, #2",
+        "mov w21, v3.s[0]",
+        "add w30, w21, w25",
+        "eor w21, w24, w22",
+        "ldr w25, [sp, #64]",
+        "eor w18, w21, w25",
+        "ror w21, w23, #27",
+        "add w25, w18, w21",
+        "add w21, w25, w30",
+        "add w25, w21, w20",
+        "ror w20, w24, #2",
+        "mov v3.16b, v2.16b",
+        "mov v3.s[3], w25",
+        "mov v2.16b, v3.16b",
         "mov v2.s[2], w23",
-        "mov v2.s[1], w22",
+        "mov v3.16b, v2.16b",
+        "mov v3.s[1], w20",
+        "mov v2.16b, v3.16b",
+        "mov v2.s[0], w22",
         "mov v16.16b, v2.16b",
-        "mov v16.s[0], w20"
+        "add sp, sp, #0x60 (96)"
       ]
     },
     "sha1rnds4 xmm0, xmm1, 10b": {
-      "ExpectedInstructionCount": 61,
+      "ExpectedInstructionCount": 68,
       "Comment": [
         "0x66 0x0f 0x3a 0xcc"
       ],
       "ExpectedArm64ASM": [
+        "sub sp, sp, #0x60 (96)",
         "mov w20, #0xbcdc",
         "movk w20, #0x8f1b, lsl #16",
-        "mov w20, v17.s[3]",
-        "mov w21, v16.s[3]",
-        "mov w22, v16.s[2]",
-        "mov w23, v16.s[1]",
-        "mov w24, v16.s[0]",
-        "and w25, w23, w24",
-        "orr w30, w23, w24",
-        "and w30, w22, w30",
-        "orr w25, w30, w25",
-        "ror w30, w21, #27",
-        "add w25, w25, w30",
-        "add w20, w25, w20",
-        "mov w25, #0xbcdc",
-        "movk w25, #0x8f1b, lsl #16",
-        "add w20, w20, w25",
-        "ror w22, w22, #2",
-        "mov w25, v17.s[2]",
-        "add w24, w25, w24",
-        "and w25, w22, w23",
-        "orr w30, w22, w23",
-        "and w30, w21, w30",
-        "orr w25, w30, w25",
-        "ror w30, w20, #27",
-        "add w25, w25, w30",
-        "add w24, w25, w24",
-        "mov w25, #0xbcdc",
-        "movk w25, #0x8f1b, lsl #16",
-        "add w24, w24, w25",
-        "ror w21, w21, #2",
-        "mov w25, v17.s[1]",
-        "add w23, w25, w23",
-        "and w25, w21, w22",
-        "orr w30, w21, w22",
-        "and w30, w20, w30",
-        "orr w25, w30, w25",
-        "ror w30, w24, #27",
-        "add w25, w25, w30",
-        "add w23, w25, w23",
-        "mov w25, #0xbcdc",
-        "movk w25, #0x8f1b, lsl #16",
-        "add w23, w23, w25",
-        "ror w20, w20, #2",
-        "mov w30, v17.s[0]",
-        "add w22, w30, w22",
-        "and w30, w20, w21",
-        "orr w21, w20, w21",
-        "and w21, w24, w21",
-        "orr w21, w21, w30",
-        "ror w30, w23, #27",
-        "add w21, w21, w30",
-        "add w21, w21, w22",
-        "add w21, w21, w25",
-        "ror w22, w24, #2",
-        "mov v2.16b, v16.16b",
-        "mov v2.s[3], w21",
-        "mov v2.s[2], w23",
-        "mov v2.s[1], w22",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov w21, v3.s[3]",
+        "mov w22, v2.s[3]",
+        "mov w23, v2.s[2]",
+        "mov w24, v2.s[1]",
+        "mov w25, v2.s[0]",
+        "and w30, w24, w25",
+        "orr w18, w24, w25",
+        "str w24, [sp]",
+        "and w24, w23, w18",
+        "orr w18, w24, w30",
+        "ror w24, w22, #27",
+        "add w30, w18, w24",
+        "add w24, w30, w21",
+        "add w21, w24, w20",
+        "ror w24, w23, #2",
+        "mov w23, v3.s[2]",
+        "add w30, w23, w25",
+        "ldr w23, [sp]",
+        "and w25, w24, w23",
+        "orr w18, w24, w23",
+        "str w24, [sp, #32]",
+        "and w24, w22, w18",
+        "orr w18, w24, w25",
+        "ror w24, w21, #27",
+        "add w25, w18, w24",
+        "add w24, w25, w30",
+        "add w25, w24, w20",
+        "ror w24, w22, #2",
+        "mov w22, v3.s[1]",
+        "add w30, w22, w23",
+        "ldr w22, [sp, #32]",
+        "and w23, w24, w22",
+        "orr w18, w24, w22",
+        "str w24, [sp, #64]",
+        "and w24, w21, w18",
+        "orr w18, w24, w23",
+        "ror w23, w25, #27",
+        "add w24, w18, w23",
+        "add w23, w24, w30",
+        "add w24, w23, w20",
+        "ror w23, w21, #2",
+        "mov w21, v3.s[0]",
+        "add w30, w21, w22",
+        "ldr w21, [sp, #64]",
+        "and w22, w23, w21",
+        "orr w18, w23, w21",
+        "and w21, w25, w18",
+        "orr w18, w21, w22",
+        "ror w21, w24, #27",
+        "add w22, w18, w21",
+        "add w21, w22, w30",
+        "add w22, w21, w20",
+        "ror w20, w25, #2",
+        "mov v3.16b, v2.16b",
+        "mov v3.s[3], w22",
+        "mov v2.16b, v3.16b",
+        "mov v2.s[2], w24",
+        "mov v3.16b, v2.16b",
+        "mov v3.s[1], w20",
+        "mov v2.16b, v3.16b",
+        "mov v2.s[0], w23",
         "mov v16.16b, v2.16b",
-        "mov v16.s[0], w20"
+        "add sp, sp, #0x60 (96)"
       ]
     },
     "sha1rnds4 xmm0, xmm1, 11b": {
-      "ExpectedInstructionCount": 53,
+      "ExpectedInstructionCount": 60,
       "Comment": [
         "0x66 0x0f 0x3a 0xcc"
       ],
       "ExpectedArm64ASM": [
+        "sub sp, sp, #0x60 (96)",
         "mov w20, #0xc1d6",
         "movk w20, #0xca62, lsl #16",
-        "mov w20, v17.s[3]",
-        "mov w21, v16.s[3]",
-        "mov w22, v16.s[2]",
-        "mov w23, v16.s[1]",
-        "mov w24, v16.s[0]",
-        "eor w25, w22, w23",
-        "eor w25, w25, w24",
-        "ror w30, w21, #27",
-        "add w25, w25, w30",
-        "add w20, w25, w20",
-        "mov w25, #0xc1d6",
-        "movk w25, #0xca62, lsl #16",
-        "add w20, w20, w25",
-        "ror w22, w22, #2",
-        "mov w25, v17.s[2]",
-        "add w24, w25, w24",
-        "eor w25, w21, w22",
-        "eor w25, w25, w23",
-        "ror w30, w20, #27",
-        "add w25, w25, w30",
-        "add w24, w25, w24",
-        "mov w25, #0xc1d6",
-        "movk w25, #0xca62, lsl #16",
-        "add w24, w24, w25",
-        "ror w21, w21, #2",
-        "mov w25, v17.s[1]",
-        "add w23, w25, w23",
-        "eor w25, w20, w21",
-        "eor w25, w25, w22",
-        "ror w30, w24, #27",
-        "add w25, w25, w30",
-        "add w23, w25, w23",
-        "mov w25, #0xc1d6",
-        "movk w25, #0xca62, lsl #16",
-        "add w23, w23, w25",
-        "ror w20, w20, #2",
-        "mov w30, v17.s[0]",
-        "add w22, w30, w22",
-        "eor w30, w24, w20",
-        "eor w21, w30, w21",
-        "ror w30, w23, #27",
-        "add w21, w21, w30",
-        "add w21, w21, w22",
-        "add w21, w21, w25",
-        "ror w22, w24, #2",
-        "mov v2.16b, v16.16b",
-        "mov v2.s[3], w21",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov w21, v3.s[3]",
+        "mov w22, v2.s[3]",
+        "mov w23, v2.s[2]",
+        "mov w24, v2.s[1]",
+        "mov w25, v2.s[0]",
+        "eor w30, w23, w24",
+        "eor w18, w30, w25",
+        "ror w30, w22, #27",
+        "str w24, [sp]",
+        "add w24, w18, w30",
+        "add w30, w24, w21",
+        "add w21, w30, w20",
+        "ror w24, w23, #2",
+        "mov w23, v3.s[2]",
+        "add w30, w23, w25",
+        "eor w23, w22, w24",
+        "ldr w25, [sp]",
+        "eor w18, w23, w25",
+        "ror w23, w21, #27",
+        "str w24, [sp, #32]",
+        "add w24, w18, w23",
+        "add w23, w24, w30",
+        "add w24, w23, w20",
+        "ror w23, w22, #2",
+        "mov w22, v3.s[1]",
+        "add w30, w22, w25",
+        "eor w22, w21, w23",
+        "ldr w25, [sp, #32]",
+        "eor w18, w22, w25",
+        "ror w22, w24, #27",
+        "str w23, [sp, #64]",
+        "add w23, w18, w22",
+        "add w22, w23, w30",
+        "add w23, w22, w20",
+        "ror w22, w21, #2",
+        "mov w21, v3.s[0]",
+        "add w30, w21, w25",
+        "eor w21, w24, w22",
+        "ldr w25, [sp, #64]",
+        "eor w18, w21, w25",
+        "ror w21, w23, #27",
+        "add w25, w18, w21",
+        "add w21, w25, w30",
+        "add w25, w21, w20",
+        "ror w20, w24, #2",
+        "mov v3.16b, v2.16b",
+        "mov v3.s[3], w25",
+        "mov v2.16b, v3.16b",
         "mov v2.s[2], w23",
-        "mov v2.s[1], w22",
+        "mov v3.16b, v2.16b",
+        "mov v3.s[1], w20",
+        "mov v2.16b, v3.16b",
+        "mov v2.s[0], w22",
         "mov v16.16b, v2.16b",
-        "mov v16.s[0], w20"
+        "add sp, sp, #0x60 (96)"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/H0F3A_SVE128.json b/unittests/InstructionCountCI/H0F3A_SVE128.json
index 18d1ca3396..c58985af9f 100644
--- a/unittests/InstructionCountCI/H0F3A_SVE128.json
+++ b/unittests/InstructionCountCI/H0F3A_SVE128.json
@@ -11,259 +11,315 @@
   },
   "Instructions": {
     "dpps xmm0, xmm1, 00000000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "movi v2.2d, #0x0",
+        "mov v16.16b, v2.16b"
       ]
     },
     "dpps xmm0, xmm1, 00001111b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "movi v2.2d, #0x0",
+        "mov v16.16b, v2.16b"
       ]
     },
     "dpps xmm0, xmm1, 11110000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "movi v2.2d, #0x0",
+        "mov v16.16b, v2.16b"
       ]
     },
     "dpps xmm0, xmm1, 11110001b": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
         "movi v2.2d, #0x0",
-        "fmul v3.4s, v16.4s, v17.4s",
-        "faddv s3, p6, z3.s",
-        "zip1 v16.4s, v3.4s, v2.4s"
+        "mov v3.16b, v16.16b",
+        "mov v4.16b, v17.16b",
+        "fmul v5.4s, v3.4s, v4.4s",
+        "faddv s3, p6, z5.s",
+        "zip1 v4.4s, v3.4s, v2.4s",
+        "mov v16.16b, v4.16b"
       ]
     },
     "dpps xmm0, xmm1, 11110010b": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
         "movi v2.2d, #0x0",
-        "fmul v3.4s, v16.4s, v17.4s",
-        "faddv s3, p6, z3.s",
-        "zip1 v16.2s, v2.2s, v3.2s"
+        "mov v3.16b, v16.16b",
+        "mov v4.16b, v17.16b",
+        "fmul v5.4s, v3.4s, v4.4s",
+        "faddv s3, p6, z5.s",
+        "zip1 v4.2s, v2.2s, v3.2s",
+        "mov v16.16b, v4.16b"
       ]
     },
     "dpps xmm0, xmm1, 11110011b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
-        "fmul v2.4s, v16.4s, v17.4s",
-        "faddv s2, p6, z2.s",
-        "dup v16.2s, v2.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "fmul v4.4s, v2.4s, v3.4s",
+        "faddv s2, p6, z4.s",
+        "dup v3.2s, v2.s[0]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "dpps xmm0, xmm1, 11110100b": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
         "movi v2.2d, #0x0",
-        "fmul v3.4s, v16.4s, v17.4s",
-        "faddv s3, p6, z3.s",
-        "zip1 v16.2d, v2.2d, v3.2d"
+        "mov v3.16b, v16.16b",
+        "mov v4.16b, v17.16b",
+        "fmul v5.4s, v3.4s, v4.4s",
+        "faddv s3, p6, z5.s",
+        "zip1 v4.2d, v2.2d, v3.2d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "dpps xmm0, xmm1, 11110101b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
-        "fmul v2.4s, v16.4s, v17.4s",
-        "faddv s2, p6, z2.s",
-        "zip1 v16.2d, v2.2d, v2.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "fmul v4.4s, v2.4s, v3.4s",
+        "faddv s2, p6, z4.s",
+        "zip1 v3.2d, v2.2d, v2.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "dpps xmm0, xmm1, 11110110b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
         "movi v2.2d, #0x0",
-        "fmul v3.4s, v16.4s, v17.4s",
-        "faddv s3, p6, z3.s",
-        "mov v2.s[1], v3.s[0]",
-        "mov v16.16b, v2.16b",
-        "mov v16.s[2], v3.s[0]"
+        "mov v3.16b, v16.16b",
+        "mov v4.16b, v17.16b",
+        "fmul v5.4s, v3.4s, v4.4s",
+        "faddv s3, p6, z5.s",
+        "mov v4.16b, v2.16b",
+        "mov v4.s[1], v3.s[0]",
+        "mov v2.16b, v4.16b",
+        "mov v2.s[2], v3.s[0]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "dpps xmm0, xmm1, 11110111b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
         "movi v2.2d, #0x0",
-        "fmul v3.4s, v16.4s, v17.4s",
-        "faddv s3, p6, z3.s",
-        "dup v3.4s, v3.s[0]",
-        "mov v16.16b, v3.16b",
-        "mov v16.s[3], v2.s[0]"
+        "mov v3.16b, v16.16b",
+        "mov v4.16b, v17.16b",
+        "fmul v5.4s, v3.4s, v4.4s",
+        "faddv s3, p6, z5.s",
+        "dup v4.4s, v3.s[0]",
+        "mov v3.16b, v4.16b",
+        "mov v3.s[3], v2.s[0]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "dpps xmm0, xmm1, 11111000b": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
         "movi v2.2d, #0x0",
-        "fmul v3.4s, v16.4s, v17.4s",
-        "faddv s3, p6, z3.s",
-        "ext v16.16b, v2.16b, v3.16b, #4"
+        "mov v3.16b, v16.16b",
+        "mov v4.16b, v17.16b",
+        "fmul v5.4s, v3.4s, v4.4s",
+        "faddv s3, p6, z5.s",
+        "ext v4.16b, v2.16b, v3.16b, #4",
+        "mov v16.16b, v4.16b"
       ]
     },
     "dpps xmm0, xmm1, 11111001b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
         "movi v2.2d, #0x0",
-        "fmul v3.4s, v16.4s, v17.4s",
-        "faddv s3, p6, z3.s",
-        "mov v2.s[0], v3.s[0]",
-        "mov v16.16b, v2.16b",
-        "mov v16.s[3], v3.s[0]"
+        "mov v3.16b, v16.16b",
+        "mov v4.16b, v17.16b",
+        "fmul v5.4s, v3.4s, v4.4s",
+        "faddv s3, p6, z5.s",
+        "mov v4.16b, v2.16b",
+        "mov v4.s[0], v3.s[0]",
+        "mov v2.16b, v4.16b",
+        "mov v2.s[3], v3.s[0]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "dpps xmm0, xmm1, 11111010b": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
         "movi v2.2d, #0x0",
-        "fmul v3.4s, v16.4s, v17.4s",
-        "faddv s3, p6, z3.s",
-        "dup v3.4s, v3.s[0]",
-        "zip1 v16.4s, v2.4s, v3.4s"
+        "mov v3.16b, v16.16b",
+        "mov v4.16b, v17.16b",
+        "fmul v5.4s, v3.4s, v4.4s",
+        "faddv s3, p6, z5.s",
+        "dup v4.4s, v3.s[0]",
+        "zip1 v3.4s, v2.4s, v4.4s",
+        "mov v16.16b, v3.16b"
       ]
     },
     "dpps xmm0, xmm1, 11111011b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
         "movi v2.2d, #0x0",
-        "fmul v3.4s, v16.4s, v17.4s",
-        "faddv s3, p6, z3.s",
-        "dup v3.4s, v3.s[0]",
-        "mov v16.16b, v3.16b",
-        "mov v16.s[2], v2.s[0]"
+        "mov v3.16b, v16.16b",
+        "mov v4.16b, v17.16b",
+        "fmul v5.4s, v3.4s, v4.4s",
+        "faddv s3, p6, z5.s",
+        "dup v4.4s, v3.s[0]",
+        "mov v3.16b, v4.16b",
+        "mov v3.s[2], v2.s[0]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "dpps xmm0, xmm1, 11111100b": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
         "movi v2.2d, #0x0",
-        "fmul v3.4s, v16.4s, v17.4s",
-        "faddv s3, p6, z3.s",
-        "dup v3.4s, v3.s[0]",
-        "zip1 v16.2d, v2.2d, v3.2d"
+        "mov v3.16b, v16.16b",
+        "mov v4.16b, v17.16b",
+        "fmul v5.4s, v3.4s, v4.4s",
+        "faddv s3, p6, z5.s",
+        "dup v4.4s, v3.s[0]",
+        "zip1 v3.2d, v2.2d, v4.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "dpps xmm0, xmm1, 11111101b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
         "movi v2.2d, #0x0",
-        "fmul v3.4s, v16.4s, v17.4s",
-        "faddv s3, p6, z3.s",
-        "dup v3.4s, v3.s[0]",
-        "mov v16.16b, v3.16b",
-        "mov v16.s[1], v2.s[0]"
+        "mov v3.16b, v16.16b",
+        "mov v4.16b, v17.16b",
+        "fmul v5.4s, v3.4s, v4.4s",
+        "faddv s3, p6, z5.s",
+        "dup v4.4s, v3.s[0]",
+        "mov v3.16b, v4.16b",
+        "mov v3.s[1], v2.s[0]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "dpps xmm0, xmm1, 11111110b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
         "movi v2.2d, #0x0",
-        "fmul v3.4s, v16.4s, v17.4s",
-        "faddv s3, p6, z3.s",
-        "dup v3.4s, v3.s[0]",
-        "mov v16.16b, v3.16b",
-        "mov v16.s[0], v2.s[0]"
+        "mov v3.16b, v16.16b",
+        "mov v4.16b, v17.16b",
+        "fmul v5.4s, v3.4s, v4.4s",
+        "faddv s3, p6, z5.s",
+        "dup v4.4s, v3.s[0]",
+        "mov v3.16b, v4.16b",
+        "mov v3.s[0], v2.s[0]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "dpps xmm0, xmm1, 11111111b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0x66 0x0f 0x3a 0x40"
       ],
       "ExpectedArm64ASM": [
-        "fmul v2.4s, v16.4s, v17.4s",
-        "faddv s2, p6, z2.s",
-        "dup v16.4s, v2.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "fmul v4.4s, v2.4s, v3.4s",
+        "faddv s2, p6, z4.s",
+        "dup v3.4s, v2.s[0]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "dppd xmm0, xmm1, 00000000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "0x66 0x0f 0x3a 0x41"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "movi v2.2d, #0x0",
+        "mov v16.16b, v2.16b"
       ]
     },
     "dppd xmm0, xmm1, 00001111b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "0x66 0x0f 0x3a 0x41"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "movi v2.2d, #0x0",
+        "mov v16.16b, v2.16b"
       ]
     },
     "dppd xmm0, xmm1, 11110000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "0x66 0x0f 0x3a 0x41"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "movi v2.2d, #0x0",
+        "mov v16.16b, v2.16b"
       ]
     },
     "dppd xmm0, xmm1, 11111111b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0x66 0x0f 0x3a 0x41"
       ],
       "ExpectedArm64ASM": [
-        "fmul v2.2d, v16.2d, v17.2d",
-        "faddv d2, p6, z2.d",
-        "dup v16.2d, v2.d[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "fmul v4.2d, v2.2d, v3.2d",
+        "faddv d2, p6, z4.d",
+        "dup v3.2d, v2.d[0]",
+        "mov v16.16b, v3.16b"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/Primary.json b/unittests/InstructionCountCI/Primary.json
index 5fcaf32e0c..7c7a317908 100644
--- a/unittests/InstructionCountCI/Primary.json
+++ b/unittests/InstructionCountCI/Primary.json
@@ -11,2410 +11,3129 @@
   },
   "Instructions": {
     "add bl, cl": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x00",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "lsl w0, w7, #24",
-        "cmn w0, w5, lsl #24",
-        "add w26, w7, w5",
-        "bfxil x7, x26, #0, #8"
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "lsl w0, w21, #24",
+        "cmn w0, w20, lsl #24",
+        "add w22, w21, w20",
+        "mov x26, x22",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #8",
+        "mov x7, x20"
       ]
     },
     "add bx, cx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x01",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "lsl w0, w7, #16",
-        "cmn w0, w5, lsl #16",
-        "add w26, w7, w5",
-        "bfxil x7, x26, #0, #16"
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "lsl w0, w21, #16",
+        "cmn w0, w20, lsl #16",
+        "add w22, w21, w20",
+        "mov x26, x22",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #16",
+        "mov x7, x20"
       ]
     },
     "add ebx, ecx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x01",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "adds w26, w7, w5",
-        "mov x7, x26"
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "adds w22, w21, w20",
+        "mov x26, x22",
+        "mov x7, x22"
       ]
     },
     "add rbx, rcx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x01",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "adds x26, x7, x5",
-        "mov x7, x26"
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "adds x22, x21, x20",
+        "mov x26, x22",
+        "mov x7, x22"
       ]
     },
     "db 0x02, 0xcb": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "0x02",
         "add bl, cl but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "lsl w0, w5, #24",
-        "cmn w0, w7, lsl #24",
-        "add w26, w5, w7",
-        "bfxil x5, x26, #0, #8"
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "lsl w0, w21, #24",
+        "cmn w0, w20, lsl #24",
+        "add w22, w21, w20",
+        "mov x26, x22",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #8",
+        "mov x5, x20"
       ]
     },
     "db 0x66, 0x03, 0xcb": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "0x03",
         "add bx, cx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "lsl w0, w5, #16",
-        "cmn w0, w7, lsl #16",
-        "add w26, w5, w7",
-        "bfxil x5, x26, #0, #16"
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "lsl w0, w21, #16",
+        "cmn w0, w20, lsl #16",
+        "add w22, w21, w20",
+        "mov x26, x22",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #16",
+        "mov x5, x20"
       ]
     },
     "db 0x03, 0xcb": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0x03",
         "add ebx, ecx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "adds w26, w5, w7",
-        "mov x5, x26"
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "adds w22, w21, w20",
+        "mov x26, x22",
+        "mov x5, x22"
       ]
     },
     "db 0x48, 0x03, 0xcb": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0x03",
         "add rbx, rcx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "adds x26, x5, x7",
-        "mov x5, x26"
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "adds x22, x21, x20",
+        "mov x26, x22",
+        "mov x5, x22"
       ]
     },
     "add al, 1": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "0x04",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "lsl w0, w27, #24",
+        "mov x21, x4",
+        "mov x27, x21",
+        "lsl w0, w21, #24",
         "cmn w0, w20, lsl #24",
-        "add w26, w27, #0x1 (1)",
-        "mov x4, x27",
-        "bfxil x4, x26, #0, #8"
+        "add w20, w21, #0x1 (1)",
+        "mov x26, x20",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "add ax, 1": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "0x05",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "lsl w0, w27, #16",
+        "mov x21, x4",
+        "mov x27, x21",
+        "lsl w0, w21, #16",
         "cmn w0, w20, lsl #16",
-        "add w26, w27, #0x1 (1)",
-        "mov x4, x27",
-        "bfxil x4, x26, #0, #16"
+        "add w20, w21, #0x1 (1)",
+        "mov x26, x20",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x4, x22"
       ]
     },
     "add eax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x05",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "adds w26, w27, #0x1 (1)",
-        "mov x4, x26"
+        "mov x20, x4",
+        "mov x27, x20",
+        "adds w21, w20, #0x1 (1)",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "add rax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x05",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "adds x26, x27, #0x1 (1)",
-        "mov x4, x26"
+        "mov x20, x4",
+        "mov x27, x20",
+        "adds x21, x20, #0x1 (1)",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "add al, -1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x04",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
-        "mvn w27, w4",
-        "lsl w0, w4, #24",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "lsl w0, w21, #24",
         "cmn w0, w20, lsl #24",
-        "add w26, w4, #0xff (255)",
-        "bfxil x4, x26, #0, #8"
+        "add w20, w21, #0xff (255)",
+        "mov x26, x20",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "add ax, -1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x05",
       "ExpectedArm64ASM": [
         "mov w20, #0xffff",
-        "mvn w27, w4",
-        "lsl w0, w4, #16",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "lsl w0, w21, #16",
         "cmn w0, w20, lsl #16",
-        "add w26, w4, w20",
-        "bfxil x4, x26, #0, #16"
+        "add w22, w21, w20",
+        "mov x26, x22",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #16",
+        "mov x4, x20"
       ]
     },
     "add eax, -1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x05",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "mvn w27, w4",
-        "adds w26, w4, w20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "adds w22, w21, w20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "add rax, -1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x05",
       "ExpectedArm64ASM": [
-        "mvn w27, w4",
-        "subs x26, x4, #0x1 (1)",
-        "mov x4, x26"
+        "mov x20, x4",
+        "mvn w21, w20",
+        "mov x27, x21",
+        "subs x21, x20, #0x1 (1)",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "or bl, bh": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 8,
       "Comment": "",
       "ExpectedArm64ASM": [
-        "lsr w20, w7, #8",
-        "orr w26, w7, w20",
-        "bfxil x7, x26, #0, #8",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x7",
+        "lsr w21, w20, #8",
+        "orr w22, w20, w21",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #8",
+        "mov x7, x21",
+        "mov x26, x22",
+        "cmn wzr, w22, lsl #24"
       ]
     },
     "or bl, cl": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x08",
       "ExpectedArm64ASM": [
-        "orr w26, w7, w5",
-        "bfxil x7, x26, #0, #8",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x5",
+        "mov x21, x7",
+        "orr w22, w21, w20",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #8",
+        "mov x7, x20",
+        "mov x26, x22",
+        "cmn wzr, w22, lsl #24"
       ]
     },
     "or bx, cx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x09",
       "ExpectedArm64ASM": [
-        "orr w26, w7, w5",
-        "bfxil x7, x26, #0, #16",
-        "cmn wzr, w26, lsl #16"
+        "mov x20, x5",
+        "mov x21, x7",
+        "orr w22, w21, w20",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #16",
+        "mov x7, x20",
+        "mov x26, x22",
+        "cmn wzr, w22, lsl #16"
       ]
     },
     "or ebx, ecx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x09",
       "ExpectedArm64ASM": [
-        "orr w7, w7, w5",
-        "mov x26, x7",
-        "tst w7, w7"
+        "mov x20, x5",
+        "mov x21, x7",
+        "orr w22, w21, w20",
+        "mov x7, x22",
+        "mov x26, x22",
+        "tst w22, w22"
       ]
     },
     "or rbx, rcx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x09",
       "ExpectedArm64ASM": [
-        "orr x7, x7, x5",
-        "mov x26, x7",
-        "tst x7, x7"
+        "mov x20, x5",
+        "mov x21, x7",
+        "orr x22, x21, x20",
+        "mov x7, x22",
+        "mov x26, x22",
+        "tst x22, x22"
       ]
     },
     "db 0x0A, 0xcb": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0x0A",
         "or bl, cl but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "orr w26, w5, w7",
-        "bfxil x5, x26, #0, #8",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x7",
+        "mov x21, x5",
+        "orr w22, w21, w20",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #8",
+        "mov x5, x20",
+        "mov x26, x22",
+        "cmn wzr, w22, lsl #24"
       ]
     },
     "db 0x66, 0x0B, 0xcb": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0x0B",
         "or bx, cx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "orr w26, w5, w7",
-        "bfxil x5, x26, #0, #16",
-        "cmn wzr, w26, lsl #16"
+        "mov x20, x7",
+        "mov x21, x5",
+        "orr w22, w21, w20",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #16",
+        "mov x5, x20",
+        "mov x26, x22",
+        "cmn wzr, w22, lsl #16"
       ]
     },
     "db 0x0B, 0xcb": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0x0B",
         "or ebx, ecx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "orr w5, w5, w7",
-        "mov x26, x5",
-        "tst w5, w5"
+        "mov x20, x7",
+        "mov x21, x5",
+        "orr w22, w21, w20",
+        "mov x5, x22",
+        "mov x26, x22",
+        "tst w22, w22"
       ]
     },
     "db 0x48, 0x0B, 0xcb": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0x0B",
         "or rbx, rcx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "orr x5, x5, x7",
-        "mov x26, x5",
-        "tst x5, x5"
+        "mov x20, x7",
+        "mov x21, x5",
+        "orr x22, x21, x20",
+        "mov x5, x22",
+        "mov x26, x22",
+        "tst x22, x22"
       ]
     },
     "or al, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x0C",
       "ExpectedArm64ASM": [
-        "orr w26, w4, #0x1",
-        "bfxil x4, x26, #0, #8",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x4",
+        "orr w21, w20, #0x1",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22",
+        "mov x26, x21",
+        "cmn wzr, w21, lsl #24"
       ]
     },
     "or ax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x0D",
       "ExpectedArm64ASM": [
-        "orr w26, w4, #0x1",
-        "bfxil x4, x26, #0, #16",
-        "cmn wzr, w26, lsl #16"
+        "mov x20, x4",
+        "orr w21, w20, #0x1",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22",
+        "mov x26, x21",
+        "cmn wzr, w21, lsl #16"
       ]
     },
     "or eax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0D",
       "ExpectedArm64ASM": [
-        "orr w4, w4, #0x1",
-        "mov x26, x4",
-        "tst w4, w4"
+        "mov x20, x4",
+        "orr w21, w20, #0x1",
+        "mov x4, x21",
+        "mov x26, x21",
+        "tst w21, w21"
       ]
     },
     "or rax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0D",
       "ExpectedArm64ASM": [
-        "orr x4, x4, #0x1",
-        "mov x26, x4",
-        "tst x4, x4"
+        "mov x20, x4",
+        "orr x21, x20, #0x1",
+        "mov x4, x21",
+        "mov x26, x21",
+        "tst x21, x21"
       ]
     },
     "or al, -1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x0C",
       "ExpectedArm64ASM": [
-        "orr w26, w4, #0xff",
-        "bfxil x4, x26, #0, #8",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x4",
+        "orr w21, w20, #0xff",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22",
+        "mov x26, x21",
+        "cmn wzr, w21, lsl #24"
       ]
     },
     "or ax, -1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x0D",
       "ExpectedArm64ASM": [
-        "orr w26, w4, #0xffff",
-        "bfxil x4, x26, #0, #16",
-        "cmn wzr, w26, lsl #16"
+        "mov x20, x4",
+        "orr w21, w20, #0xffff",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22",
+        "mov x26, x21",
+        "cmn wzr, w21, lsl #16"
       ]
     },
     "or eax, -1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0D",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "orr w4, w4, w20",
-        "mov x26, x4",
-        "tst w4, w4"
+        "mov x21, x4",
+        "orr w22, w21, w20",
+        "mov x4, x22",
+        "mov x26, x22",
+        "tst w22, w22"
       ]
     },
     "or rax, -1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0D",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
-        "orr x4, x4, x20",
-        "mov x26, x4",
-        "tst x4, x4"
+        "mov x21, x4",
+        "orr x22, x21, x20",
+        "mov x4, x22",
+        "mov x26, x22",
+        "tst x22, x22"
       ]
     },
     "adc bl, cl": {
-      "ExpectedInstructionCount": 20,
+      "ExpectedInstructionCount": 26,
       "Comment": "0x10",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "cset w20, hs",
-        "adc w21, w7, w5",
-        "uxtb w26, w21",
-        "cmp x26, x5",
-        "cset x21, lo",
-        "cmp x26, x5",
-        "cset x22, ls",
-        "cmp x20, #0x1 (1)",
-        "csel x20, x22, x21, eq",
-        "cmn wzr, w26, lsl #24",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "eor w21, w7, w5",
-        "eor w22, w26, w7",
-        "bic w21, w22, w21",
-        "ubfx x21, x21, #7, #1",
-        "orr w20, w20, w21, lsl #28",
-        "bfxil x7, x26, #0, #8",
-        "msr nzcv, x20"
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "cset w22, hs",
+        "adc w23, w21, w20",
+        "uxtb w24, w23",
+        "cmp x24, x20",
+        "cset x23, lo",
+        "cmp x24, x20",
+        "cset x25, ls",
+        "cmp x22, #0x1 (1)",
+        "csel x30, x25, x23, eq",
+        "cmn wzr, w24, lsl #24",
+        "mrs x22, nzcv",
+        "orr w23, w22, w30, lsl #29",
+        "eor w22, w21, w20",
+        "eor w20, w24, w21",
+        "bic w25, w20, w22",
+        "ubfx x20, x25, #7, #1",
+        "orr w22, w23, w20, lsl #28",
+        "mov x26, x24",
+        "mov x20, x21",
+        "bfxil x20, x24, #0, #8",
+        "mov x7, x20",
+        "msr nzcv, x22"
       ]
     },
     "adc bx, cx": {
-      "ExpectedInstructionCount": 20,
+      "ExpectedInstructionCount": 26,
       "Comment": "0x11",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "cset w20, hs",
-        "adc w21, w7, w5",
-        "uxth w26, w21",
-        "cmp x26, x5",
-        "cset x21, lo",
-        "cmp x26, x5",
-        "cset x22, ls",
-        "cmp x20, #0x1 (1)",
-        "csel x20, x22, x21, eq",
-        "cmn wzr, w26, lsl #16",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "eor w21, w7, w5",
-        "eor w22, w26, w7",
-        "bic w21, w22, w21",
-        "ubfx x21, x21, #15, #1",
-        "orr w20, w20, w21, lsl #28",
-        "bfxil x7, x26, #0, #16",
-        "msr nzcv, x20"
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "cset w22, hs",
+        "adc w23, w21, w20",
+        "uxth w24, w23",
+        "cmp x24, x20",
+        "cset x23, lo",
+        "cmp x24, x20",
+        "cset x25, ls",
+        "cmp x22, #0x1 (1)",
+        "csel x30, x25, x23, eq",
+        "cmn wzr, w24, lsl #16",
+        "mrs x22, nzcv",
+        "orr w23, w22, w30, lsl #29",
+        "eor w22, w21, w20",
+        "eor w20, w24, w21",
+        "bic w25, w20, w22",
+        "ubfx x20, x25, #15, #1",
+        "orr w22, w23, w20, lsl #28",
+        "mov x26, x24",
+        "mov x20, x21",
+        "bfxil x20, x24, #0, #16",
+        "mov x7, x20",
+        "msr nzcv, x22"
       ]
     },
     "adc ebx, ecx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x11",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "adcs w26, w7, w5",
-        "mov x7, x26"
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "adcs w22, w21, w20",
+        "mov x26, x22",
+        "mov x7, x22"
       ]
     },
     "adc rbx, rcx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x11",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "adcs x26, x7, x5",
-        "mov x7, x26"
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "adcs x22, x21, x20",
+        "mov x26, x22",
+        "mov x7, x22"
       ]
     },
     "db 0x12, 0xcb": {
-      "ExpectedInstructionCount": 20,
+      "ExpectedInstructionCount": 26,
       "Comment": [
         "0x12",
         "adc bl, cl but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "cset w20, hs",
-        "adc w21, w5, w7",
-        "uxtb w26, w21",
-        "cmp x26, x7",
-        "cset x21, lo",
-        "cmp x26, x7",
-        "cset x22, ls",
-        "cmp x20, #0x1 (1)",
-        "csel x20, x22, x21, eq",
-        "cmn wzr, w26, lsl #24",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "eor w21, w5, w7",
-        "eor w22, w26, w5",
-        "bic w21, w22, w21",
-        "ubfx x21, x21, #7, #1",
-        "orr w20, w20, w21, lsl #28",
-        "bfxil x5, x26, #0, #8",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "cset w22, hs",
+        "adc w23, w21, w20",
+        "uxtb w24, w23",
+        "cmp x24, x20",
+        "cset x23, lo",
+        "cmp x24, x20",
+        "cset x25, ls",
+        "cmp x22, #0x1 (1)",
+        "csel x30, x25, x23, eq",
+        "cmn wzr, w24, lsl #24",
+        "mrs x22, nzcv",
+        "orr w23, w22, w30, lsl #29",
+        "eor w22, w21, w20",
+        "eor w20, w24, w21",
+        "bic w25, w20, w22",
+        "ubfx x20, x25, #7, #1",
+        "orr w22, w23, w20, lsl #28",
+        "mov x26, x24",
+        "mov x20, x21",
+        "bfxil x20, x24, #0, #8",
+        "mov x5, x20",
+        "msr nzcv, x22"
       ]
     },
     "db 0x66, 0x13, 0xcb": {
-      "ExpectedInstructionCount": 20,
+      "ExpectedInstructionCount": 26,
       "Comment": [
         "0x13",
         "adc bx, cx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "cset w20, hs",
-        "adc w21, w5, w7",
-        "uxth w26, w21",
-        "cmp x26, x7",
-        "cset x21, lo",
-        "cmp x26, x7",
-        "cset x22, ls",
-        "cmp x20, #0x1 (1)",
-        "csel x20, x22, x21, eq",
-        "cmn wzr, w26, lsl #16",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "eor w21, w5, w7",
-        "eor w22, w26, w5",
-        "bic w21, w22, w21",
-        "ubfx x21, x21, #15, #1",
-        "orr w20, w20, w21, lsl #28",
-        "bfxil x5, x26, #0, #16",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "cset w22, hs",
+        "adc w23, w21, w20",
+        "uxth w24, w23",
+        "cmp x24, x20",
+        "cset x23, lo",
+        "cmp x24, x20",
+        "cset x25, ls",
+        "cmp x22, #0x1 (1)",
+        "csel x30, x25, x23, eq",
+        "cmn wzr, w24, lsl #16",
+        "mrs x22, nzcv",
+        "orr w23, w22, w30, lsl #29",
+        "eor w22, w21, w20",
+        "eor w20, w24, w21",
+        "bic w25, w20, w22",
+        "ubfx x20, x25, #15, #1",
+        "orr w22, w23, w20, lsl #28",
+        "mov x26, x24",
+        "mov x20, x21",
+        "bfxil x20, x24, #0, #16",
+        "mov x5, x20",
+        "msr nzcv, x22"
       ]
     },
     "db 0x13, 0xcb": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0x13",
         "adc ebx, ecx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "adcs w26, w5, w7",
-        "mov x5, x26"
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "adcs w22, w21, w20",
+        "mov x26, x22",
+        "mov x5, x22"
       ]
     },
     "db 0x48, 0x13, 0xcb": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0x13",
         "adc rbx, rcx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "adcs x26, x5, x7",
-        "mov x5, x26"
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "adcs x22, x21, x20",
+        "mov x26, x22",
+        "mov x5, x22"
       ]
     },
     "adc al, 1": {
-      "ExpectedInstructionCount": 20,
+      "ExpectedInstructionCount": 23,
       "Comment": "0x14",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "cset w21, hs",
-        "adc w20, w27, w20",
-        "uxtb w26, w20",
-        "cmp w26, #0x1 (1)",
-        "cset x20, lo",
-        "cmp w26, #0x1 (1)",
-        "cset x22, ls",
-        "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #24",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "bic w21, w26, w27",
-        "ubfx x21, x21, #7, #1",
-        "orr w20, w20, w21, lsl #28",
-        "mov x4, x27",
-        "bfxil x4, x26, #0, #8",
-        "msr nzcv, x20"
+        "mov x21, x4",
+        "mov x27, x21",
+        "cset w22, hs",
+        "adc w23, w21, w20",
+        "uxtb w20, w23",
+        "cmp w20, #0x1 (1)",
+        "cset x23, lo",
+        "cmp w20, #0x1 (1)",
+        "cset x24, ls",
+        "cmp x22, #0x1 (1)",
+        "csel x25, x24, x23, eq",
+        "cmn wzr, w20, lsl #24",
+        "mrs x22, nzcv",
+        "orr w23, w22, w25, lsl #29",
+        "bic w22, w20, w21",
+        "ubfx x24, x22, #7, #1",
+        "orr w22, w23, w24, lsl #28",
+        "mov x26, x20",
+        "mov x23, x21",
+        "bfxil x23, x20, #0, #8",
+        "mov x4, x23",
+        "msr nzcv, x22"
       ]
     },
     "adc ax, 1": {
-      "ExpectedInstructionCount": 20,
+      "ExpectedInstructionCount": 23,
       "Comment": "0x15",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "cset w21, hs",
-        "adc w20, w27, w20",
-        "uxth w26, w20",
-        "cmp w26, #0x1 (1)",
-        "cset x20, lo",
-        "cmp w26, #0x1 (1)",
-        "cset x22, ls",
-        "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #16",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "bic w21, w26, w27",
-        "ubfx x21, x21, #15, #1",
-        "orr w20, w20, w21, lsl #28",
-        "mov x4, x27",
-        "bfxil x4, x26, #0, #16",
-        "msr nzcv, x20"
+        "mov x21, x4",
+        "mov x27, x21",
+        "cset w22, hs",
+        "adc w23, w21, w20",
+        "uxth w20, w23",
+        "cmp w20, #0x1 (1)",
+        "cset x23, lo",
+        "cmp w20, #0x1 (1)",
+        "cset x24, ls",
+        "cmp x22, #0x1 (1)",
+        "csel x25, x24, x23, eq",
+        "cmn wzr, w20, lsl #16",
+        "mrs x22, nzcv",
+        "orr w23, w22, w25, lsl #29",
+        "bic w22, w20, w21",
+        "ubfx x24, x22, #15, #1",
+        "orr w22, w23, w24, lsl #28",
+        "mov x26, x20",
+        "mov x23, x21",
+        "bfxil x23, x20, #0, #16",
+        "mov x4, x23",
+        "msr nzcv, x22"
       ]
     },
     "adc eax, 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x15",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "adcs w26, w27, w20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "mov x27, x21",
+        "adcs w22, w21, w20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "adc rax, 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x15",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "adcs x26, x27, x20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "mov x27, x21",
+        "adcs x22, x21, x20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "adc al, -1": {
-      "ExpectedInstructionCount": 19,
+      "ExpectedInstructionCount": 24,
       "Comment": "0x14",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
-        "mvn w27, w4",
-        "cset w21, hs",
-        "adc w20, w4, w20",
-        "uxtb w26, w20",
-        "cmp w26, #0xff (255)",
-        "cset x20, lo",
-        "cmp w26, #0xff (255)",
-        "cset x22, ls",
-        "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #24",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "bic w21, w4, w26",
-        "ubfx x21, x21, #7, #1",
-        "orr w20, w20, w21, lsl #28",
-        "bfxil x4, x26, #0, #8",
-        "msr nzcv, x20"
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "cset w22, hs",
+        "adc w23, w21, w20",
+        "uxtb w20, w23",
+        "cmp w20, #0xff (255)",
+        "cset x23, lo",
+        "cmp w20, #0xff (255)",
+        "cset x24, ls",
+        "cmp x22, #0x1 (1)",
+        "csel x25, x24, x23, eq",
+        "cmn wzr, w20, lsl #24",
+        "mrs x22, nzcv",
+        "orr w23, w22, w25, lsl #29",
+        "bic w22, w21, w20",
+        "ubfx x24, x22, #7, #1",
+        "orr w22, w23, w24, lsl #28",
+        "mov x26, x20",
+        "mov x23, x21",
+        "bfxil x23, x20, #0, #8",
+        "mov x4, x23",
+        "msr nzcv, x22"
       ]
     },
     "adc ax, -1": {
-      "ExpectedInstructionCount": 19,
+      "ExpectedInstructionCount": 24,
       "Comment": "0x15",
       "ExpectedArm64ASM": [
         "mov w20, #0xffff",
-        "mvn w27, w4",
-        "cset w21, hs",
-        "adc w22, w4, w20",
-        "uxth w26, w22",
-        "cmp w26, w20",
-        "cset x22, lo",
-        "cmp w26, w20",
-        "cset x20, ls",
-        "cmp x21, #0x1 (1)",
-        "csel x20, x20, x22, eq",
-        "cmn wzr, w26, lsl #16",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "bic w21, w4, w26",
-        "ubfx x21, x21, #15, #1",
-        "orr w20, w20, w21, lsl #28",
-        "bfxil x4, x26, #0, #16",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "cset w22, hs",
+        "adc w23, w21, w20",
+        "uxth w24, w23",
+        "cmp w24, w20",
+        "cset x23, lo",
+        "cmp w24, w20",
+        "cset x25, ls",
+        "cmp x22, #0x1 (1)",
+        "csel x20, x25, x23, eq",
+        "cmn wzr, w24, lsl #16",
+        "mrs x22, nzcv",
+        "orr w23, w22, w20, lsl #29",
+        "bic w20, w21, w24",
+        "ubfx x22, x20, #15, #1",
+        "orr w20, w23, w22, lsl #28",
+        "mov x26, x24",
+        "mov x22, x21",
+        "bfxil x22, x24, #0, #16",
+        "mov x4, x22",
         "msr nzcv, x20"
       ]
     },
     "adc eax, -1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x15",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "mvn w27, w4",
-        "adcs w26, w4, w20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "adcs w22, w21, w20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "adc rax, -1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x15",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
-        "mvn w27, w4",
-        "adcs x26, x4, x20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "adcs x22, x21, x20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "sbb bl, cl": {
-      "ExpectedInstructionCount": 21,
+      "ExpectedInstructionCount": 27,
       "Comment": "0x18",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "cset w20, hs",
-        "add w21, w5, w20",
-        "sub w21, w7, w21",
-        "uxtb w26, w21",
-        "cmp x26, x7",
-        "cset x21, hi",
-        "cmp x26, x7",
-        "cset x22, hs",
-        "cmp x20, #0x1 (1)",
-        "csel x20, x22, x21, eq",
-        "cmn wzr, w26, lsl #24",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "eor w21, w7, w5",
-        "eor w22, w26, w7",
-        "and w21, w22, w21",
-        "ubfx x21, x21, #7, #1",
-        "orr w20, w20, w21, lsl #28",
-        "bfxil x7, x26, #0, #8",
-        "msr nzcv, x20"
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "cset w22, hs",
+        "add w23, w20, w22",
+        "sub w24, w21, w23",
+        "uxtb w23, w24",
+        "cmp x23, x21",
+        "cset x24, hi",
+        "cmp x23, x21",
+        "cset x25, hs",
+        "cmp x22, #0x1 (1)",
+        "csel x30, x25, x24, eq",
+        "cmn wzr, w23, lsl #24",
+        "mrs x22, nzcv",
+        "orr w24, w22, w30, lsl #29",
+        "eor w22, w21, w20",
+        "eor w20, w23, w21",
+        "and w25, w20, w22",
+        "ubfx x20, x25, #7, #1",
+        "orr w22, w24, w20, lsl #28",
+        "mov x26, x23",
+        "mov x20, x21",
+        "bfxil x20, x23, #0, #8",
+        "mov x7, x20",
+        "msr nzcv, x22"
       ]
     },
     "sbb bx, cx": {
-      "ExpectedInstructionCount": 21,
+      "ExpectedInstructionCount": 27,
       "Comment": "0x19",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "cset w20, hs",
-        "add w21, w5, w20",
-        "sub w21, w7, w21",
-        "uxth w26, w21",
-        "cmp x26, x7",
-        "cset x21, hi",
-        "cmp x26, x7",
-        "cset x22, hs",
-        "cmp x20, #0x1 (1)",
-        "csel x20, x22, x21, eq",
-        "cmn wzr, w26, lsl #16",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "eor w21, w7, w5",
-        "eor w22, w26, w7",
-        "and w21, w22, w21",
-        "ubfx x21, x21, #15, #1",
-        "orr w20, w20, w21, lsl #28",
-        "bfxil x7, x26, #0, #16",
-        "msr nzcv, x20"
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "cset w22, hs",
+        "add w23, w20, w22",
+        "sub w24, w21, w23",
+        "uxth w23, w24",
+        "cmp x23, x21",
+        "cset x24, hi",
+        "cmp x23, x21",
+        "cset x25, hs",
+        "cmp x22, #0x1 (1)",
+        "csel x30, x25, x24, eq",
+        "cmn wzr, w23, lsl #16",
+        "mrs x22, nzcv",
+        "orr w24, w22, w30, lsl #29",
+        "eor w22, w21, w20",
+        "eor w20, w23, w21",
+        "and w25, w20, w22",
+        "ubfx x20, x25, #15, #1",
+        "orr w22, w24, w20, lsl #28",
+        "mov x26, x23",
+        "mov x20, x21",
+        "bfxil x20, x23, #0, #16",
+        "mov x7, x20",
+        "msr nzcv, x22"
       ]
     },
     "sbb ebx, ecx": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x19",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20",
-        "sbcs w26, w7, w5",
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "mrs x22, nzcv",
+        "eor w23, w22, #0x20000000",
+        "msr nzcv, x23",
+        "sbcs w22, w21, w20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x7, x26",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "mov x26, x22",
+        "mov x7, x22",
+        "msr nzcv, x21"
       ]
     },
     "sbb rbx, rcx": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x19",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20",
-        "sbcs x26, x7, x5",
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "mrs x22, nzcv",
+        "eor w23, w22, #0x20000000",
+        "msr nzcv, x23",
+        "sbcs x22, x21, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x7, x26",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "mov x26, x22",
+        "mov x7, x22",
+        "msr nzcv, x21"
       ]
     },
     "db 0x1A, 0xcb": {
-      "ExpectedInstructionCount": 21,
+      "ExpectedInstructionCount": 27,
       "Comment": [
         "0x1A",
         "sbb bl, cl but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "cset w20, hs",
-        "add w21, w7, w20",
-        "sub w21, w5, w21",
-        "uxtb w26, w21",
-        "cmp x26, x5",
-        "cset x21, hi",
-        "cmp x26, x5",
-        "cset x22, hs",
-        "cmp x20, #0x1 (1)",
-        "csel x20, x22, x21, eq",
-        "cmn wzr, w26, lsl #24",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "eor w21, w5, w7",
-        "eor w22, w26, w5",
-        "and w21, w22, w21",
-        "ubfx x21, x21, #7, #1",
-        "orr w20, w20, w21, lsl #28",
-        "bfxil x5, x26, #0, #8",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "cset w22, hs",
+        "add w23, w20, w22",
+        "sub w24, w21, w23",
+        "uxtb w23, w24",
+        "cmp x23, x21",
+        "cset x24, hi",
+        "cmp x23, x21",
+        "cset x25, hs",
+        "cmp x22, #0x1 (1)",
+        "csel x30, x25, x24, eq",
+        "cmn wzr, w23, lsl #24",
+        "mrs x22, nzcv",
+        "orr w24, w22, w30, lsl #29",
+        "eor w22, w21, w20",
+        "eor w20, w23, w21",
+        "and w25, w20, w22",
+        "ubfx x20, x25, #7, #1",
+        "orr w22, w24, w20, lsl #28",
+        "mov x26, x23",
+        "mov x20, x21",
+        "bfxil x20, x23, #0, #8",
+        "mov x5, x20",
+        "msr nzcv, x22"
       ]
     },
     "db 0x66, 0x1B, 0xcb": {
-      "ExpectedInstructionCount": 21,
+      "ExpectedInstructionCount": 27,
       "Comment": [
         "0x1B",
         "sbb bx, cx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "cset w20, hs",
-        "add w21, w7, w20",
-        "sub w21, w5, w21",
-        "uxth w26, w21",
-        "cmp x26, x5",
-        "cset x21, hi",
-        "cmp x26, x5",
-        "cset x22, hs",
-        "cmp x20, #0x1 (1)",
-        "csel x20, x22, x21, eq",
-        "cmn wzr, w26, lsl #16",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "eor w21, w5, w7",
-        "eor w22, w26, w5",
-        "and w21, w22, w21",
-        "ubfx x21, x21, #15, #1",
-        "orr w20, w20, w21, lsl #28",
-        "bfxil x5, x26, #0, #16",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "cset w22, hs",
+        "add w23, w20, w22",
+        "sub w24, w21, w23",
+        "uxth w23, w24",
+        "cmp x23, x21",
+        "cset x24, hi",
+        "cmp x23, x21",
+        "cset x25, hs",
+        "cmp x22, #0x1 (1)",
+        "csel x30, x25, x24, eq",
+        "cmn wzr, w23, lsl #16",
+        "mrs x22, nzcv",
+        "orr w24, w22, w30, lsl #29",
+        "eor w22, w21, w20",
+        "eor w20, w23, w21",
+        "and w25, w20, w22",
+        "ubfx x20, x25, #15, #1",
+        "orr w22, w24, w20, lsl #28",
+        "mov x26, x23",
+        "mov x20, x21",
+        "bfxil x20, x23, #0, #16",
+        "mov x5, x20",
+        "msr nzcv, x22"
       ]
     },
     "db 0x1B, 0xcb": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0x1B",
         "sbb ebx, ecx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20",
-        "sbcs w26, w5, w7",
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "mrs x22, nzcv",
+        "eor w23, w22, #0x20000000",
+        "msr nzcv, x23",
+        "sbcs w22, w21, w20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x5, x26",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "mov x26, x22",
+        "mov x5, x22",
+        "msr nzcv, x21"
       ]
     },
     "db 0x48, 0x1B, 0xcb": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0x1B",
         "sbb rbx, rcx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20",
-        "sbcs x26, x5, x7",
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "mrs x22, nzcv",
+        "eor w23, w22, #0x20000000",
+        "msr nzcv, x23",
+        "sbcs x22, x21, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x5, x26",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "mov x26, x22",
+        "mov x5, x22",
+        "msr nzcv, x21"
       ]
     },
     "sbb al, 1": {
-      "ExpectedInstructionCount": 21,
+      "ExpectedInstructionCount": 24,
       "Comment": "0x1C",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "cset w21, hs",
-        "add w20, w20, w21",
-        "sub w20, w27, w20",
-        "uxtb w26, w20",
-        "cmp x26, x27",
+        "mov x21, x4",
+        "mov x27, x21",
+        "cset w22, hs",
+        "add w23, w20, w22",
+        "sub w20, w21, w23",
+        "uxtb w23, w20",
+        "cmp x23, x21",
         "cset x20, hi",
-        "cmp x26, x27",
-        "cset x22, hs",
-        "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #24",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "bic w21, w27, w26",
-        "ubfx x21, x21, #7, #1",
-        "orr w20, w20, w21, lsl #28",
-        "mov x4, x27",
-        "bfxil x4, x26, #0, #8",
+        "cmp x23, x21",
+        "cset x24, hs",
+        "cmp x22, #0x1 (1)",
+        "csel x25, x24, x20, eq",
+        "cmn wzr, w23, lsl #24",
+        "mrs x20, nzcv",
+        "orr w22, w20, w25, lsl #29",
+        "bic w20, w21, w23",
+        "ubfx x24, x20, #7, #1",
+        "orr w20, w22, w24, lsl #28",
+        "mov x26, x23",
+        "mov x22, x21",
+        "bfxil x22, x23, #0, #8",
+        "mov x4, x22",
         "msr nzcv, x20"
       ]
     },
     "sbb ax, 1": {
-      "ExpectedInstructionCount": 21,
+      "ExpectedInstructionCount": 24,
       "Comment": "0x1D",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "cset w21, hs",
-        "add w20, w20, w21",
-        "sub w20, w27, w20",
-        "uxth w26, w20",
-        "cmp x26, x27",
+        "mov x21, x4",
+        "mov x27, x21",
+        "cset w22, hs",
+        "add w23, w20, w22",
+        "sub w20, w21, w23",
+        "uxth w23, w20",
+        "cmp x23, x21",
         "cset x20, hi",
-        "cmp x26, x27",
-        "cset x22, hs",
-        "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #16",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "bic w21, w27, w26",
-        "ubfx x21, x21, #15, #1",
-        "orr w20, w20, w21, lsl #28",
-        "mov x4, x27",
-        "bfxil x4, x26, #0, #16",
+        "cmp x23, x21",
+        "cset x24, hs",
+        "cmp x22, #0x1 (1)",
+        "csel x25, x24, x20, eq",
+        "cmn wzr, w23, lsl #16",
+        "mrs x20, nzcv",
+        "orr w22, w20, w25, lsl #29",
+        "bic w20, w21, w23",
+        "ubfx x24, x20, #15, #1",
+        "orr w20, w22, w24, lsl #28",
+        "mov x26, x23",
+        "mov x22, x21",
+        "bfxil x22, x23, #0, #16",
+        "mov x4, x22",
         "msr nzcv, x20"
       ]
     },
     "sbb eax, 1": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 12,
       "Comment": "0x1D",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "mrs x21, nzcv",
-        "eor w21, w21, #0x20000000",
-        "msr nzcv, x21",
-        "sbcs w26, w27, w20",
+        "mov x21, x4",
+        "mov x27, x21",
+        "mrs x22, nzcv",
+        "eor w23, w22, #0x20000000",
+        "msr nzcv, x23",
+        "sbcs w22, w21, w20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x4, x26",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "mov x26, x22",
+        "mov x4, x22",
+        "msr nzcv, x21"
       ]
     },
     "sbb rax, 1": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 12,
       "Comment": "0x1D",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "mrs x21, nzcv",
-        "eor w21, w21, #0x20000000",
-        "msr nzcv, x21",
-        "sbcs x26, x27, x20",
+        "mov x21, x4",
+        "mov x27, x21",
+        "mrs x22, nzcv",
+        "eor w23, w22, #0x20000000",
+        "msr nzcv, x23",
+        "sbcs x22, x21, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x4, x26",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "mov x26, x22",
+        "mov x4, x22",
+        "msr nzcv, x21"
       ]
     },
     "sbb al, -1": {
-      "ExpectedInstructionCount": 20,
+      "ExpectedInstructionCount": 25,
       "Comment": "0x1C",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
-        "mvn w27, w4",
-        "cset w21, hs",
-        "add w20, w20, w21",
-        "sub w20, w4, w20",
-        "uxtb w26, w20",
-        "cmp x26, x4",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "cset w22, hs",
+        "add w23, w20, w22",
+        "sub w20, w21, w23",
+        "uxtb w23, w20",
+        "cmp x23, x21",
         "cset x20, hi",
-        "cmp x26, x4",
-        "cset x22, hs",
-        "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #24",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "bic w21, w26, w4",
-        "ubfx x21, x21, #7, #1",
-        "orr w20, w20, w21, lsl #28",
-        "bfxil x4, x26, #0, #8",
+        "cmp x23, x21",
+        "cset x24, hs",
+        "cmp x22, #0x1 (1)",
+        "csel x25, x24, x20, eq",
+        "cmn wzr, w23, lsl #24",
+        "mrs x20, nzcv",
+        "orr w22, w20, w25, lsl #29",
+        "bic w20, w23, w21",
+        "ubfx x24, x20, #7, #1",
+        "orr w20, w22, w24, lsl #28",
+        "mov x26, x23",
+        "mov x22, x21",
+        "bfxil x22, x23, #0, #8",
+        "mov x4, x22",
         "msr nzcv, x20"
       ]
     },
     "sbb ax, -1": {
-      "ExpectedInstructionCount": 20,
+      "ExpectedInstructionCount": 25,
       "Comment": "0x1D",
       "ExpectedArm64ASM": [
         "mov w20, #0xffff",
-        "mvn w27, w4",
-        "cset w21, hs",
-        "add w20, w20, w21",
-        "sub w20, w4, w20",
-        "uxth w26, w20",
-        "cmp x26, x4",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "cset w22, hs",
+        "add w23, w20, w22",
+        "sub w20, w21, w23",
+        "uxth w23, w20",
+        "cmp x23, x21",
         "cset x20, hi",
-        "cmp x26, x4",
-        "cset x22, hs",
-        "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #16",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "bic w21, w26, w4",
-        "ubfx x21, x21, #15, #1",
-        "orr w20, w20, w21, lsl #28",
-        "bfxil x4, x26, #0, #16",
+        "cmp x23, x21",
+        "cset x24, hs",
+        "cmp x22, #0x1 (1)",
+        "csel x25, x24, x20, eq",
+        "cmn wzr, w23, lsl #16",
+        "mrs x20, nzcv",
+        "orr w22, w20, w25, lsl #29",
+        "bic w20, w23, w21",
+        "ubfx x24, x20, #15, #1",
+        "orr w20, w22, w24, lsl #28",
+        "mov x26, x23",
+        "mov x22, x21",
+        "bfxil x22, x23, #0, #16",
+        "mov x4, x22",
         "msr nzcv, x20"
       ]
     },
     "sbb eax, -1": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x1D",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "mvn w27, w4",
-        "mrs x21, nzcv",
-        "eor w21, w21, #0x20000000",
-        "msr nzcv, x21",
-        "sbcs w26, w4, w20",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "mrs x22, nzcv",
+        "eor w23, w22, #0x20000000",
+        "msr nzcv, x23",
+        "sbcs w22, w21, w20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x4, x26",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "mov x26, x22",
+        "mov x4, x22",
+        "msr nzcv, x21"
       ]
     },
     "sbb rax, -1": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x1D",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
-        "mvn w27, w4",
-        "mrs x21, nzcv",
-        "eor w21, w21, #0x20000000",
-        "msr nzcv, x21",
-        "sbcs x26, x4, x20",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "mrs x22, nzcv",
+        "eor w23, w22, #0x20000000",
+        "msr nzcv, x23",
+        "sbcs x22, x21, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x4, x26",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "mov x26, x22",
+        "mov x4, x22",
+        "msr nzcv, x21"
       ]
     },
     "and bl, cl": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x20",
       "ExpectedArm64ASM": [
-        "and w26, w7, w5",
-        "cmn wzr, w26, lsl #24",
-        "bfxil x7, x26, #0, #8"
+        "mov x20, x5",
+        "mov x21, x7",
+        "and w22, w21, w20",
+        "cmn wzr, w22, lsl #24",
+        "mov x26, x22",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #8",
+        "mov x7, x20"
       ]
     },
     "and bx, cx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x21",
       "ExpectedArm64ASM": [
-        "and w26, w7, w5",
-        "cmn wzr, w26, lsl #16",
-        "bfxil x7, x26, #0, #16"
+        "mov x20, x5",
+        "mov x21, x7",
+        "and w22, w21, w20",
+        "cmn wzr, w22, lsl #16",
+        "mov x26, x22",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #16",
+        "mov x7, x20"
       ]
     },
     "and ebx, ecx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x21",
       "ExpectedArm64ASM": [
-        "ands w26, w7, w5",
-        "mov x7, x26"
+        "mov x20, x5",
+        "mov x21, x7",
+        "ands w22, w21, w20",
+        "mov x26, x22",
+        "mov x7, x22"
       ]
     },
     "and rbx, rcx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x21",
       "ExpectedArm64ASM": [
-        "ands x26, x7, x5",
-        "mov x7, x26"
+        "mov x20, x5",
+        "mov x21, x7",
+        "ands x22, x21, x20",
+        "mov x26, x22",
+        "mov x7, x22"
       ]
     },
     "db 0x22, 0xcb": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0x22",
         "and bl, cl but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "and w26, w5, w7",
-        "cmn wzr, w26, lsl #24",
-        "bfxil x5, x26, #0, #8"
-      ]
+        "mov x20, x7",
+        "mov x21, x5",
+        "and w22, w21, w20",
+        "cmn wzr, w22, lsl #24",
+        "mov x26, x22",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #8",
+        "mov x5, x20"
+      ]
     },
     "db 0x66, 0x23, 0xcb": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0x23",
         "and bx, cx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "and w26, w5, w7",
-        "cmn wzr, w26, lsl #16",
-        "bfxil x5, x26, #0, #16"
+        "mov x20, x7",
+        "mov x21, x5",
+        "and w22, w21, w20",
+        "cmn wzr, w22, lsl #16",
+        "mov x26, x22",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #16",
+        "mov x5, x20"
       ]
     },
     "db 0x23, 0xcb": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x23",
         "and ebx, ecx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "ands w26, w5, w7",
-        "mov x5, x26"
+        "mov x20, x7",
+        "mov x21, x5",
+        "ands w22, w21, w20",
+        "mov x26, x22",
+        "mov x5, x22"
       ]
     },
     "db 0x48, 0x23, 0xcb": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x23",
         "and rbx, rcx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "ands x26, x5, x7",
-        "mov x5, x26"
+        "mov x20, x7",
+        "mov x21, x5",
+        "ands x22, x21, x20",
+        "mov x26, x22",
+        "mov x5, x22"
       ]
     },
     "and al, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x24",
       "ExpectedArm64ASM": [
-        "and w26, w4, #0x1",
-        "cmn wzr, w26, lsl #24",
-        "bfxil x4, x26, #0, #8"
+        "mov x20, x4",
+        "and w21, w20, #0x1",
+        "cmn wzr, w21, lsl #24",
+        "mov x26, x21",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22"
       ]
     },
     "and ax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x25",
       "ExpectedArm64ASM": [
-        "and w26, w4, #0x1",
-        "cmn wzr, w26, lsl #16",
-        "bfxil x4, x26, #0, #16"
+        "mov x20, x4",
+        "and w21, w20, #0x1",
+        "cmn wzr, w21, lsl #16",
+        "mov x26, x21",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22"
       ]
     },
     "and eax, 1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x25",
       "ExpectedArm64ASM": [
-        "ands w26, w4, #0x1",
-        "mov x4, x26"
+        "mov x20, x4",
+        "ands w21, w20, #0x1",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "and rax, 1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x25",
       "ExpectedArm64ASM": [
-        "ands x26, x4, #0x1",
-        "mov x4, x26"
+        "mov x20, x4",
+        "ands x21, x20, #0x1",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "and al, -1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x24",
       "ExpectedArm64ASM": [
-        "and w26, w4, #0xff",
-        "cmn wzr, w26, lsl #24",
-        "bfxil x4, x26, #0, #8"
+        "mov x20, x4",
+        "and w21, w20, #0xff",
+        "cmn wzr, w21, lsl #24",
+        "mov x26, x21",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22"
       ]
     },
     "and ax, -1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x25",
       "ExpectedArm64ASM": [
-        "and w26, w4, #0xffff",
-        "cmn wzr, w26, lsl #16",
-        "bfxil x4, x26, #0, #16"
+        "mov x20, x4",
+        "and w21, w20, #0xffff",
+        "cmn wzr, w21, lsl #16",
+        "mov x26, x21",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22"
       ]
     },
     "and eax, -1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x25",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "ands w26, w4, w20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "ands w22, w21, w20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "and rax, -1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x25",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
-        "ands x26, x4, x20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "ands x22, x21, x20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "sub bl, cl": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 14,
       "Comment": "0x28",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "lsl w0, w7, #24",
-        "cmp w0, w5, lsl #24",
-        "sub w26, w7, w5",
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "lsl w0, w21, #24",
+        "cmp w0, w20, lsl #24",
+        "sub w22, w21, w20",
+        "mov x26, x22",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "bfxil x7, x26, #0, #8",
-        "msr nzcv, x20"
+        "eor w23, w20, #0x20000000",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #8",
+        "mov x7, x20",
+        "msr nzcv, x23"
       ]
     },
     "sub bx, cx": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 14,
       "Comment": "0x29",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "lsl w0, w7, #16",
-        "cmp w0, w5, lsl #16",
-        "sub w26, w7, w5",
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "lsl w0, w21, #16",
+        "cmp w0, w20, lsl #16",
+        "sub w22, w21, w20",
+        "mov x26, x22",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "bfxil x7, x26, #0, #16",
-        "msr nzcv, x20"
+        "eor w23, w20, #0x20000000",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #16",
+        "mov x7, x20",
+        "msr nzcv, x23"
       ]
     },
     "sub ebx, ecx": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": "0x29",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "subs w26, w7, w5",
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "subs w22, w21, w20",
+        "mov x26, x22",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x7, x26",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "mov x7, x22",
+        "msr nzcv, x21"
       ]
     },
     "sub rbx, rcx": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": "0x29",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "subs x26, x7, x5",
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "subs x22, x21, x20",
+        "mov x26, x22",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x7, x26",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "mov x7, x22",
+        "msr nzcv, x21"
       ]
     },
     "db 0x2A, 0xcb": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0x2A",
         "sub bl, cl but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "lsl w0, w5, #24",
-        "cmp w0, w7, lsl #24",
-        "sub w26, w5, w7",
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "lsl w0, w21, #24",
+        "cmp w0, w20, lsl #24",
+        "sub w22, w21, w20",
+        "mov x26, x22",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "bfxil x5, x26, #0, #8",
-        "msr nzcv, x20"
+        "eor w23, w20, #0x20000000",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #8",
+        "mov x5, x20",
+        "msr nzcv, x23"
       ]
     },
     "db 0x66, 0x2B, 0xcb": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0x2B",
         "sub bx, cx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "lsl w0, w5, #16",
-        "cmp w0, w7, lsl #16",
-        "sub w26, w5, w7",
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "lsl w0, w21, #16",
+        "cmp w0, w20, lsl #16",
+        "sub w22, w21, w20",
+        "mov x26, x22",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "bfxil x5, x26, #0, #16",
-        "msr nzcv, x20"
+        "eor w23, w20, #0x20000000",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #16",
+        "mov x5, x20",
+        "msr nzcv, x23"
       ]
     },
     "db 0x2B, 0xcb": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "0x2B",
         "sub ebx, ecx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "subs w26, w5, w7",
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "subs w22, w21, w20",
+        "mov x26, x22",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x5, x26",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "mov x5, x22",
+        "msr nzcv, x21"
       ]
     },
     "db 0x48, 0x2B, 0xcb": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "0x2B",
         "sub rbx, rcx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "subs x26, x5, x7",
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "subs x22, x21, x20",
+        "mov x26, x22",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x5, x26",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "mov x5, x22",
+        "msr nzcv, x21"
       ]
     },
     "sub al, 1": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x2C",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "lsl w0, w27, #24",
+        "mov x21, x4",
+        "mov x27, x21",
+        "lsl w0, w21, #24",
         "cmp w0, w20, lsl #24",
-        "sub w26, w27, #0x1 (1)",
-        "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x4, x27",
-        "bfxil x4, x26, #0, #8",
-        "msr nzcv, x20"
+        "sub w20, w21, #0x1 (1)",
+        "mov x26, x20",
+        "mrs x22, nzcv",
+        "eor w23, w22, #0x20000000",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22",
+        "msr nzcv, x23"
       ]
     },
     "sub ax, 1": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x2D",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "lsl w0, w27, #16",
+        "mov x21, x4",
+        "mov x27, x21",
+        "lsl w0, w21, #16",
         "cmp w0, w20, lsl #16",
-        "sub w26, w27, #0x1 (1)",
-        "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x4, x27",
-        "bfxil x4, x26, #0, #16",
-        "msr nzcv, x20"
+        "sub w20, w21, #0x1 (1)",
+        "mov x26, x20",
+        "mrs x22, nzcv",
+        "eor w23, w22, #0x20000000",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x4, x22",
+        "msr nzcv, x23"
       ]
     },
     "sub eax, 1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x2D",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "subs w26, w27, #0x1 (1)",
+        "mov x20, x4",
+        "mov x27, x20",
+        "subs w21, w20, #0x1 (1)",
+        "mov x26, x21",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x4, x26",
-        "msr nzcv, x20"
+        "eor w22, w20, #0x20000000",
+        "mov x4, x21",
+        "msr nzcv, x22"
       ]
     },
     "sub rax, 1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x2D",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "subs x26, x27, #0x1 (1)",
+        "mov x20, x4",
+        "mov x27, x20",
+        "subs x21, x20, #0x1 (1)",
+        "mov x26, x21",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x4, x26",
-        "msr nzcv, x20"
+        "eor w22, w20, #0x20000000",
+        "mov x4, x21",
+        "msr nzcv, x22"
       ]
     },
     "sub al, -1": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 14,
       "Comment": "0x2C",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
-        "mvn w27, w4",
-        "lsl w0, w4, #24",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "lsl w0, w21, #24",
         "cmp w0, w20, lsl #24",
-        "sub w26, w4, #0xff (255)",
-        "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "bfxil x4, x26, #0, #8",
-        "msr nzcv, x20"
+        "sub w20, w21, #0xff (255)",
+        "mov x26, x20",
+        "mrs x22, nzcv",
+        "eor w23, w22, #0x20000000",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22",
+        "msr nzcv, x23"
       ]
     },
     "sub ax, -1": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 14,
       "Comment": "0x2D",
       "ExpectedArm64ASM": [
         "mov w20, #0xffff",
-        "mvn w27, w4",
-        "lsl w0, w4, #16",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "lsl w0, w21, #16",
         "cmp w0, w20, lsl #16",
-        "sub w26, w4, w20",
+        "sub w22, w21, w20",
+        "mov x26, x22",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "bfxil x4, x26, #0, #16",
-        "msr nzcv, x20"
+        "eor w23, w20, #0x20000000",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #16",
+        "mov x4, x20",
+        "msr nzcv, x23"
       ]
     },
     "sub eax, -1": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "0x2D",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "mvn w27, w4",
-        "subs w26, w4, w20",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "subs w22, w21, w20",
+        "mov x26, x22",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x4, x26",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "mov x4, x22",
+        "msr nzcv, x21"
       ]
     },
     "sub rax, -1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x2D",
       "ExpectedArm64ASM": [
-        "mvn w27, w4",
-        "adds x26, x4, #0x1 (1)",
+        "mov x20, x4",
+        "mvn w21, w20",
+        "mov x27, x21",
+        "adds x21, x20, #0x1 (1)",
+        "mov x26, x21",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x4, x26",
-        "msr nzcv, x20"
+        "eor w22, w20, #0x20000000",
+        "mov x4, x21",
+        "msr nzcv, x22"
       ]
     },
     "xor bl, cl": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x30",
       "ExpectedArm64ASM": [
-        "eor w26, w7, w5",
-        "bfxil x7, x26, #0, #8",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #8",
+        "mov x7, x20",
+        "mov x26, x22",
+        "cmn wzr, w22, lsl #24"
       ]
     },
     "xor bx, cx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x31",
       "ExpectedArm64ASM": [
-        "eor w26, w7, w5",
-        "bfxil x7, x26, #0, #16",
-        "cmn wzr, w26, lsl #16"
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #16",
+        "mov x7, x20",
+        "mov x26, x22",
+        "cmn wzr, w22, lsl #16"
       ]
     },
     "xor ebx, ecx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x31",
       "ExpectedArm64ASM": [
-        "eor w7, w7, w5",
-        "mov x26, x7",
-        "tst w7, w7"
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x7, x22",
+        "mov x26, x22",
+        "tst w22, w22"
       ]
     },
     "xor rbx, rcx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x31",
       "ExpectedArm64ASM": [
-        "eor x7, x7, x5",
-        "mov x26, x7",
-        "tst x7, x7"
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor x22, x21, x20",
+        "mov x7, x22",
+        "mov x26, x22",
+        "tst x22, x22"
       ]
     },
     "db 0x32, 0xcb": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0x32",
         "xor bl, cl but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w26, w5, w7",
-        "bfxil x5, x26, #0, #8",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #8",
+        "mov x5, x20",
+        "mov x26, x22",
+        "cmn wzr, w22, lsl #24"
       ]
     },
     "db 0x66, 0x33, 0xcb": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0x33",
         "xor bx, cx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w26, w5, w7",
-        "bfxil x5, x26, #0, #16",
-        "cmn wzr, w26, lsl #16"
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #16",
+        "mov x5, x20",
+        "mov x26, x22",
+        "cmn wzr, w22, lsl #16"
       ]
     },
     "db 0x33, 0xcb": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0x33",
         "xor ebx, ecx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w5, w5, w7",
-        "mov x26, x5",
-        "tst w5, w5"
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x5, x22",
+        "mov x26, x22",
+        "tst w22, w22"
       ]
     },
     "db 0x48, 0x33, 0xcb": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0x33",
         "xor rbx, rcx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor x5, x5, x7",
-        "mov x26, x5",
-        "tst x5, x5"
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor x22, x21, x20",
+        "mov x5, x22",
+        "mov x26, x22",
+        "tst x22, x22"
       ]
     },
     "xor al, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x34",
       "ExpectedArm64ASM": [
-        "eor w26, w4, #0x1",
-        "bfxil x4, x26, #0, #8",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x4",
+        "eor w21, w20, #0x1",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22",
+        "mov x26, x21",
+        "cmn wzr, w21, lsl #24"
       ]
     },
     "xor ax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x35",
       "ExpectedArm64ASM": [
-        "eor w26, w4, #0x1",
-        "bfxil x4, x26, #0, #16",
-        "cmn wzr, w26, lsl #16"
+        "mov x20, x4",
+        "eor w21, w20, #0x1",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22",
+        "mov x26, x21",
+        "cmn wzr, w21, lsl #16"
       ]
     },
     "xor eax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x35",
       "ExpectedArm64ASM": [
-        "eor w4, w4, #0x1",
-        "mov x26, x4",
-        "tst w4, w4"
+        "mov x20, x4",
+        "eor w21, w20, #0x1",
+        "mov x4, x21",
+        "mov x26, x21",
+        "tst w21, w21"
       ]
     },
     "xor rax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x35",
       "ExpectedArm64ASM": [
-        "eor x4, x4, #0x1",
-        "mov x26, x4",
-        "tst x4, x4"
+        "mov x20, x4",
+        "eor x21, x20, #0x1",
+        "mov x4, x21",
+        "mov x26, x21",
+        "tst x21, x21"
       ]
     },
     "cmp bl, cl": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x38",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "lsl w0, w7, #24",
-        "cmp w0, w5, lsl #24",
-        "sub w26, w7, w5",
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "lsl w0, w21, #24",
+        "cmp w0, w20, lsl #24",
+        "sub w22, w21, w20",
+        "mov x26, x22",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "xor al, -1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x34",
       "ExpectedArm64ASM": [
-        "eor w26, w4, #0xff",
-        "bfxil x4, x26, #0, #8",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x4",
+        "eor w21, w20, #0xff",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22",
+        "mov x26, x21",
+        "cmn wzr, w21, lsl #24"
       ]
     },
     "xor ax, -1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x35",
       "ExpectedArm64ASM": [
-        "eor w26, w4, #0xffff",
-        "bfxil x4, x26, #0, #16",
-        "cmn wzr, w26, lsl #16"
+        "mov x20, x4",
+        "eor w21, w20, #0xffff",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22",
+        "mov x26, x21",
+        "cmn wzr, w21, lsl #16"
       ]
     },
     "xor eax, -1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x35",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "eor w4, w4, w20",
-        "mov x26, x4",
-        "tst w4, w4"
+        "mov x21, x4",
+        "eor w22, w21, w20",
+        "mov x4, x22",
+        "mov x26, x22",
+        "tst w22, w22"
       ]
     },
     "xor rax, -1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x35",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
-        "eor x4, x4, x20",
-        "mov x26, x4",
-        "tst x4, x4"
+        "mov x21, x4",
+        "eor x22, x21, x20",
+        "mov x4, x22",
+        "mov x26, x22",
+        "tst x22, x22"
       ]
     },
     "cmp bx, cx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x39",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "lsl w0, w7, #16",
-        "cmp w0, w5, lsl #16",
-        "sub w26, w7, w5",
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "lsl w0, w21, #16",
+        "cmp w0, w20, lsl #16",
+        "sub w22, w21, w20",
+        "mov x26, x22",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "cmp ebx, ecx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x39",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "subs w26, w7, w5",
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "subs w22, w21, w20",
+        "mov x26, x22",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "cmp rbx, rcx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x39",
       "ExpectedArm64ASM": [
-        "eor w27, w7, w5",
-        "subs x26, x7, x5",
+        "mov x20, x5",
+        "mov x21, x7",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "subs x22, x21, x20",
+        "mov x26, x22",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "db 0x3A, 0xcb": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "0x3A",
         "cmp bl, cl but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "lsl w0, w5, #24",
-        "cmp w0, w7, lsl #24",
-        "sub w26, w5, w7",
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "lsl w0, w21, #24",
+        "cmp w0, w20, lsl #24",
+        "sub w22, w21, w20",
+        "mov x26, x22",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "db 0x66, 0x3B, 0xcb": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "0x3B",
         "cmp bx, cx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "lsl w0, w5, #16",
-        "cmp w0, w7, lsl #16",
-        "sub w26, w5, w7",
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "lsl w0, w21, #16",
+        "cmp w0, w20, lsl #16",
+        "sub w22, w21, w20",
+        "mov x26, x22",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "db 0x3B, 0xcb": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0x3B",
         "cmp ebx, ecx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "subs w26, w5, w7",
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "subs w22, w21, w20",
+        "mov x26, x22",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "db 0x48, 0x3B, 0xcb": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0x3B",
         "cmp rbx, rcx but modrm.rm as source"
       ],
       "ExpectedArm64ASM": [
-        "eor w27, w5, w7",
-        "subs x26, x5, x7",
+        "mov x20, x7",
+        "mov x21, x5",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "subs x22, x21, x20",
+        "mov x26, x22",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "cmp al, 1": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 10,
       "Comment": "0x3C",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "lsl w0, w27, #24",
+        "mov x21, x4",
+        "mov x27, x21",
+        "lsl w0, w21, #24",
         "cmp w0, w20, lsl #24",
-        "sub w26, w27, #0x1 (1)",
+        "sub w20, w21, #0x1 (1)",
+        "mov x26, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "cmp ax, 1": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 10,
       "Comment": "0x3D",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "lsl w0, w27, #16",
+        "mov x21, x4",
+        "mov x27, x21",
+        "lsl w0, w21, #16",
         "cmp w0, w20, lsl #16",
-        "sub w26, w27, #0x1 (1)",
+        "sub w20, w21, #0x1 (1)",
+        "mov x26, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "cmp eax, 1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x3D",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "subs w26, w27, #0x1 (1)",
+        "mov x20, x4",
+        "mov x27, x20",
+        "subs w21, w20, #0x1 (1)",
+        "mov x26, x21",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "cmp rax, 1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x3D",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "subs x26, x27, #0x1 (1)",
+        "mov x20, x4",
+        "mov x27, x20",
+        "subs x21, x20, #0x1 (1)",
+        "mov x26, x21",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "cmp al, -1": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x3C",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
-        "mvn w27, w4",
-        "lsl w0, w4, #24",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "lsl w0, w21, #24",
         "cmp w0, w20, lsl #24",
-        "sub w26, w4, #0xff (255)",
+        "sub w20, w21, #0xff (255)",
+        "mov x26, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "cmp ax, -1": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x3D",
       "ExpectedArm64ASM": [
         "mov w20, #0xffff",
-        "mvn w27, w4",
-        "lsl w0, w4, #16",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "lsl w0, w21, #16",
         "cmp w0, w20, lsl #16",
-        "sub w26, w4, w20",
+        "sub w22, w21, w20",
+        "mov x26, x22",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "cmp eax, -1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x3D",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "mvn w27, w4",
-        "subs w26, w4, w20",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "subs w22, w21, w20",
+        "mov x26, x22",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "cmp rax, -1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x3D",
       "ExpectedArm64ASM": [
-        "mvn w27, w4",
-        "adds x26, x4, #0x1 (1)",
+        "mov x20, x4",
+        "mvn w21, w20",
+        "mov x27, x21",
+        "adds x21, x20, #0x1 (1)",
+        "mov x26, x21",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "push ax": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x50",
       "ExpectedArm64ASM": [
-        "strh w4, [x8, #-2]!"
+        "mov x20, x4",
+        "mov x21, x8",
+        "mov x22, x21",
+        "strh w20, [x22, #-2]!",
+        "mov x8, x22"
       ]
     },
     "push rax": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x50",
       "ExpectedArm64ASM": [
-        "str x4, [x8, #-8]!"
+        "mov x20, x4",
+        "mov x21, x8",
+        "mov x22, x21",
+        "str x20, [x22, #-8]!",
+        "mov x8, x22"
       ]
     },
     "pop ax": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x8f",
       "ExpectedArm64ASM": [
-        "ldrh w20, [x8]",
-        "add x8, x8, #0x2 (2)",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x8",
+        "ldrh w21, [x20]",
+        "add x22, x20, #0x2 (2)",
+        "mov x8, x22",
+        "mov x20, x4",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22"
       ]
     },
     "pop rax": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x8f",
       "ExpectedArm64ASM": [
-        "ldr x4, [x8]",
-        "add x8, x8, #0x8 (8)"
+        "mov x20, x8",
+        "ldr x21, [x20]",
+        "add x22, x20, #0x8 (8)",
+        "mov x8, x22",
+        "mov x4, x21"
       ]
     },
     "movsxd rax, ebx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x63",
       "ExpectedArm64ASM": [
-        "mov w20, w7",
-        "sxtw x4, w20"
+        "mov x20, x7",
+        "mov w21, w20",
+        "sxtw x20, w21",
+        "mov x4, x20"
       ]
     },
     "push word 1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x68",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "strh w20, [x8, #-2]!"
+        "mov x21, x8",
+        "mov x22, x21",
+        "strh w20, [x22, #-2]!",
+        "mov x8, x22"
       ]
     },
     "push qword 1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x68",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "str x20, [x8, #-8]!"
+        "mov x21, x8",
+        "mov x22, x21",
+        "str x20, [x22, #-8]!",
+        "mov x8, x22"
       ]
     },
     "imul ax, bx, 257": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 12,
       "Comment": "0x69",
       "ExpectedArm64ASM": [
-        "sxth x20, w7",
-        "mov w21, #0x101",
-        "mul x20, x20, x21",
-        "sbfx x21, x20, #16, #16",
-        "bfxil x4, x20, #0, #16",
-        "sbfx x20, x20, #15, #1",
-        "cmp x21, x20",
+        "mov x20, x7",
+        "sxth x21, w20",
+        "mov w20, #0x101",
+        "mul x22, x21, x20",
+        "sbfx x20, x22, #16, #16",
+        "mov x21, x4",
+        "mov x23, x21",
+        "bfxil x23, x22, #0, #16",
+        "mov x4, x23",
+        "sbfx x21, x22, #15, #1",
+        "cmp x20, x21",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "imul eax, ebx, 257": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x69",
       "ExpectedArm64ASM": [
-        "mov w20, #0x101",
-        "smull x21, w7, w20",
-        "asr x21, x21, #32",
-        "mul w4, w7, w20",
-        "sbfx x20, x4, #31, #1",
-        "cmp x21, x20",
+        "mov x20, x7",
+        "mov w21, #0x101",
+        "smull x22, w20, w21",
+        "asr x23, x22, #32",
+        "mul w22, w20, w21",
+        "mov x4, x22",
+        "sbfx x20, x22, #31, #1",
+        "cmp x23, x20",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "imul rax, rbx, 257": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x69",
       "ExpectedArm64ASM": [
-        "mov w20, #0x101",
-        "smulh x21, x7, x20",
-        "mul x4, x7, x20",
-        "asr x20, x4, #63",
-        "cmp x21, x20",
+        "mov x20, x7",
+        "mov w21, #0x101",
+        "smulh x22, x20, x21",
+        "mul x23, x20, x21",
+        "mov x4, x23",
+        "asr x20, x23, #63",
+        "cmp x22, x20",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "push word -1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x6a",
       "ExpectedArm64ASM": [
         "mov w20, #0xffff",
-        "strh w20, [x8, #-2]!"
+        "mov x21, x8",
+        "mov x22, x21",
+        "strh w20, [x22, #-2]!",
+        "mov x8, x22"
       ]
     },
     "push dword -1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x6a",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
-        "str x20, [x8, #-8]!"
+        "mov x21, x8",
+        "mov x22, x21",
+        "str x20, [x22, #-8]!",
+        "mov x8, x22"
       ]
     },
     "push qword -1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x6a",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
-        "str x20, [x8, #-8]!"
+        "mov x21, x8",
+        "mov x22, x21",
+        "str x20, [x22, #-8]!",
+        "mov x8, x22"
       ]
     },
     "imul ax, bx, 3": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 12,
       "Comment": "0x6b",
       "ExpectedArm64ASM": [
-        "sxth x20, w7",
-        "mov w21, #0x3",
-        "mul x20, x20, x21",
-        "sbfx x21, x20, #16, #16",
-        "bfxil x4, x20, #0, #16",
-        "sbfx x20, x20, #15, #1",
-        "cmp x21, x20",
+        "mov x20, x7",
+        "sxth x21, w20",
+        "mov w20, #0x3",
+        "mul x22, x21, x20",
+        "sbfx x20, x22, #16, #16",
+        "mov x21, x4",
+        "mov x23, x21",
+        "bfxil x23, x22, #0, #16",
+        "mov x4, x23",
+        "sbfx x21, x22, #15, #1",
+        "cmp x20, x21",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "imul eax, ebx, 3": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x6b",
       "ExpectedArm64ASM": [
-        "mov w20, #0x3",
-        "smull x21, w7, w20",
-        "asr x21, x21, #32",
-        "mul w4, w7, w20",
-        "sbfx x20, x4, #31, #1",
-        "cmp x21, x20",
+        "mov x20, x7",
+        "mov w21, #0x3",
+        "smull x22, w20, w21",
+        "asr x23, x22, #32",
+        "mul w22, w20, w21",
+        "mov x4, x22",
+        "sbfx x20, x22, #31, #1",
+        "cmp x23, x20",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "imul rax, rbx, 3": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x6b",
       "ExpectedArm64ASM": [
-        "mov w20, #0x3",
-        "smulh x21, x7, x20",
-        "mul x4, x7, x20",
-        "asr x20, x4, #63",
-        "cmp x21, x20",
+        "mov x20, x7",
+        "mov w21, #0x3",
+        "smulh x22, x20, x21",
+        "mul x23, x20, x21",
+        "mov x4, x23",
+        "asr x20, x23, #63",
+        "cmp x22, x20",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "test al, bl": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x84",
       "ExpectedArm64ASM": [
-        "and w26, w4, w7",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x7",
+        "mov x21, x4",
+        "and w22, w21, w20",
+        "cmn wzr, w22, lsl #24",
+        "mov x26, x22"
       ]
     },
     "test ax, bx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x84",
       "ExpectedArm64ASM": [
-        "and w26, w4, w7",
-        "cmn wzr, w26, lsl #16"
+        "mov x20, x7",
+        "mov x21, x4",
+        "and w22, w21, w20",
+        "cmn wzr, w22, lsl #16",
+        "mov x26, x22"
       ]
     },
     "test eax, ebx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x84",
       "ExpectedArm64ASM": [
-        "ands w26, w4, w7"
+        "mov x20, x7",
+        "mov x21, x4",
+        "ands w22, w21, w20",
+        "mov x26, x22"
       ]
     },
     "test rax, rbx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x84",
       "ExpectedArm64ASM": [
-        "ands x26, x4, x7"
+        "mov x20, x7",
+        "mov x21, x4",
+        "ands x22, x21, x20",
+        "mov x26, x22"
       ]
     },
     "test al, al": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x84",
       "ExpectedArm64ASM": [
-        "mov x26, x4",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x4",
+        "mov x21, x20",
+        "cmn wzr, w21, lsl #24",
+        "mov x26, x21"
       ]
     },
     "test ax, ax": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x84",
       "ExpectedArm64ASM": [
-        "mov x26, x4",
-        "cmn wzr, w26, lsl #16"
+        "mov x20, x4",
+        "mov x21, x20",
+        "cmn wzr, w21, lsl #16",
+        "mov x26, x21"
       ]
     },
     "test eax, eax": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x84",
       "ExpectedArm64ASM": [
-        "ands w26, w4, w4"
+        "mov x20, x4",
+        "ands w21, w20, w20",
+        "mov x26, x21"
       ]
     },
     "test rax, rax": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x84",
       "ExpectedArm64ASM": [
-        "ands x26, x4, x4"
+        "mov x20, x4",
+        "ands x21, x20, x20",
+        "mov x26, x21"
       ]
     },
     "xchg bl, cl": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x86",
       "ExpectedArm64ASM": [
-        "mov x20, x5",
-        "mov x5, x20",
-        "bfxil x5, x7, #0, #8",
-        "bfxil x7, x20, #0, #8"
+        "mov x20, x7",
+        "mov x21, x5",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x5, x22",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x7, x22"
       ]
     },
     "xchg [rax], cl": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x86",
       "ExpectedArm64ASM": [
-        "swpalb w5, w20, [x4]",
-        "bfxil x5, x20, #0, #8"
+        "mov x20, x5",
+        "mov x21, x4",
+        "swpalb w20, w22, [x21]",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #8",
+        "mov x5, x21"
       ]
     },
     "xchg bx, cx": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x87",
       "ExpectedArm64ASM": [
-        "mov x20, x5",
-        "mov x5, x20",
-        "bfxil x5, x7, #0, #16",
-        "bfxil x7, x20, #0, #16"
+        "mov x20, x7",
+        "mov x21, x5",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x5, x22",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x7, x22"
       ]
     },
     "xchg [rax], cx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x87",
       "ExpectedArm64ASM": [
-        "swpalh w5, w20, [x4]",
-        "bfxil x5, x20, #0, #16"
+        "mov x20, x5",
+        "mov x21, x4",
+        "swpalh w20, w22, [x21]",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x5, x21"
       ]
     },
     "xchg ebx, ecx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x87",
       "ExpectedArm64ASM": [
-        "mov x20, x5",
-        "mov w5, w7",
-        "mov w7, w20"
+        "mov x20, x7",
+        "mov x21, x5",
+        "mov w22, w20",
+        "mov x5, x22",
+        "mov w20, w21",
+        "mov x7, x20"
       ]
     },
     "xchg [rax], ecx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x87",
       "ExpectedArm64ASM": [
-        "swpal w5, w5, [x4]"
+        "mov x20, x5",
+        "mov x21, x4",
+        "swpal w20, w22, [x21]",
+        "mov x5, x22"
       ]
     },
     "xchg rbx, rcx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x87",
       "ExpectedArm64ASM": [
         "mov x20, x7",
-        "mov x7, x5",
-        "mov x5, x20"
+        "mov x21, x5",
+        "mov x5, x20",
+        "mov x7, x21"
       ]
     },
     "xchg [rax], rcx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x87",
       "ExpectedArm64ASM": [
-        "swpal x5, x5, [x4]"
+        "mov x20, x5",
+        "mov x21, x4",
+        "swpal x20, x22, [x21]",
+        "mov x5, x22"
       ]
     },
     "mov [rax], bl": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x88",
       "ExpectedArm64ASM": [
-        "uxtb w20, w7",
-        "strb w20, [x4]"
+        "mov x20, x7",
+        "uxtb w21, w20",
+        "mov x20, x4",
+        "strb w21, [x20]"
       ]
     },
     "mov [rax], bx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x89",
       "ExpectedArm64ASM": [
-        "uxth w20, w7",
-        "strh w20, [x4]"
+        "mov x20, x7",
+        "uxth w21, w20",
+        "mov x20, x4",
+        "strh w21, [x20]"
       ]
     },
     "mov [rax], ebx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x89",
       "ExpectedArm64ASM": [
-        "mov w20, w7",
-        "str w20, [x4]"
+        "mov x20, x7",
+        "mov w21, w20",
+        "mov x20, x4",
+        "str w21, [x20]"
       ]
     },
     "mov [rax], rbx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x89",
       "ExpectedArm64ASM": [
-        "str x7, [x4]"
+        "mov x20, x7",
+        "mov x21, x4",
+        "str x20, [x21]"
       ]
     },
     "mov bl, [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x8a",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4]",
-        "bfxil x7, x20, #0, #8"
+        "mov x20, x4",
+        "ldrb w21, [x20]",
+        "mov x20, x7",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x7, x22"
       ]
     },
     "mov bx, [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x8b",
       "ExpectedArm64ASM": [
-        "ldrh w20, [x4]",
-        "bfxil x7, x20, #0, #16"
+        "mov x20, x4",
+        "ldrh w21, [x20]",
+        "mov x20, x7",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x7, x22"
       ]
     },
     "mov ebx, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x8b",
       "ExpectedArm64ASM": [
-        "ldr w7, [x4]"
+        "mov x20, x4",
+        "ldr w21, [x20]",
+        "mov x7, x21"
       ]
     },
     "mov rbx, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x8b",
       "ExpectedArm64ASM": [
-        "ldr x7, [x4]"
+        "mov x20, x4",
+        "ldr x21, [x20]",
+        "mov x7, x21"
       ]
     },
     "mov ax, cs": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x8c",
       "ExpectedArm64ASM": [
         "ldrh w20, [x28, #138]",
-        "bfxil x4, x20, #0, #16"
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x4, x22"
       ]
     },
     "mov eax, cs": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "0x8c",
       "ExpectedArm64ASM": [
-        "ldrh w4, [x28, #138]"
+        "ldrh w20, [x28, #138]",
+        "mov x4, x20"
       ]
     },
     "mov rax, cs": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "0x8c",
       "ExpectedArm64ASM": [
-        "ldrh w4, [x28, #138]"
+        "ldrh w20, [x28, #138]",
+        "mov x4, x20"
       ]
     },
     "mov ax, es": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x8c",
       "ExpectedArm64ASM": [
         "ldrh w20, [x28, #136]",
-        "bfxil x4, x20, #0, #16"
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x4, x22"
       ]
     },
     "mov eax, es": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "0x8c",
       "ExpectedArm64ASM": [
-        "ldrh w4, [x28, #136]"
+        "ldrh w20, [x28, #136]",
+        "mov x4, x20"
       ]
     },
     "mov rax, es": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "0x8c",
       "ExpectedArm64ASM": [
-        "ldrh w4, [x28, #136]"
+        "ldrh w20, [x28, #136]",
+        "mov x4, x20"
       ]
     },
     "mov ax, ss": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x8c",
       "ExpectedArm64ASM": [
         "ldrh w20, [x28, #140]",
-        "bfxil x4, x20, #0, #16"
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x4, x22"
       ]
     },
     "mov eax, ss": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "0x8c",
       "ExpectedArm64ASM": [
-        "ldrh w4, [x28, #140]"
+        "ldrh w20, [x28, #140]",
+        "mov x4, x20"
       ]
     },
     "mov rax, ss": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "0x8c",
       "ExpectedArm64ASM": [
-        "ldrh w4, [x28, #140]"
+        "ldrh w20, [x28, #140]",
+        "mov x4, x20"
       ]
     },
     "mov ax, ds": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x8c",
       "ExpectedArm64ASM": [
         "ldrh w20, [x28, #142]",
-        "bfxil x4, x20, #0, #16"
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x4, x22"
       ]
     },
     "mov eax, ds": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "0x8c",
       "ExpectedArm64ASM": [
-        "ldrh w4, [x28, #142]"
+        "ldrh w20, [x28, #142]",
+        "mov x4, x20"
       ]
     },
     "mov rax, ds": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "0x8c",
       "ExpectedArm64ASM": [
-        "ldrh w4, [x28, #142]"
+        "ldrh w20, [x28, #142]",
+        "mov x4, x20"
       ]
     },
     "mov ax, gs": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x8c",
       "ExpectedArm64ASM": [
-        "and x4, x4, #0xffffffffffff0000"
+        "mov x20, x4",
+        "and x21, x20, #0xffffffffffff0000",
+        "mov x4, x21"
       ]
     },
     "mov eax, gs": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "0x8c",
       "ExpectedArm64ASM": [
-        "mov w4, #0x0"
+        "mov w20, #0x0",
+        "mov x4, x20"
       ]
     },
     "mov rax, gs": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "0x8c",
       "ExpectedArm64ASM": [
-        "mov w4, #0x0"
+        "mov w20, #0x0",
+        "mov x4, x20"
       ]
     },
     "mov ax, fs": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x8c",
       "ExpectedArm64ASM": [
-        "and x4, x4, #0xffffffffffff0000"
+        "mov x20, x4",
+        "and x21, x20, #0xffffffffffff0000",
+        "mov x4, x21"
       ]
     },
     "mov eax, fs": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "0x8c",
       "ExpectedArm64ASM": [
-        "mov w4, #0x0"
+        "mov w20, #0x0",
+        "mov x4, x20"
       ]
     },
     "mov rax, fs": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "0x8c",
       "ExpectedArm64ASM": [
-        "mov w4, #0x0"
+        "mov w20, #0x0",
+        "mov x4, x20"
       ]
     },
     "lea ax, [rbx+rcx*1 + 0]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x8d",
       "ExpectedArm64ASM": [
-        "add x20, x7, x5",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x7",
+        "mov x21, x5",
+        "add x22, x20, x21",
+        "mov x20, x4",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21"
       ]
     },
     "lea eax, [rbx+rcx*1 + 0]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x8d",
       "ExpectedArm64ASM": [
-        "add x20, x7, x5",
-        "mov x20, x20",
-        "mov w4, w20"
+        "mov x20, x7",
+        "mov x21, x5",
+        "add x22, x20, x21",
+        "mov x20, x22",
+        "mov w21, w20",
+        "mov x4, x21"
       ]
     },
     "lea rax, [rbx+rcx*1 + 0]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x8d",
       "ExpectedArm64ASM": [
-        "add x4, x7, x5"
+        "mov x20, x7",
+        "mov x21, x5",
+        "add x22, x20, x21",
+        "mov x4, x22"
       ]
     },
     "lea ax, [rbx+rcx*2 + 0]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x8d",
       "ExpectedArm64ASM": [
-        "add x20, x7, x5, lsl #1",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x7",
+        "mov x21, x5",
+        "add x22, x20, x21, lsl #1",
+        "mov x20, x4",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21"
       ]
     },
     "lea eax, [rbx+rcx*2 + 0]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x8d",
       "ExpectedArm64ASM": [
-        "add x20, x7, x5, lsl #1",
-        "mov x20, x20",
-        "mov w4, w20"
+        "mov x20, x7",
+        "mov x21, x5",
+        "add x22, x20, x21, lsl #1",
+        "mov x20, x22",
+        "mov w21, w20",
+        "mov x4, x21"
       ]
     },
     "lea rax, [rbx+rcx*2 + 0]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x8d",
       "ExpectedArm64ASM": [
-        "add x4, x7, x5, lsl #1"
+        "mov x20, x7",
+        "mov x21, x5",
+        "add x22, x20, x21, lsl #1",
+        "mov x4, x22"
       ]
     },
     "lea ax, [rbx+rcx*4 + 0]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x8d",
       "ExpectedArm64ASM": [
-        "add x20, x7, x5, lsl #2",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x7",
+        "mov x21, x5",
+        "add x22, x20, x21, lsl #2",
+        "mov x20, x4",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21"
       ]
     },
     "lea eax, [rbx+rcx*4 + 0]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x8d",
       "ExpectedArm64ASM": [
-        "add x20, x7, x5, lsl #2",
-        "mov x20, x20",
-        "mov w4, w20"
+        "mov x20, x7",
+        "mov x21, x5",
+        "add x22, x20, x21, lsl #2",
+        "mov x20, x22",
+        "mov w21, w20",
+        "mov x4, x21"
       ]
     },
     "lea rax, [rbx+rcx*4 + 0]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x8d",
       "ExpectedArm64ASM": [
-        "add x4, x7, x5, lsl #2"
+        "mov x20, x7",
+        "mov x21, x5",
+        "add x22, x20, x21, lsl #2",
+        "mov x4, x22"
       ]
     },
     "lea ax, [rbx+rcx*8 + 0]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x8d",
       "ExpectedArm64ASM": [
-        "add x20, x7, x5, lsl #3",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x7",
+        "mov x21, x5",
+        "add x22, x20, x21, lsl #3",
+        "mov x20, x4",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21"
       ]
     },
     "lea eax, [rbx+rcx*8 + 0]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x8d",
       "ExpectedArm64ASM": [
-        "add x20, x7, x5, lsl #3",
-        "mov x20, x20",
-        "mov w4, w20"
+        "mov x20, x7",
+        "mov x21, x5",
+        "add x22, x20, x21, lsl #3",
+        "mov x20, x22",
+        "mov w21, w20",
+        "mov x4, x21"
       ]
     },
     "lea rax, [rbx+rcx*8 + 0]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x8d",
       "ExpectedArm64ASM": [
-        "add x4, x7, x5, lsl #3"
+        "mov x20, x7",
+        "mov x21, x5",
+        "add x22, x20, x21, lsl #3",
+        "mov x4, x22"
       ]
     },
     "lea ax, [ebx+ecx*1 + 0]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x8d",
       "ExpectedArm64ASM": [
-        "add x20, x7, x5",
-        "mov w20, w20",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x7",
+        "mov x21, x5",
+        "add x22, x20, x21",
+        "mov w20, w22",
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x4, x22"
       ]
     },
     "lea eax, [ebx+ecx*1 + 0]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x8d",
       "ExpectedArm64ASM": [
-        "add x20, x7, x5",
-        "mov w4, w20"
+        "mov x20, x7",
+        "mov x21, x5",
+        "add x22, x20, x21",
+        "mov w20, w22",
+        "mov x4, x20"
       ]
     },
     "lea rax, [ebx+ecx*1 + 0]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x8d",
       "ExpectedArm64ASM": [
-        "add x20, x7, x5",
-        "mov w4, w20"
+        "mov x20, x7",
+        "mov x21, x5",
+        "add x22, x20, x21",
+        "mov w20, w22",
+        "mov x4, x20"
       ]
     },
     "lea ax, [ebx+ecx*2 + 0]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x8d",
       "ExpectedArm64ASM": [
-        "add x20, x7, x5, lsl #1",
-        "mov w20, w20",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x7",
+        "mov x21, x5",
+        "add x22, x20, x21, lsl #1",
+        "mov w20, w22",
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x4, x22"
       ]
     },
     "lea eax, [ebx+ecx*2 + 0]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x8d",
       "ExpectedArm64ASM": [
-        "add x20, x7, x5, lsl #1",
-        "mov w4, w20"
+        "mov x20, x7",
+        "mov x21, x5",
+        "add x22, x20, x21, lsl #1",
+        "mov w20, w22",
+        "mov x4, x20"
       ]
     },
     "lea rax, [ebx+ecx*2 + 0]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x8d",
       "ExpectedArm64ASM": [
-        "add x20, x7, x5, lsl #1",
-        "mov w4, w20"
+        "mov x20, x7",
+        "mov x21, x5",
+        "add x22, x20, x21, lsl #1",
+        "mov w20, w22",
+        "mov x4, x20"
       ]
     },
     "lea ax, [ebx+ecx*4 + 0]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x8d",
       "ExpectedArm64ASM": [
-        "add x20, x7, x5, lsl #2",
-        "mov w20, w20",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x7",
+        "mov x21, x5",
+        "add x22, x20, x21, lsl #2",
+        "mov w20, w22",
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x4, x22"
       ]
     },
     "lea eax, [ebx+ecx*4 + 0]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x8d",
       "ExpectedArm64ASM": [
-        "add x20, x7, x5, lsl #2",
-        "mov w4, w20"
+        "mov x20, x7",
+        "mov x21, x5",
+        "add x22, x20, x21, lsl #2",
+        "mov w20, w22",
+        "mov x4, x20"
       ]
     },
     "lea rax, [ebx+ecx*4 + 0]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x8d",
       "ExpectedArm64ASM": [
-        "add x20, x7, x5, lsl #2",
-        "mov w4, w20"
+        "mov x20, x7",
+        "mov x21, x5",
+        "add x22, x20, x21, lsl #2",
+        "mov w20, w22",
+        "mov x4, x20"
       ]
     },
     "lea ax, [ebx+ecx*8 + 0]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x8d",
       "ExpectedArm64ASM": [
-        "add x20, x7, x5, lsl #3",
-        "mov w20, w20",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x7",
+        "mov x21, x5",
+        "add x22, x20, x21, lsl #3",
+        "mov w20, w22",
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x4, x22"
       ]
     },
     "lea eax, [ebx+ecx*8 + 0]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x8d",
       "ExpectedArm64ASM": [
-        "add x20, x7, x5, lsl #3",
-        "mov w4, w20"
+        "mov x20, x7",
+        "mov x21, x5",
+        "add x22, x20, x21, lsl #3",
+        "mov w20, w22",
+        "mov x4, x20"
       ]
     },
     "lea rax, [ebx+ecx*8 + 0]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x8d",
       "ExpectedArm64ASM": [
-        "add x20, x7, x5, lsl #3",
-        "mov w4, w20"
+        "mov x20, x7",
+        "mov x21, x5",
+        "add x22, x20, x21, lsl #3",
+        "mov w20, w22",
+        "mov x4, x20"
       ]
     },
     "mov cs, ax": {
@@ -2423,39 +3142,42 @@
       "Comment": "0x8e"
     },
     "mov es, ax": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x8e",
       "ExpectedArm64ASM": [
-        "uxth w20, w4",
-        "strh w20, [x28, #136]",
-        "ubfx w20, w20, #3, #13",
+        "mov x20, x4",
+        "uxth w21, w20",
+        "strh w21, [x28, #136]",
+        "ubfx w20, w21, #3, #13",
         "add x0, x28, x20, lsl #2",
-        "ldr w20, [x0, #896]",
-        "str w20, [x28, #152]"
+        "ldr w21, [x0, #896]",
+        "str w21, [x28, #152]"
       ]
     },
     "mov ss, ax": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x8e",
       "ExpectedArm64ASM": [
-        "uxth w20, w4",
-        "strh w20, [x28, #140]",
-        "ubfx w20, w20, #3, #13",
+        "mov x20, x4",
+        "uxth w21, w20",
+        "strh w21, [x28, #140]",
+        "ubfx w20, w21, #3, #13",
         "add x0, x28, x20, lsl #2",
-        "ldr w20, [x0, #896]",
-        "str w20, [x28, #160]"
+        "ldr w21, [x0, #896]",
+        "str w21, [x28, #160]"
       ]
     },
     "mov ds, ax": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x8e",
       "ExpectedArm64ASM": [
-        "uxth w20, w4",
-        "strh w20, [x28, #142]",
-        "ubfx w20, w20, #3, #13",
+        "mov x20, x4",
+        "uxth w21, w20",
+        "strh w21, [x28, #142]",
+        "ubfx w20, w21, #3, #13",
         "add x0, x28, x20, lsl #2",
-        "ldr w20, [x0, #896]",
-        "str w20, [x28, #164]"
+        "ldr w21, [x0, #896]",
+        "str w21, [x28, #164]"
       ]
     },
     "mov gs, ax": {
@@ -2469,49 +3191,63 @@
       "Comment": "0x8e"
     },
     "pop word [rax]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x8f",
       "ExpectedArm64ASM": [
-        "ldrh w20, [x8]",
-        "add x8, x8, #0x2 (2)",
-        "strh w20, [x4]"
+        "mov x20, x8",
+        "ldrh w21, [x20]",
+        "add x22, x20, #0x2 (2)",
+        "mov x8, x22",
+        "mov x20, x4",
+        "strh w21, [x20]"
       ]
     },
     "pop qword [rax]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x8f",
       "ExpectedArm64ASM": [
-        "ldr x20, [x8]",
-        "add x8, x8, #0x8 (8)",
-        "str x20, [x4]"
+        "mov x20, x8",
+        "ldr x21, [x20]",
+        "add x22, x20, #0x8 (8)",
+        "mov x8, x22",
+        "mov x20, x4",
+        "str x21, [x20]"
       ]
     },
     "xchg ax, bx": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x90",
       "ExpectedArm64ASM": [
-        "mov x20, x7",
-        "mov x7, x20",
-        "bfxil x7, x4, #0, #16",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "mov x21, x7",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x7, x22",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22"
       ]
     },
     "xchg eax, ebx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x90",
       "ExpectedArm64ASM": [
-        "mov x20, x7",
-        "mov w7, w4",
-        "mov w4, w20"
+        "mov x20, x4",
+        "mov x21, x7",
+        "mov w22, w20",
+        "mov x7, x22",
+        "mov w20, w21",
+        "mov x4, x20"
       ]
     },
     "xchg rax, rbx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x90",
       "ExpectedArm64ASM": [
         "mov x20, x4",
-        "mov x4, x7",
-        "mov x7, x20"
+        "mov x21, x7",
+        "mov x7, x20",
+        "mov x4, x21"
       ]
     },
     "nop": {
@@ -2520,50 +3256,65 @@
       "ExpectedArm64ASM": []
     },
     "cbw": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x98",
       "ExpectedArm64ASM": [
-        "sxtb w20, w4",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "sxtb w21, w20",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22"
       ]
     },
     "cwde": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x98",
       "ExpectedArm64ASM": [
-        "sxth w4, w4"
+        "mov x20, x4",
+        "sxth w21, w20",
+        "mov x4, x21"
       ]
     },
     "cdqe": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x98",
       "ExpectedArm64ASM": [
-        "sxtw x4, w4"
+        "mov x20, x4",
+        "sxtw x21, w20",
+        "mov x4, x21"
       ]
     },
     "cwd": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x98",
       "ExpectedArm64ASM": [
-        "uxth w20, w4",
-        "sbfx x20, x20, #15, #1",
-        "bfxil x6, x20, #0, #16"
+        "mov x20, x4",
+        "uxth w21, w20",
+        "sbfx x20, x21, #15, #1",
+        "mov x21, x6",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x6, x22"
       ]
     },
     "cdq": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x99",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "sbfx x20, x20, #31, #1",
-        "mov w6, w20"
+        "mov x20, x4",
+        "mov w21, w20",
+        "sbfx x20, x21, #31, #1",
+        "mov w21, w20",
+        "mov x6, x21"
       ]
     },
     "cqo": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x99",
       "ExpectedArm64ASM": [
-        "asr x6, x4, #63"
+        "mov x20, x4",
+        "asr x21, x20, #63",
+        "mov x6, x21"
       ]
     },
     "fwait": {
@@ -2572,184 +3323,207 @@
       "ExpectedArm64ASM": []
     },
     "pushf": {
-      "ExpectedInstructionCount": 39,
+      "ExpectedInstructionCount": 44,
       "Comment": "0x9c",
       "ExpectedArm64ASM": [
         "cset w20, hs",
-        "eor w21, w27, w26",
-        "ubfx w21, w21, #4, #1",
-        "orr x20, x20, x21, lsl #4",
-        "ldrb w21, [x28, #712]",
-        "orr x20, x20, x21, lsl #8",
-        "ldrb w21, [x28, #713]",
-        "orr x20, x20, x21, lsl #9",
-        "ldrsb x21, [x28, #714]",
-        "lsr x21, x21, #63",
-        "orr x20, x20, x21, lsl #10",
+        "mov x21, x27",
+        "mov x22, x26",
+        "eor w23, w21, w22",
+        "ubfx w21, w23, #4, #1",
+        "orr x23, x20, x21, lsl #4",
+        "ldrb w20, [x28, #712]",
+        "orr x21, x23, x20, lsl #8",
+        "ldrb w20, [x28, #713]",
+        "orr x23, x21, x20, lsl #9",
+        "ldrsb x20, [x28, #714]",
+        "lsr x21, x20, #63",
+        "orr x20, x23, x21, lsl #10",
         "cset w21, vs",
-        "orr x20, x20, x21, lsl #11",
-        "ldrb w21, [x28, #716]",
-        "orr x20, x20, x21, lsl #12",
-        "ldrb w21, [x28, #718]",
-        "orr x20, x20, x21, lsl #14",
-        "ldrb w21, [x28, #720]",
-        "orr x20, x20, x21, lsl #16",
-        "ldrb w21, [x28, #721]",
-        "orr x20, x20, x21, lsl #17",
-        "ldrb w21, [x28, #722]",
-        "orr x20, x20, x21, lsl #18",
-        "ldrb w21, [x28, #723]",
-        "orr x20, x20, x21, lsl #19",
-        "ldrb w21, [x28, #724]",
-        "orr x20, x20, x21, lsl #20",
-        "ldrb w21, [x28, #725]",
-        "orr x20, x20, x21, lsl #21",
-        "eor w21, w26, w26, lsr #4",
-        "eor w21, w21, w21, lsr #2",
-        "eor w21, w21, w21, lsr #1",
-        "orr x21, x21, #0xfffffffffffffffe",
-        "orn x20, x20, x21, ror #62",
+        "orr x23, x20, x21, lsl #11",
+        "ldrb w20, [x28, #716]",
+        "orr x21, x23, x20, lsl #12",
+        "ldrb w20, [x28, #718]",
+        "orr x23, x21, x20, lsl #14",
+        "ldrb w20, [x28, #720]",
+        "orr x21, x23, x20, lsl #16",
+        "ldrb w20, [x28, #721]",
+        "orr x23, x21, x20, lsl #17",
+        "ldrb w20, [x28, #722]",
+        "orr x21, x23, x20, lsl #18",
+        "ldrb w20, [x28, #723]",
+        "orr x23, x21, x20, lsl #19",
+        "ldrb w20, [x28, #724]",
+        "orr x21, x23, x20, lsl #20",
+        "ldrb w20, [x28, #725]",
+        "orr x23, x21, x20, lsl #21",
+        "eor w20, w22, w22, lsr #4",
+        "eor w21, w20, w20, lsr #2",
+        "eor w20, w21, w21, lsr #1",
+        "orr x21, x20, #0xfffffffffffffffe",
+        "orn x20, x23, x21, ror #62",
         "mrs x21, nzcv",
-        "and x21, x21, #0xc0000000",
-        "orr x20, x20, x21, lsr #24",
-        "orr x20, x20, #0x2",
-        "str x20, [x8, #-8]!"
+        "and x22, x21, #0xc0000000",
+        "orr x21, x20, x22, lsr #24",
+        "orr x20, x21, #0x2",
+        "mov x21, x8",
+        "mov x22, x21",
+        "str x20, [x22, #-8]!",
+        "mov x8, x22"
       ]
     },
     "pushfq": {
-      "ExpectedInstructionCount": 39,
+      "ExpectedInstructionCount": 44,
       "Comment": "0x9c",
       "ExpectedArm64ASM": [
         "cset w20, hs",
-        "eor w21, w27, w26",
-        "ubfx w21, w21, #4, #1",
-        "orr x20, x20, x21, lsl #4",
-        "ldrb w21, [x28, #712]",
-        "orr x20, x20, x21, lsl #8",
-        "ldrb w21, [x28, #713]",
-        "orr x20, x20, x21, lsl #9",
-        "ldrsb x21, [x28, #714]",
-        "lsr x21, x21, #63",
-        "orr x20, x20, x21, lsl #10",
+        "mov x21, x27",
+        "mov x22, x26",
+        "eor w23, w21, w22",
+        "ubfx w21, w23, #4, #1",
+        "orr x23, x20, x21, lsl #4",
+        "ldrb w20, [x28, #712]",
+        "orr x21, x23, x20, lsl #8",
+        "ldrb w20, [x28, #713]",
+        "orr x23, x21, x20, lsl #9",
+        "ldrsb x20, [x28, #714]",
+        "lsr x21, x20, #63",
+        "orr x20, x23, x21, lsl #10",
         "cset w21, vs",
-        "orr x20, x20, x21, lsl #11",
-        "ldrb w21, [x28, #716]",
-        "orr x20, x20, x21, lsl #12",
-        "ldrb w21, [x28, #718]",
-        "orr x20, x20, x21, lsl #14",
-        "ldrb w21, [x28, #720]",
-        "orr x20, x20, x21, lsl #16",
-        "ldrb w21, [x28, #721]",
-        "orr x20, x20, x21, lsl #17",
-        "ldrb w21, [x28, #722]",
-        "orr x20, x20, x21, lsl #18",
-        "ldrb w21, [x28, #723]",
-        "orr x20, x20, x21, lsl #19",
-        "ldrb w21, [x28, #724]",
-        "orr x20, x20, x21, lsl #20",
-        "ldrb w21, [x28, #725]",
-        "orr x20, x20, x21, lsl #21",
-        "eor w21, w26, w26, lsr #4",
-        "eor w21, w21, w21, lsr #2",
-        "eor w21, w21, w21, lsr #1",
-        "orr x21, x21, #0xfffffffffffffffe",
-        "orn x20, x20, x21, ror #62",
+        "orr x23, x20, x21, lsl #11",
+        "ldrb w20, [x28, #716]",
+        "orr x21, x23, x20, lsl #12",
+        "ldrb w20, [x28, #718]",
+        "orr x23, x21, x20, lsl #14",
+        "ldrb w20, [x28, #720]",
+        "orr x21, x23, x20, lsl #16",
+        "ldrb w20, [x28, #721]",
+        "orr x23, x21, x20, lsl #17",
+        "ldrb w20, [x28, #722]",
+        "orr x21, x23, x20, lsl #18",
+        "ldrb w20, [x28, #723]",
+        "orr x23, x21, x20, lsl #19",
+        "ldrb w20, [x28, #724]",
+        "orr x21, x23, x20, lsl #20",
+        "ldrb w20, [x28, #725]",
+        "orr x23, x21, x20, lsl #21",
+        "eor w20, w22, w22, lsr #4",
+        "eor w21, w20, w20, lsr #2",
+        "eor w20, w21, w21, lsr #1",
+        "orr x21, x20, #0xfffffffffffffffe",
+        "orn x20, x23, x21, ror #62",
         "mrs x21, nzcv",
-        "and x21, x21, #0xc0000000",
-        "orr x20, x20, x21, lsr #24",
-        "orr x20, x20, #0x2",
-        "str x20, [x8, #-8]!"
+        "and x22, x21, #0xc0000000",
+        "orr x21, x20, x22, lsr #24",
+        "orr x20, x21, #0x2",
+        "mov x21, x8",
+        "mov x22, x21",
+        "str x20, [x22, #-8]!",
+        "mov x8, x22"
       ]
     },
     "popf": {
-      "ExpectedInstructionCount": 39,
+      "ExpectedInstructionCount": 43,
       "Comment": "0x9d",
       "ExpectedArm64ASM": [
-        "ldr x20, [x8]",
-        "add x8, x8, #0x8 (8)",
-        "mov w21, #0x202",
-        "orr x27, x20, x21",
-        "ubfx x20, x27, #0, #1",
-        "lsl x20, x20, #29",
-        "ubfx w21, w27, #2, #1",
-        "mov w22, #0x1",
-        "eor w26, w21, #0x1",
-        "ubfx x21, x27, #6, #1",
-        "orr w20, w20, w21, lsl #30",
-        "ubfx x21, x27, #7, #1",
-        "orr w20, w20, w21, lsl #31",
-        "ubfx w21, w27, #8, #1",
-        "strb w21, [x28, #712]",
-        "ubfx w21, w27, #9, #1",
-        "strb w21, [x28, #713]",
-        "ubfx w21, w27, #10, #1",
-        "sub x21, x22, x21, lsl #1",
-        "strb w21, [x28, #714]",
-        "ubfx x21, x27, #11, #1",
-        "orr w20, w20, w21, lsl #28",
-        "ubfx w21, w27, #12, #1",
-        "strb w21, [x28, #716]",
-        "ubfx w21, w27, #14, #1",
-        "strb w21, [x28, #718]",
-        "ubfx w21, w27, #16, #1",
-        "strb w21, [x28, #720]",
-        "ubfx w21, w27, #17, #1",
-        "strb w21, [x28, #721]",
-        "ubfx w21, w27, #18, #1",
-        "strb w21, [x28, #722]",
-        "ubfx w21, w27, #19, #1",
-        "strb w21, [x28, #723]",
-        "ubfx w21, w27, #20, #1",
-        "strb w21, [x28, #724]",
-        "ubfx w21, w27, #21, #1",
-        "strb w21, [x28, #725]",
-        "msr nzcv, x20"
+        "mov x20, x8",
+        "ldr x21, [x20]",
+        "add x22, x20, #0x8 (8)",
+        "mov x8, x22",
+        "mov w20, #0x202",
+        "orr x22, x21, x20",
+        "ubfx x20, x22, #0, #1",
+        "lsl x21, x20, #29",
+        "ubfx w20, w22, #2, #1",
+        "mov w23, #0x1",
+        "eor w24, w20, #0x1",
+        "mov x26, x24",
+        "mov x27, x22",
+        "ubfx x20, x22, #6, #1",
+        "orr w24, w21, w20, lsl #30",
+        "ubfx x20, x22, #7, #1",
+        "orr w21, w24, w20, lsl #31",
+        "ubfx w20, w22, #8, #1",
+        "strb w20, [x28, #712]",
+        "ubfx w20, w22, #9, #1",
+        "strb w20, [x28, #713]",
+        "ubfx w20, w22, #10, #1",
+        "sub x24, x23, x20, lsl #1",
+        "strb w24, [x28, #714]",
+        "ubfx x20, x22, #11, #1",
+        "orr w23, w21, w20, lsl #28",
+        "ubfx w20, w22, #12, #1",
+        "strb w20, [x28, #716]",
+        "ubfx w20, w22, #14, #1",
+        "strb w20, [x28, #718]",
+        "ubfx w20, w22, #16, #1",
+        "strb w20, [x28, #720]",
+        "ubfx w20, w22, #17, #1",
+        "strb w20, [x28, #721]",
+        "ubfx w20, w22, #18, #1",
+        "strb w20, [x28, #722]",
+        "ubfx w20, w22, #19, #1",
+        "strb w20, [x28, #723]",
+        "ubfx w20, w22, #20, #1",
+        "strb w20, [x28, #724]",
+        "ubfx w20, w22, #21, #1",
+        "strb w20, [x28, #725]",
+        "msr nzcv, x23"
       ]
     },
     "sahf": {
-      "ExpectedInstructionCount": 16,
+      "ExpectedInstructionCount": 20,
       "Comment": "0x9e",
       "ExpectedArm64ASM": [
-        "ubfx w20, w4, #8, #8",
-        "mov w21, #0x28",
-        "bic x20, x20, x21",
-        "orr x27, x20, #0x2",
-        "ubfx x20, x27, #0, #1",
-        "mrs x21, nzcv",
-        "mov w0, w21",
-        "bfi w0, w20, #29, #1",
-        "mov w20, w0",
-        "ubfx w21, w27, #2, #1",
-        "eor w26, w21, #0x1",
-        "ubfx x21, x27, #6, #1",
-        "bfi w20, w21, #30, #1",
-        "ubfx x21, x27, #7, #1",
+        "mov x20, x4",
+        "ubfx w21, w20, #8, #8",
+        "mov w20, #0x28",
+        "bic x22, x21, x20",
+        "orr x20, x22, #0x2",
+        "ubfx x21, x20, #0, #1",
+        "mrs x22, nzcv",
+        "mov w23, w22",
+        "bfi w23, w21, #29, #1",
+        "ubfx w21, w20, #2, #1",
+        "eor w22, w21, #0x1",
+        "mov x26, x22",
+        "mov x27, x20",
+        "ubfx x21, x20, #6, #1",
+        "mov w22, w23",
+        "bfi w22, w21, #30, #1",
+        "ubfx x21, x20, #7, #1",
+        "mov w20, w22",
         "bfi w20, w21, #31, #1",
         "msr nzcv, x20"
       ]
     },
     "lahf": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 19,
       "Comment": "0x9f",
       "ExpectedArm64ASM": [
         "cset w20, hs",
-        "eor w21, w27, w26",
-        "ubfx w21, w21, #4, #1",
-        "orr x20, x20, x21, lsl #4",
-        "eor w21, w26, w26, lsr #4",
-        "eor w21, w21, w21, lsr #2",
-        "eor w21, w21, w21, lsr #1",
-        "orr x21, x21, #0xfffffffffffffffe",
-        "orn x20, x20, x21, ror #62",
+        "mov x21, x27",
+        "mov x22, x26",
+        "eor w23, w21, w22",
+        "ubfx w21, w23, #4, #1",
+        "orr x23, x20, x21, lsl #4",
+        "eor w20, w22, w22, lsr #4",
+        "eor w21, w20, w20, lsr #2",
+        "eor w20, w21, w21, lsr #1",
+        "orr x21, x20, #0xfffffffffffffffe",
+        "orn x20, x23, x21, ror #62",
         "mrs x21, nzcv",
-        "and x21, x21, #0xc0000000",
-        "orr x20, x20, x21, lsr #24",
-        "orr x20, x20, #0x2",
-        "bfi x4, x20, #8, #8"
+        "and x22, x21, #0xc0000000",
+        "orr x21, x20, x22, lsr #24",
+        "orr x20, x21, #0x2",
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfi x22, x20, #8, #8",
+        "mov x4, x22"
       ]
     },
     "db 0x48, 0xa1; dq 0x00000000e0000008": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "mov rax, [0xe0000008]",
         "0xa1"
@@ -2759,11 +3533,12 @@
         "movk x20, #0x3020, lsl #16",
         "movk x20, #0x3678, lsl #32",
         "movk x20, #0x2c37, lsl #48",
-        "ldr x4, [x20]"
+        "ldr x21, [x20]",
+        "mov x4, x21"
       ]
     },
     "db 0x67, 0xa1; dd 0xe0000000": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "mov eax, [0xe0000000]",
         "0xa1"
@@ -2771,100 +3546,122 @@
       "ExpectedArm64ASM": [
         "mov w20, #0x6264",
         "movk w20, #0x3020, lsl #16",
-        "ldr w4, [x20]"
+        "ldr w21, [x20]",
+        "mov x4, x21"
       ]
     },
     "db 0x48, 0xa3; dq 0x00000000e0000008": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "mov [0xe0000008], rax",
         "0xa3"
       ],
       "ExpectedArm64ASM": [
-        "mov x20, #0x6264",
-        "movk x20, #0x3020, lsl #16",
-        "movk x20, #0x3678, lsl #32",
-        "movk x20, #0x2c37, lsl #48",
-        "str x4, [x20]"
+        "mov x20, x4",
+        "mov x21, #0x6264",
+        "movk x21, #0x3020, lsl #16",
+        "movk x21, #0x3678, lsl #32",
+        "movk x21, #0x2c37, lsl #48",
+        "str x20, [x21]"
       ]
     },
     "db 0x67, 0xa3; dd 0xe0000000": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "mov [0xe0000000], eax",
         "0xa3"
       ],
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "mov w21, #0x6f6d",
-        "movk w21, #0x7376, lsl #16",
-        "str w20, [x21]"
+        "mov x20, x4",
+        "mov w21, w20",
+        "mov w20, #0x6f6d",
+        "movk w20, #0x7376, lsl #16",
+        "str w21, [x20]"
       ]
     },
     "movsb": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xa4"
       ],
       "ExpectedArm64ASM": [
-        "ldrb w20, [x10]",
-        "strb w20, [x11]",
-        "ldrsb x20, [x28, #714]",
-        "add x10, x10, x20",
-        "add x11, x11, x20"
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldrb w22, [x20]",
+        "strb w22, [x21]",
+        "ldrsb x22, [x28, #714]",
+        "add x23, x20, x22",
+        "add x20, x21, x22",
+        "mov x10, x23",
+        "mov x11, x20"
       ]
     },
     "movsw": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "0xa5"
       ],
       "ExpectedArm64ASM": [
-        "ldrh w20, [x10]",
-        "strh w20, [x11]",
-        "ldrsb x20, [x28, #714]",
-        "lsl x20, x20, #1",
-        "add x10, x10, x20",
-        "add x11, x11, x20"
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldrh w22, [x20]",
+        "strh w22, [x21]",
+        "ldrsb x22, [x28, #714]",
+        "lsl x23, x22, #1",
+        "add x22, x20, x23",
+        "add x20, x21, x23",
+        "mov x10, x22",
+        "mov x11, x20"
       ]
     },
     "movsd": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "0xa5"
       ],
       "ExpectedArm64ASM": [
-        "ldr w20, [x10]",
-        "str w20, [x11]",
-        "ldrsb x20, [x28, #714]",
-        "lsl x20, x20, #2",
-        "add x10, x10, x20",
-        "add x11, x11, x20"
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldr w22, [x20]",
+        "str w22, [x21]",
+        "ldrsb x22, [x28, #714]",
+        "lsl x23, x22, #2",
+        "add x22, x20, x23",
+        "add x20, x21, x23",
+        "mov x10, x22",
+        "mov x11, x20"
       ]
     },
     "movsq": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "0xa5"
       ],
       "ExpectedArm64ASM": [
-        "ldr x20, [x10]",
-        "str x20, [x11]",
-        "ldrsb x20, [x28, #714]",
-        "lsl x20, x20, #3",
-        "add x10, x10, x20",
-        "add x11, x11, x20"
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldr x22, [x20]",
+        "str x22, [x21]",
+        "ldrsb x22, [x28, #714]",
+        "lsl x23, x22, #3",
+        "add x22, x20, x23",
+        "add x20, x21, x23",
+        "mov x10, x22",
+        "mov x11, x20"
       ]
     },
     "rep movsb": {
-      "ExpectedInstructionCount": 83,
+      "ExpectedInstructionCount": 89,
       "Comment": "0xa4",
       "ExpectedArm64ASM": [
-        "ldrsb x20, [x28, #714]",
-        "mov x0, x5",
-        "mov x1, x11",
-        "mov x2, x10",
-        "tbnz w20, #1, #+0x94",
+        "mov x20, x10",
+        "mov x21, x11",
+        "mov x22, x5",
+        "ldrsb x23, [x28, #714]",
+        "mov x0, x22",
+        "mov x1, x21",
+        "mov x2, x20",
+        "tbnz w23, #1, #+0x94",
         "cbz x0, #+0x78",
         "sub x3, x1, x2",
         "tbz x3, #63, #+0x8",
@@ -2895,11 +3692,11 @@
         "strb w3, [x1], #1",
         "sub x0, x0, #0x1 (1)",
         "cbnz x0, #-0xc",
-        "mov x0, x11",
-        "mov x1, x10",
-        "mov x2, x5",
-        "add x20, x0, x2",
-        "add x21, x1, x2",
+        "mov x0, x21",
+        "mov x1, x20",
+        "mov x2, x22",
+        "add x24, x0, x2",
+        "add x25, x1, x2",
         "b #+0xa0",
         "cbz x0, #+0x88",
         "sub x3, x1, x2",
@@ -2935,25 +3732,31 @@
         "strb w3, [x1], #-1",
         "sub x0, x0, #0x1 (1)",
         "cbnz x0, #-0xc",
-        "mov x0, x11",
-        "mov x1, x10",
-        "mov x2, x5",
-        "sub x20, x0, x2",
-        "sub x21, x1, x2",
+        "mov x0, x21",
+        "mov x1, x20",
+        "mov x2, x22",
+        "sub x24, x0, x2",
+        "sub x25, x1, x2",
+        "mov x20, x24",
+        "mov x21, x25",
+        "mov w22, #0x0",
+        "mov x5, x22",
         "mov x11, x20",
-        "mov x10, x21",
-        "mov w5, #0x0"
+        "mov x10, x21"
       ]
     },
     "rep movsw": {
-      "ExpectedInstructionCount": 83,
+      "ExpectedInstructionCount": 89,
       "Comment": "0xa5",
       "ExpectedArm64ASM": [
-        "ldrsb x20, [x28, #714]",
-        "mov x0, x5",
-        "mov x1, x11",
-        "mov x2, x10",
-        "tbnz w20, #1, #+0x94",
+        "mov x20, x10",
+        "mov x21, x11",
+        "mov x22, x5",
+        "ldrsb x23, [x28, #714]",
+        "mov x0, x22",
+        "mov x1, x21",
+        "mov x2, x20",
+        "tbnz w23, #1, #+0x94",
         "cbz x0, #+0x78",
         "sub x3, x1, x2",
         "tbz x3, #63, #+0x8",
@@ -2984,11 +3787,11 @@
         "strh w3, [x1], #2",
         "sub x0, x0, #0x1 (1)",
         "cbnz x0, #-0xc",
-        "mov x0, x11",
-        "mov x1, x10",
-        "mov x2, x5",
-        "add x20, x0, x2, lsl #1",
-        "add x21, x1, x2, lsl #1",
+        "mov x0, x21",
+        "mov x1, x20",
+        "mov x2, x22",
+        "add x24, x0, x2, lsl #1",
+        "add x25, x1, x2, lsl #1",
         "b #+0xa0",
         "cbz x0, #+0x88",
         "sub x3, x1, x2",
@@ -3024,25 +3827,31 @@
         "strh w3, [x1], #-2",
         "sub x0, x0, #0x1 (1)",
         "cbnz x0, #-0xc",
-        "mov x0, x11",
-        "mov x1, x10",
-        "mov x2, x5",
-        "sub x20, x0, x2, lsl #1",
-        "sub x21, x1, x2, lsl #1",
+        "mov x0, x21",
+        "mov x1, x20",
+        "mov x2, x22",
+        "sub x24, x0, x2, lsl #1",
+        "sub x25, x1, x2, lsl #1",
+        "mov x20, x24",
+        "mov x21, x25",
+        "mov w22, #0x0",
+        "mov x5, x22",
         "mov x11, x20",
-        "mov x10, x21",
-        "mov w5, #0x0"
+        "mov x10, x21"
       ]
     },
     "rep movsd": {
-      "ExpectedInstructionCount": 83,
+      "ExpectedInstructionCount": 89,
       "Comment": "0xa5",
       "ExpectedArm64ASM": [
-        "ldrsb x20, [x28, #714]",
-        "mov x0, x5",
-        "mov x1, x11",
-        "mov x2, x10",
-        "tbnz w20, #1, #+0x94",
+        "mov x20, x10",
+        "mov x21, x11",
+        "mov x22, x5",
+        "ldrsb x23, [x28, #714]",
+        "mov x0, x22",
+        "mov x1, x21",
+        "mov x2, x20",
+        "tbnz w23, #1, #+0x94",
         "cbz x0, #+0x78",
         "sub x3, x1, x2",
         "tbz x3, #63, #+0x8",
@@ -3073,11 +3882,11 @@
         "str w3, [x1], #4",
         "sub x0, x0, #0x1 (1)",
         "cbnz x0, #-0xc",
-        "mov x0, x11",
-        "mov x1, x10",
-        "mov x2, x5",
-        "add x20, x0, x2, lsl #2",
-        "add x21, x1, x2, lsl #2",
+        "mov x0, x21",
+        "mov x1, x20",
+        "mov x2, x22",
+        "add x24, x0, x2, lsl #2",
+        "add x25, x1, x2, lsl #2",
         "b #+0xa0",
         "cbz x0, #+0x88",
         "sub x3, x1, x2",
@@ -3113,25 +3922,31 @@
         "str w3, [x1], #-4",
         "sub x0, x0, #0x1 (1)",
         "cbnz x0, #-0xc",
-        "mov x0, x11",
-        "mov x1, x10",
-        "mov x2, x5",
-        "sub x20, x0, x2, lsl #2",
-        "sub x21, x1, x2, lsl #2",
+        "mov x0, x21",
+        "mov x1, x20",
+        "mov x2, x22",
+        "sub x24, x0, x2, lsl #2",
+        "sub x25, x1, x2, lsl #2",
+        "mov x20, x24",
+        "mov x21, x25",
+        "mov w22, #0x0",
+        "mov x5, x22",
         "mov x11, x20",
-        "mov x10, x21",
-        "mov w5, #0x0"
+        "mov x10, x21"
       ]
     },
     "rep movsq": {
-      "ExpectedInstructionCount": 83,
+      "ExpectedInstructionCount": 89,
       "Comment": "0xa5",
       "ExpectedArm64ASM": [
-        "ldrsb x20, [x28, #714]",
-        "mov x0, x5",
-        "mov x1, x11",
-        "mov x2, x10",
-        "tbnz w20, #1, #+0x94",
+        "mov x20, x10",
+        "mov x21, x11",
+        "mov x22, x5",
+        "ldrsb x23, [x28, #714]",
+        "mov x0, x22",
+        "mov x1, x21",
+        "mov x2, x20",
+        "tbnz w23, #1, #+0x94",
         "cbz x0, #+0x78",
         "sub x3, x1, x2",
         "tbz x3, #63, #+0x8",
@@ -3162,11 +3977,11 @@
         "str x3, [x1], #8",
         "sub x0, x0, #0x1 (1)",
         "cbnz x0, #-0xc",
-        "mov x0, x11",
-        "mov x1, x10",
-        "mov x2, x5",
-        "add x20, x0, x2, lsl #3",
-        "add x21, x1, x2, lsl #3",
+        "mov x0, x21",
+        "mov x1, x20",
+        "mov x2, x22",
+        "add x24, x0, x2, lsl #3",
+        "add x25, x1, x2, lsl #3",
         "b #+0xa0",
         "cbz x0, #+0x88",
         "sub x3, x1, x2",
@@ -3202,468 +4017,686 @@
         "str x3, [x1], #-8",
         "sub x0, x0, #0x1 (1)",
         "cbnz x0, #-0xc",
-        "mov x0, x11",
-        "mov x1, x10",
-        "mov x2, x5",
-        "sub x20, x0, x2, lsl #3",
-        "sub x21, x1, x2, lsl #3",
+        "mov x0, x21",
+        "mov x1, x20",
+        "mov x2, x22",
+        "sub x24, x0, x2, lsl #3",
+        "sub x25, x1, x2, lsl #3",
+        "mov x20, x24",
+        "mov x21, x25",
+        "mov w22, #0x0",
+        "mov x5, x22",
         "mov x11, x20",
-        "mov x10, x21",
-        "mov w5, #0x0"
+        "mov x10, x21"
       ]
     },
     "cmpsb": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 18,
       "Comment": [
         "0xa6"
       ],
       "ExpectedArm64ASM": [
-        "ldrb w20, [x11]",
-        "ldrb w21, [x10]",
-        "ldrsb x22, [x28, #714]",
-        "add x11, x11, x22",
-        "add x10, x10, x22",
-        "eor w27, w21, w20",
-        "lsl w0, w21, #24",
-        "cmp w0, w20, lsl #24",
-        "sub w26, w21, w20",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldrb w22, [x21]",
+        "ldrb w23, [x20]",
+        "ldrsb x24, [x28, #714]",
+        "add x25, x21, x24",
+        "mov x11, x25",
+        "add x21, x20, x24",
+        "mov x10, x21",
+        "eor w20, w23, w22",
+        "mov x27, x20",
+        "lsl w0, w23, #24",
+        "cmp w0, w22, lsl #24",
+        "sub w20, w23, w22",
+        "mov x26, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "cmpsw": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 19,
       "Comment": [
         "0xa7"
       ],
       "ExpectedArm64ASM": [
-        "ldrh w20, [x11]",
-        "ldrh w21, [x10]",
-        "ldrsb x22, [x28, #714]",
-        "lsl x22, x22, #1",
-        "add x11, x11, x22",
-        "add x10, x10, x22",
-        "eor w27, w21, w20",
-        "lsl w0, w21, #16",
-        "cmp w0, w20, lsl #16",
-        "sub w26, w21, w20",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldrh w22, [x21]",
+        "ldrh w23, [x20]",
+        "ldrsb x24, [x28, #714]",
+        "lsl x25, x24, #1",
+        "add x24, x21, x25",
+        "mov x11, x24",
+        "add x21, x20, x25",
+        "mov x10, x21",
+        "eor w20, w23, w22",
+        "mov x27, x20",
+        "lsl w0, w23, #16",
+        "cmp w0, w22, lsl #16",
+        "sub w20, w23, w22",
+        "mov x26, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "cmpsd": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 17,
       "Comment": [
         "0xa7"
       ],
       "ExpectedArm64ASM": [
-        "ldr w20, [x11]",
-        "ldr w21, [x10]",
-        "ldrsb x22, [x28, #714]",
-        "lsl x22, x22, #2",
-        "add x11, x11, x22",
-        "add x10, x10, x22",
-        "eor w27, w21, w20",
-        "subs w26, w21, w20",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldr w22, [x21]",
+        "ldr w23, [x20]",
+        "ldrsb x24, [x28, #714]",
+        "lsl x25, x24, #2",
+        "add x24, x21, x25",
+        "mov x11, x24",
+        "add x21, x20, x25",
+        "mov x10, x21",
+        "eor w20, w23, w22",
+        "mov x27, x20",
+        "subs w20, w23, w22",
+        "mov x26, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "cmpsq": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 17,
       "Comment": [
         "0xa7"
       ],
       "ExpectedArm64ASM": [
-        "ldr x20, [x11]",
-        "ldr x21, [x10]",
-        "ldrsb x22, [x28, #714]",
-        "lsl x22, x22, #3",
-        "add x11, x11, x22",
-        "add x10, x10, x22",
-        "eor w27, w21, w20",
-        "subs x26, x21, x20",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldr x22, [x21]",
+        "ldr x23, [x20]",
+        "ldrsb x24, [x28, #714]",
+        "lsl x25, x24, #3",
+        "add x24, x21, x25",
+        "mov x11, x24",
+        "add x21, x20, x25",
+        "mov x10, x21",
+        "eor w20, w23, w22",
+        "mov x27, x20",
+        "subs x20, x23, x22",
+        "mov x26, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "repz cmpsb": {
-      "ExpectedInstructionCount": 28,
+      "ExpectedInstructionCount": 48,
       "Comment": "0xa6",
       "ExpectedArm64ASM": [
-        "cbz x5, #+0x70",
+        "mov x20, x5",
+        "cbz x20, #+0xbc",
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x24",
-        "ldrb w26, [x11]",
-        "ldrb w27, [x10]",
-        "subs x5, x5, #0x1 (1)",
-        "add x11, x11, #0x1 (1)",
-        "add x10, x10, #0x1 (1)",
-        "ccmp x27, x26, #nzcv, ne",
-        "b.eq #-0x18",
-        "b #+0x20",
-        "ldrb w26, [x11]",
-        "ldrb w27, [x10]",
-        "subs x5, x5, #0x1 (1)",
-        "sub x11, x11, #0x1 (1)",
-        "sub x10, x10, #0x1 (1)",
-        "ccmp x27, x26, #nzcv, ne",
-        "b.eq #-0x18",
-        "mov x20, x27",
-        "eor w27, w20, w26",
-        "lsl w0, w20, #24",
-        "cmp w0, w26, lsl #24",
-        "sub w26, w20, w26",
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x44",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldrb w22, [x21]",
+        "ldrb w23, [x20]",
+        "mov x26, x22",
+        "mov x27, x23",
+        "mov x24, x5",
+        "subs x25, x24, #0x1 (1)",
+        "mov x5, x25",
+        "add x24, x21, #0x1 (1)",
+        "mov x11, x24",
+        "add x21, x20, #0x1 (1)",
+        "mov x10, x21",
+        "ccmp x23, x22, #nzcv, ne",
+        "b.eq #-0x38",
+        "b #+0x40",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldrb w22, [x21]",
+        "ldrb w23, [x20]",
+        "mov x26, x22",
+        "mov x27, x23",
+        "mov x24, x5",
+        "subs x25, x24, #0x1 (1)",
+        "mov x5, x25",
+        "sub x24, x21, #0x1 (1)",
+        "mov x11, x24",
+        "sub x21, x20, #0x1 (1)",
+        "mov x10, x21",
+        "ccmp x23, x22, #nzcv, ne",
+        "b.eq #-0x38",
+        "mov x20, x26",
+        "mov x21, x27",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "lsl w0, w21, #24",
+        "cmp w0, w20, lsl #24",
+        "sub w22, w21, w20",
+        "mov x26, x22",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "repz cmpsw": {
-      "ExpectedInstructionCount": 28,
+      "ExpectedInstructionCount": 48,
       "Comment": "0xa7",
       "ExpectedArm64ASM": [
-        "cbz x5, #+0x70",
+        "mov x20, x5",
+        "cbz x20, #+0xbc",
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x24",
-        "ldrh w26, [x11]",
-        "ldrh w27, [x10]",
-        "subs x5, x5, #0x1 (1)",
-        "add x11, x11, #0x2 (2)",
-        "add x10, x10, #0x2 (2)",
-        "ccmp x27, x26, #nzcv, ne",
-        "b.eq #-0x18",
-        "b #+0x20",
-        "ldrh w26, [x11]",
-        "ldrh w27, [x10]",
-        "subs x5, x5, #0x1 (1)",
-        "sub x11, x11, #0x2 (2)",
-        "sub x10, x10, #0x2 (2)",
-        "ccmp x27, x26, #nzcv, ne",
-        "b.eq #-0x18",
-        "mov x20, x27",
-        "eor w27, w20, w26",
-        "lsl w0, w20, #16",
-        "cmp w0, w26, lsl #16",
-        "sub w26, w20, w26",
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x44",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldrh w22, [x21]",
+        "ldrh w23, [x20]",
+        "mov x26, x22",
+        "mov x27, x23",
+        "mov x24, x5",
+        "subs x25, x24, #0x1 (1)",
+        "mov x5, x25",
+        "add x24, x21, #0x2 (2)",
+        "mov x11, x24",
+        "add x21, x20, #0x2 (2)",
+        "mov x10, x21",
+        "ccmp x23, x22, #nzcv, ne",
+        "b.eq #-0x38",
+        "b #+0x40",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldrh w22, [x21]",
+        "ldrh w23, [x20]",
+        "mov x26, x22",
+        "mov x27, x23",
+        "mov x24, x5",
+        "subs x25, x24, #0x1 (1)",
+        "mov x5, x25",
+        "sub x24, x21, #0x2 (2)",
+        "mov x11, x24",
+        "sub x21, x20, #0x2 (2)",
+        "mov x10, x21",
+        "ccmp x23, x22, #nzcv, ne",
+        "b.eq #-0x38",
+        "mov x20, x26",
+        "mov x21, x27",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "lsl w0, w21, #16",
+        "cmp w0, w20, lsl #16",
+        "sub w22, w21, w20",
+        "mov x26, x22",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "repz cmpsd": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 46,
       "Comment": "0xa7",
       "ExpectedArm64ASM": [
-        "cbz x5, #+0x68",
+        "mov x20, x5",
+        "cbz x20, #+0xb4",
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x24",
-        "ldr w26, [x11]",
-        "ldr w27, [x10]",
-        "subs x5, x5, #0x1 (1)",
-        "add x11, x11, #0x4 (4)",
-        "add x10, x10, #0x4 (4)",
-        "ccmp x27, x26, #nzcv, ne",
-        "b.eq #-0x18",
-        "b #+0x20",
-        "ldr w26, [x11]",
-        "ldr w27, [x10]",
-        "subs x5, x5, #0x1 (1)",
-        "sub x11, x11, #0x4 (4)",
-        "sub x10, x10, #0x4 (4)",
-        "ccmp x27, x26, #nzcv, ne",
-        "b.eq #-0x18",
-        "mov x20, x27",
-        "eor w27, w20, w26",
-        "subs w26, w20, w26",
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x44",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldr w22, [x21]",
+        "ldr w23, [x20]",
+        "mov x26, x22",
+        "mov x27, x23",
+        "mov x24, x5",
+        "subs x25, x24, #0x1 (1)",
+        "mov x5, x25",
+        "add x24, x21, #0x4 (4)",
+        "mov x11, x24",
+        "add x21, x20, #0x4 (4)",
+        "mov x10, x21",
+        "ccmp x23, x22, #nzcv, ne",
+        "b.eq #-0x38",
+        "b #+0x40",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldr w22, [x21]",
+        "ldr w23, [x20]",
+        "mov x26, x22",
+        "mov x27, x23",
+        "mov x24, x5",
+        "subs x25, x24, #0x1 (1)",
+        "mov x5, x25",
+        "sub x24, x21, #0x4 (4)",
+        "mov x11, x24",
+        "sub x21, x20, #0x4 (4)",
+        "mov x10, x21",
+        "ccmp x23, x22, #nzcv, ne",
+        "b.eq #-0x38",
+        "mov x20, x26",
+        "mov x21, x27",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "subs w22, w21, w20",
+        "mov x26, x22",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "repz cmpsq": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 46,
       "Comment": "0xa7",
       "ExpectedArm64ASM": [
-        "cbz x5, #+0x68",
+        "mov x20, x5",
+        "cbz x20, #+0xb4",
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x24",
-        "ldr x26, [x11]",
-        "ldr x27, [x10]",
-        "subs x5, x5, #0x1 (1)",
-        "add x11, x11, #0x8 (8)",
-        "add x10, x10, #0x8 (8)",
-        "ccmp x27, x26, #nzcv, ne",
-        "b.eq #-0x18",
-        "b #+0x20",
-        "ldr x26, [x11]",
-        "ldr x27, [x10]",
-        "subs x5, x5, #0x1 (1)",
-        "sub x11, x11, #0x8 (8)",
-        "sub x10, x10, #0x8 (8)",
-        "ccmp x27, x26, #nzcv, ne",
-        "b.eq #-0x18",
-        "mov x20, x27",
-        "eor w27, w20, w26",
-        "subs x26, x20, x26",
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x44",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldr x22, [x21]",
+        "ldr x23, [x20]",
+        "mov x26, x22",
+        "mov x27, x23",
+        "mov x24, x5",
+        "subs x25, x24, #0x1 (1)",
+        "mov x5, x25",
+        "add x24, x21, #0x8 (8)",
+        "mov x11, x24",
+        "add x21, x20, #0x8 (8)",
+        "mov x10, x21",
+        "ccmp x23, x22, #nzcv, ne",
+        "b.eq #-0x38",
+        "b #+0x40",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldr x22, [x21]",
+        "ldr x23, [x20]",
+        "mov x26, x22",
+        "mov x27, x23",
+        "mov x24, x5",
+        "subs x25, x24, #0x1 (1)",
+        "mov x5, x25",
+        "sub x24, x21, #0x8 (8)",
+        "mov x11, x24",
+        "sub x21, x20, #0x8 (8)",
+        "mov x10, x21",
+        "ccmp x23, x22, #nzcv, ne",
+        "b.eq #-0x38",
+        "mov x20, x26",
+        "mov x21, x27",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "subs x22, x21, x20",
+        "mov x26, x22",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "repnz cmpsb": {
-      "ExpectedInstructionCount": 28,
+      "ExpectedInstructionCount": 48,
       "Comment": "0xa6",
       "ExpectedArm64ASM": [
-        "cbz x5, #+0x70",
+        "mov x20, x5",
+        "cbz x20, #+0xbc",
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x24",
-        "ldrb w26, [x11]",
-        "ldrb w27, [x10]",
-        "subs x5, x5, #0x1 (1)",
-        "add x11, x11, #0x1 (1)",
-        "add x10, x10, #0x1 (1)",
-        "ccmp x27, x26, #nZcv, ne",
-        "b.ne #-0x18",
-        "b #+0x20",
-        "ldrb w26, [x11]",
-        "ldrb w27, [x10]",
-        "subs x5, x5, #0x1 (1)",
-        "sub x11, x11, #0x1 (1)",
-        "sub x10, x10, #0x1 (1)",
-        "ccmp x27, x26, #nZcv, ne",
-        "b.ne #-0x18",
-        "mov x20, x27",
-        "eor w27, w20, w26",
-        "lsl w0, w20, #24",
-        "cmp w0, w26, lsl #24",
-        "sub w26, w20, w26",
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x44",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldrb w22, [x21]",
+        "ldrb w23, [x20]",
+        "mov x26, x22",
+        "mov x27, x23",
+        "mov x24, x5",
+        "subs x25, x24, #0x1 (1)",
+        "mov x5, x25",
+        "add x24, x21, #0x1 (1)",
+        "mov x11, x24",
+        "add x21, x20, #0x1 (1)",
+        "mov x10, x21",
+        "ccmp x23, x22, #nZcv, ne",
+        "b.ne #-0x38",
+        "b #+0x40",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldrb w22, [x21]",
+        "ldrb w23, [x20]",
+        "mov x26, x22",
+        "mov x27, x23",
+        "mov x24, x5",
+        "subs x25, x24, #0x1 (1)",
+        "mov x5, x25",
+        "sub x24, x21, #0x1 (1)",
+        "mov x11, x24",
+        "sub x21, x20, #0x1 (1)",
+        "mov x10, x21",
+        "ccmp x23, x22, #nZcv, ne",
+        "b.ne #-0x38",
+        "mov x20, x26",
+        "mov x21, x27",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "lsl w0, w21, #24",
+        "cmp w0, w20, lsl #24",
+        "sub w22, w21, w20",
+        "mov x26, x22",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "repnz cmpsw": {
-      "ExpectedInstructionCount": 28,
+      "ExpectedInstructionCount": 48,
       "Comment": "0xa7",
       "ExpectedArm64ASM": [
-        "cbz x5, #+0x70",
+        "mov x20, x5",
+        "cbz x20, #+0xbc",
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x24",
-        "ldrh w26, [x11]",
-        "ldrh w27, [x10]",
-        "subs x5, x5, #0x1 (1)",
-        "add x11, x11, #0x2 (2)",
-        "add x10, x10, #0x2 (2)",
-        "ccmp x27, x26, #nZcv, ne",
-        "b.ne #-0x18",
-        "b #+0x20",
-        "ldrh w26, [x11]",
-        "ldrh w27, [x10]",
-        "subs x5, x5, #0x1 (1)",
-        "sub x11, x11, #0x2 (2)",
-        "sub x10, x10, #0x2 (2)",
-        "ccmp x27, x26, #nZcv, ne",
-        "b.ne #-0x18",
-        "mov x20, x27",
-        "eor w27, w20, w26",
-        "lsl w0, w20, #16",
-        "cmp w0, w26, lsl #16",
-        "sub w26, w20, w26",
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x44",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldrh w22, [x21]",
+        "ldrh w23, [x20]",
+        "mov x26, x22",
+        "mov x27, x23",
+        "mov x24, x5",
+        "subs x25, x24, #0x1 (1)",
+        "mov x5, x25",
+        "add x24, x21, #0x2 (2)",
+        "mov x11, x24",
+        "add x21, x20, #0x2 (2)",
+        "mov x10, x21",
+        "ccmp x23, x22, #nZcv, ne",
+        "b.ne #-0x38",
+        "b #+0x40",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldrh w22, [x21]",
+        "ldrh w23, [x20]",
+        "mov x26, x22",
+        "mov x27, x23",
+        "mov x24, x5",
+        "subs x25, x24, #0x1 (1)",
+        "mov x5, x25",
+        "sub x24, x21, #0x2 (2)",
+        "mov x11, x24",
+        "sub x21, x20, #0x2 (2)",
+        "mov x10, x21",
+        "ccmp x23, x22, #nZcv, ne",
+        "b.ne #-0x38",
+        "mov x20, x26",
+        "mov x21, x27",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "lsl w0, w21, #16",
+        "cmp w0, w20, lsl #16",
+        "sub w22, w21, w20",
+        "mov x26, x22",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "repnz cmpsd": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 46,
       "Comment": "0xa7",
       "ExpectedArm64ASM": [
-        "cbz x5, #+0x68",
+        "mov x20, x5",
+        "cbz x20, #+0xb4",
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x24",
-        "ldr w26, [x11]",
-        "ldr w27, [x10]",
-        "subs x5, x5, #0x1 (1)",
-        "add x11, x11, #0x4 (4)",
-        "add x10, x10, #0x4 (4)",
-        "ccmp x27, x26, #nZcv, ne",
-        "b.ne #-0x18",
-        "b #+0x20",
-        "ldr w26, [x11]",
-        "ldr w27, [x10]",
-        "subs x5, x5, #0x1 (1)",
-        "sub x11, x11, #0x4 (4)",
-        "sub x10, x10, #0x4 (4)",
-        "ccmp x27, x26, #nZcv, ne",
-        "b.ne #-0x18",
-        "mov x20, x27",
-        "eor w27, w20, w26",
-        "subs w26, w20, w26",
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x44",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldr w22, [x21]",
+        "ldr w23, [x20]",
+        "mov x26, x22",
+        "mov x27, x23",
+        "mov x24, x5",
+        "subs x25, x24, #0x1 (1)",
+        "mov x5, x25",
+        "add x24, x21, #0x4 (4)",
+        "mov x11, x24",
+        "add x21, x20, #0x4 (4)",
+        "mov x10, x21",
+        "ccmp x23, x22, #nZcv, ne",
+        "b.ne #-0x38",
+        "b #+0x40",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldr w22, [x21]",
+        "ldr w23, [x20]",
+        "mov x26, x22",
+        "mov x27, x23",
+        "mov x24, x5",
+        "subs x25, x24, #0x1 (1)",
+        "mov x5, x25",
+        "sub x24, x21, #0x4 (4)",
+        "mov x11, x24",
+        "sub x21, x20, #0x4 (4)",
+        "mov x10, x21",
+        "ccmp x23, x22, #nZcv, ne",
+        "b.ne #-0x38",
+        "mov x20, x26",
+        "mov x21, x27",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "subs w22, w21, w20",
+        "mov x26, x22",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "repnz cmpsq": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 46,
       "Comment": "0xa7",
       "ExpectedArm64ASM": [
-        "cbz x5, #+0x68",
+        "mov x20, x5",
+        "cbz x20, #+0xb4",
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x24",
-        "ldr x26, [x11]",
-        "ldr x27, [x10]",
-        "subs x5, x5, #0x1 (1)",
-        "add x11, x11, #0x8 (8)",
-        "add x10, x10, #0x8 (8)",
-        "ccmp x27, x26, #nZcv, ne",
-        "b.ne #-0x18",
-        "b #+0x20",
-        "ldr x26, [x11]",
-        "ldr x27, [x10]",
-        "subs x5, x5, #0x1 (1)",
-        "sub x11, x11, #0x8 (8)",
-        "sub x10, x10, #0x8 (8)",
-        "ccmp x27, x26, #nZcv, ne",
-        "b.ne #-0x18",
-        "mov x20, x27",
-        "eor w27, w20, w26",
-        "subs x26, x20, x26",
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x44",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldr x22, [x21]",
+        "ldr x23, [x20]",
+        "mov x26, x22",
+        "mov x27, x23",
+        "mov x24, x5",
+        "subs x25, x24, #0x1 (1)",
+        "mov x5, x25",
+        "add x24, x21, #0x8 (8)",
+        "mov x11, x24",
+        "add x21, x20, #0x8 (8)",
+        "mov x10, x21",
+        "ccmp x23, x22, #nZcv, ne",
+        "b.ne #-0x38",
+        "b #+0x40",
+        "mov x20, x10",
+        "mov x21, x11",
+        "ldr x22, [x21]",
+        "ldr x23, [x20]",
+        "mov x26, x22",
+        "mov x27, x23",
+        "mov x24, x5",
+        "subs x25, x24, #0x1 (1)",
+        "mov x5, x25",
+        "sub x24, x21, #0x8 (8)",
+        "mov x11, x24",
+        "sub x21, x20, #0x8 (8)",
+        "mov x10, x21",
+        "ccmp x23, x22, #nZcv, ne",
+        "b.ne #-0x38",
+        "mov x20, x26",
+        "mov x21, x27",
+        "eor w22, w21, w20",
+        "mov x27, x22",
+        "subs x22, x21, x20",
+        "mov x26, x22",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "test al, 1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0xa8",
       "ExpectedArm64ASM": [
-        "and w26, w4, #0x1",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x4",
+        "and w21, w20, #0x1",
+        "cmn wzr, w21, lsl #24",
+        "mov x26, x21"
       ]
     },
     "test ax, 1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0xa9",
       "ExpectedArm64ASM": [
-        "and w26, w4, #0x1",
-        "cmn wzr, w26, lsl #16"
+        "mov x20, x4",
+        "and w21, w20, #0x1",
+        "cmn wzr, w21, lsl #16",
+        "mov x26, x21"
       ]
     },
     "test eax, 1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0xa9",
       "ExpectedArm64ASM": [
-        "ands w26, w4, #0x1"
+        "mov x20, x4",
+        "ands w21, w20, #0x1",
+        "mov x26, x21"
       ]
     },
     "test rax, 1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0xa9",
       "ExpectedArm64ASM": [
-        "ands x26, x4, #0x1"
+        "mov x20, x4",
+        "ands x21, x20, #0x1",
+        "mov x26, x21"
       ]
     },
     "test al, -1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0xa8",
       "ExpectedArm64ASM": [
-        "mov x26, x4",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x4",
+        "mov x21, x20",
+        "cmn wzr, w21, lsl #24",
+        "mov x26, x21"
       ]
     },
     "test ax, -1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0xa9",
       "ExpectedArm64ASM": [
-        "mov x26, x4",
-        "cmn wzr, w26, lsl #16"
+        "mov x20, x4",
+        "mov x21, x20",
+        "cmn wzr, w21, lsl #16",
+        "mov x26, x21"
       ]
     },
     "test eax, -1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0xa9",
       "ExpectedArm64ASM": [
-        "ands w26, w4, w4"
+        "mov x20, x4",
+        "ands w21, w20, w20",
+        "mov x26, x21"
       ]
     },
     "test rax, -1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0xa9",
       "ExpectedArm64ASM": [
-        "ands x26, x4, x4"
+        "mov x20, x4",
+        "ands x21, x20, x20",
+        "mov x26, x21"
       ]
     },
     "stosb": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0xaa",
       "ExpectedArm64ASM": [
-        "strb w4, [x11]",
+        "mov x20, x4",
+        "mov x21, x11",
+        "strb w20, [x21]",
         "ldrsb x20, [x28, #714]",
-        "add x11, x11, x20"
+        "add x22, x21, x20",
+        "mov x11, x22"
       ]
     },
     "stosw": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0xab",
       "ExpectedArm64ASM": [
-        "strh w4, [x11]",
+        "mov x20, x4",
+        "mov x21, x11",
+        "strh w20, [x21]",
         "ldrsb x20, [x28, #714]",
-        "add x11, x11, x20, lsl #1"
+        "add x22, x21, x20, lsl #1",
+        "mov x11, x22"
       ]
     },
     "stosd": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0xab",
       "ExpectedArm64ASM": [
-        "str w4, [x11]",
+        "mov x20, x4",
+        "mov x21, x11",
+        "str w20, [x21]",
         "ldrsb x20, [x28, #714]",
-        "add x11, x11, x20, lsl #2"
+        "add x22, x21, x20, lsl #2",
+        "mov x11, x22"
       ]
     },
     "stosq": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0xab",
       "ExpectedArm64ASM": [
-        "str x4, [x11]",
+        "mov x20, x4",
+        "mov x21, x11",
+        "str x20, [x21]",
         "ldrsb x20, [x28, #714]",
-        "add x11, x11, x20, lsl #3"
+        "add x22, x21, x20, lsl #3",
+        "mov x11, x22"
       ]
     },
     "rep stosb": {
-      "ExpectedInstructionCount": 55,
+      "ExpectedInstructionCount": 60,
       "Comment": "0xaa",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "ldrsb x21, [x28, #714]",
-        "mov x0, x5",
-        "mov x1, x11",
-        "tbnz w21, #1, #+0x64",
+        "mov x20, x4",
+        "uxtb w21, w20",
+        "mov x20, x11",
+        "mov x22, x5",
+        "ldrsb x23, [x28, #714]",
+        "mov x0, x22",
+        "mov x1, x20",
+        "tbnz w23, #1, #+0x64",
         "cbz x0, #+0x58",
         "sub x0, x0, #0x20 (32)",
         "tbnz x0, #63, #+0x3c",
-        "dup v1.16b, w20",
+        "dup v1.16b, w21",
         "sub x0, x0, #0x20 (32)",
         "tbnz x0, #63, #+0x14",
         "stp q1, q1, [x1], #32",
@@ -3679,16 +4712,16 @@
         "tbz x0, #63, #-0x8",
         "add x0, x0, #0x20 (32)",
         "cbz x0, #+0x10",
-        "strb w20, [x1], #1",
+        "strb w21, [x1], #1",
         "sub x0, x0, #0x1 (1)",
         "cbnz x0, #-0x8",
-        "add x11, x11, x5",
+        "add x24, x20, x22",
         "b #+0x68",
         "cbz x0, #+0x60",
         "sub x1, x1, #0x1f (31)",
         "sub x0, x0, #0x20 (32)",
         "tbnz x0, #63, #+0x3c",
-        "dup v1.16b, w20",
+        "dup v1.16b, w21",
         "sub x0, x0, #0x20 (32)",
         "tbnz x0, #63, #+0x14",
         "stp q1, q1, [x1], #-32",
@@ -3705,26 +4738,31 @@
         "add x0, x0, #0x20 (32)",
         "cbz x0, #+0x14",
         "add x1, x1, #0x1f (31)",
-        "strb w20, [x1], #-1",
+        "strb w21, [x1], #-1",
         "sub x0, x0, #0x1 (1)",
         "cbnz x0, #-0x8",
-        "sub x11, x11, x5",
-        "mov w5, #0x0"
+        "sub x24, x20, x22",
+        "mov w20, #0x0",
+        "mov x5, x20",
+        "mov x11, x24"
       ]
     },
     "rep stosw": {
-      "ExpectedInstructionCount": 55,
+      "ExpectedInstructionCount": 60,
       "Comment": "0xab",
       "ExpectedArm64ASM": [
-        "uxth w20, w4",
-        "ldrsb x21, [x28, #714]",
-        "mov x0, x5",
-        "mov x1, x11",
-        "tbnz w21, #1, #+0x64",
+        "mov x20, x4",
+        "uxth w21, w20",
+        "mov x20, x11",
+        "mov x22, x5",
+        "ldrsb x23, [x28, #714]",
+        "mov x0, x22",
+        "mov x1, x20",
+        "tbnz w23, #1, #+0x64",
         "cbz x0, #+0x58",
         "sub x0, x0, #0x10 (16)",
         "tbnz x0, #63, #+0x3c",
-        "dup v1.8h, w20",
+        "dup v1.8h, w21",
         "sub x0, x0, #0x10 (16)",
         "tbnz x0, #63, #+0x14",
         "stp q1, q1, [x1], #32",
@@ -3740,16 +4778,16 @@
         "tbz x0, #63, #-0x8",
         "add x0, x0, #0x10 (16)",
         "cbz x0, #+0x10",
-        "strh w20, [x1], #2",
+        "strh w21, [x1], #2",
         "sub x0, x0, #0x1 (1)",
         "cbnz x0, #-0x8",
-        "add x11, x11, x5, lsl #1",
+        "add x24, x20, x22, lsl #1",
         "b #+0x68",
         "cbz x0, #+0x60",
         "sub x1, x1, #0x1e (30)",
         "sub x0, x0, #0x10 (16)",
         "tbnz x0, #63, #+0x3c",
-        "dup v1.8h, w20",
+        "dup v1.8h, w21",
         "sub x0, x0, #0x10 (16)",
         "tbnz x0, #63, #+0x14",
         "stp q1, q1, [x1], #-32",
@@ -3766,26 +4804,31 @@
         "add x0, x0, #0x10 (16)",
         "cbz x0, #+0x14",
         "add x1, x1, #0x1e (30)",
-        "strh w20, [x1], #-2",
+        "strh w21, [x1], #-2",
         "sub x0, x0, #0x1 (1)",
         "cbnz x0, #-0x8",
-        "sub x11, x11, x5, lsl #1",
-        "mov w5, #0x0"
+        "sub x24, x20, x22, lsl #1",
+        "mov w20, #0x0",
+        "mov x5, x20",
+        "mov x11, x24"
       ]
     },
     "rep stosd": {
-      "ExpectedInstructionCount": 55,
+      "ExpectedInstructionCount": 60,
       "Comment": "0xab",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "ldrsb x21, [x28, #714]",
-        "mov x0, x5",
-        "mov x1, x11",
-        "tbnz w21, #1, #+0x64",
+        "mov x20, x4",
+        "mov w21, w20",
+        "mov x20, x11",
+        "mov x22, x5",
+        "ldrsb x23, [x28, #714]",
+        "mov x0, x22",
+        "mov x1, x20",
+        "tbnz w23, #1, #+0x64",
         "cbz x0, #+0x58",
         "sub x0, x0, #0x8 (8)",
         "tbnz x0, #63, #+0x3c",
-        "dup v1.4s, w20",
+        "dup v1.4s, w21",
         "sub x0, x0, #0x8 (8)",
         "tbnz x0, #63, #+0x14",
         "stp q1, q1, [x1], #32",
@@ -3801,16 +4844,16 @@
         "tbz x0, #63, #-0x8",
         "add x0, x0, #0x8 (8)",
         "cbz x0, #+0x10",
-        "str w20, [x1], #4",
+        "str w21, [x1], #4",
         "sub x0, x0, #0x1 (1)",
         "cbnz x0, #-0x8",
-        "add x11, x11, x5, lsl #2",
+        "add x24, x20, x22, lsl #2",
         "b #+0x68",
         "cbz x0, #+0x60",
         "sub x1, x1, #0x1c (28)",
         "sub x0, x0, #0x8 (8)",
         "tbnz x0, #63, #+0x3c",
-        "dup v1.4s, w20",
+        "dup v1.4s, w21",
         "sub x0, x0, #0x8 (8)",
         "tbnz x0, #63, #+0x14",
         "stp q1, q1, [x1], #-32",
@@ -3827,29 +4870,34 @@
         "add x0, x0, #0x8 (8)",
         "cbz x0, #+0x14",
         "add x1, x1, #0x1c (28)",
-        "str w20, [x1], #-4",
+        "str w21, [x1], #-4",
         "sub x0, x0, #0x1 (1)",
         "cbnz x0, #-0x8",
-        "sub x11, x11, x5, lsl #2",
-        "mov w5, #0x0"
+        "sub x24, x20, x22, lsl #2",
+        "mov w20, #0x0",
+        "mov x5, x20",
+        "mov x11, x24"
       ]
     },
     "rep stosq": {
-      "ExpectedInstructionCount": 54,
+      "ExpectedInstructionCount": 59,
       "Comment": [
         "Unrolling the loop for faster memset can be done.",
         "Taking advantage of ARM MOPs instructions can be done",
         "0xab"
       ],
       "ExpectedArm64ASM": [
-        "ldrsb x20, [x28, #714]",
-        "mov x0, x5",
-        "mov x1, x11",
-        "tbnz w20, #1, #+0x64",
+        "mov x20, x4",
+        "mov x21, x11",
+        "mov x22, x5",
+        "ldrsb x23, [x28, #714]",
+        "mov x0, x22",
+        "mov x1, x21",
+        "tbnz w23, #1, #+0x64",
         "cbz x0, #+0x58",
         "sub x0, x0, #0x4 (4)",
         "tbnz x0, #63, #+0x3c",
-        "dup v1.2d, x4",
+        "dup v1.2d, x20",
         "sub x0, x0, #0x4 (4)",
         "tbnz x0, #63, #+0x14",
         "stp q1, q1, [x1], #32",
@@ -3865,16 +4913,16 @@
         "tbz x0, #63, #-0x8",
         "add x0, x0, #0x4 (4)",
         "cbz x0, #+0x10",
-        "str x4, [x1], #8",
+        "str x20, [x1], #8",
         "sub x0, x0, #0x1 (1)",
         "cbnz x0, #-0x8",
-        "add x11, x11, x5, lsl #3",
+        "add x24, x21, x22, lsl #3",
         "b #+0x68",
         "cbz x0, #+0x60",
         "sub x1, x1, #0x18 (24)",
         "sub x0, x0, #0x4 (4)",
         "tbnz x0, #63, #+0x3c",
-        "dup v1.2d, x4",
+        "dup v1.2d, x20",
         "sub x0, x0, #0x4 (4)",
         "tbnz x0, #63, #+0x14",
         "stp q1, q1, [x1], #-32",
@@ -3891,535 +4939,775 @@
         "add x0, x0, #0x4 (4)",
         "cbz x0, #+0x14",
         "add x1, x1, #0x18 (24)",
-        "str x4, [x1], #-8",
+        "str x20, [x1], #-8",
         "sub x0, x0, #0x1 (1)",
         "cbnz x0, #-0x8",
-        "sub x11, x11, x5, lsl #3",
-        "mov w5, #0x0"
+        "sub x24, x21, x22, lsl #3",
+        "mov w20, #0x0",
+        "mov x5, x20",
+        "mov x11, x24"
       ]
     },
     "lodsb": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 9,
       "Comment": "0xac",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x10]",
-        "bfxil x4, x20, #0, #8",
-        "ldrsb x20, [x28, #714]",
-        "add x10, x10, x20"
+        "mov x20, x10",
+        "ldrb w21, [x20]",
+        "mov x22, x4",
+        "mov x23, x22",
+        "bfxil x23, x21, #0, #8",
+        "mov x4, x23",
+        "ldrsb x21, [x28, #714]",
+        "add x22, x20, x21",
+        "mov x10, x22"
       ]
     },
     "lodsw": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 9,
       "Comment": "0xad",
       "ExpectedArm64ASM": [
-        "ldrh w20, [x10]",
-        "bfxil x4, x20, #0, #16",
-        "ldrsb x20, [x28, #714]",
-        "add x10, x10, x20, lsl #1"
+        "mov x20, x10",
+        "ldrh w21, [x20]",
+        "mov x22, x4",
+        "mov x23, x22",
+        "bfxil x23, x21, #0, #16",
+        "mov x4, x23",
+        "ldrsb x21, [x28, #714]",
+        "add x22, x20, x21, lsl #1",
+        "mov x10, x22"
       ]
     },
     "lodsd": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0xad",
       "ExpectedArm64ASM": [
-        "ldr w4, [x10]",
-        "ldrsb x20, [x28, #714]",
-        "add x10, x10, x20, lsl #2"
+        "mov x20, x10",
+        "ldr w21, [x20]",
+        "mov x4, x21",
+        "ldrsb x21, [x28, #714]",
+        "add x22, x20, x21, lsl #2",
+        "mov x10, x22"
       ]
     },
     "lodsq": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0xad",
       "ExpectedArm64ASM": [
-        "ldr x4, [x10]",
-        "ldrsb x20, [x28, #714]",
-        "add x10, x10, x20, lsl #3"
+        "mov x20, x10",
+        "ldr x21, [x20]",
+        "mov x4, x21",
+        "ldrsb x21, [x28, #714]",
+        "add x22, x20, x21, lsl #3",
+        "mov x10, x22"
       ]
     },
     "rep lodsb": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 33,
       "Comment": "0xac",
       "ExpectedArm64ASM": [
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x20",
-        "cbz x5, #+0x18",
-        "ldrb w20, [x10]",
-        "bfxil x4, x20, #0, #8",
-        "sub x5, x5, #0x1 (1)",
-        "add x10, x10, #0x1 (1)",
-        "b #-0x14",
-        "b #+0x1c",
-        "cbz x5, #+0x18",
-        "ldrb w20, [x10]",
-        "bfxil x4, x20, #0, #8",
-        "sub x5, x5, #0x1 (1)",
-        "sub x10, x10, #0x1 (1)",
-        "b #-0x14"
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x40",
+        "mov x20, x5",
+        "cbz x20, #+0x34",
+        "mov x20, x10",
+        "ldrb w21, [x20]",
+        "mov x22, x4",
+        "mov x23, x22",
+        "bfxil x23, x21, #0, #8",
+        "mov x4, x23",
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "add x21, x20, #0x1 (1)",
+        "mov x10, x21",
+        "b #-0x34",
+        "b #+0x3c",
+        "mov x20, x5",
+        "cbz x20, #+0x34",
+        "mov x20, x10",
+        "ldrb w21, [x20]",
+        "mov x22, x4",
+        "mov x23, x22",
+        "bfxil x23, x21, #0, #8",
+        "mov x4, x23",
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "sub x21, x20, #0x1 (1)",
+        "mov x10, x21",
+        "b #-0x34"
       ]
     },
     "rep lodsw": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 33,
       "Comment": "0xad",
       "ExpectedArm64ASM": [
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x20",
-        "cbz x5, #+0x18",
-        "ldrh w20, [x10]",
-        "bfxil x4, x20, #0, #16",
-        "sub x5, x5, #0x1 (1)",
-        "add x10, x10, #0x2 (2)",
-        "b #-0x14",
-        "b #+0x1c",
-        "cbz x5, #+0x18",
-        "ldrh w20, [x10]",
-        "bfxil x4, x20, #0, #16",
-        "sub x5, x5, #0x1 (1)",
-        "sub x10, x10, #0x2 (2)",
-        "b #-0x14"
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x40",
+        "mov x20, x5",
+        "cbz x20, #+0x34",
+        "mov x20, x10",
+        "ldrh w21, [x20]",
+        "mov x22, x4",
+        "mov x23, x22",
+        "bfxil x23, x21, #0, #16",
+        "mov x4, x23",
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "add x21, x20, #0x2 (2)",
+        "mov x10, x21",
+        "b #-0x34",
+        "b #+0x3c",
+        "mov x20, x5",
+        "cbz x20, #+0x34",
+        "mov x20, x10",
+        "ldrh w21, [x20]",
+        "mov x22, x4",
+        "mov x23, x22",
+        "bfxil x23, x21, #0, #16",
+        "mov x4, x23",
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "sub x21, x20, #0x2 (2)",
+        "mov x10, x21",
+        "b #-0x34"
       ]
     },
     "rep lodsd": {
-      "ExpectedInstructionCount": 15,
+      "ExpectedInstructionCount": 27,
       "Comment": "0xad",
       "ExpectedArm64ASM": [
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x1c",
-        "cbz x5, #+0x14",
-        "ldr w4, [x10]",
-        "sub x5, x5, #0x1 (1)",
-        "add x10, x10, #0x4 (4)",
-        "b #-0x10",
-        "b #+0x18",
-        "cbz x5, #+0x14",
-        "ldr w4, [x10]",
-        "sub x5, x5, #0x1 (1)",
-        "sub x10, x10, #0x4 (4)",
-        "b #-0x10"
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x34",
+        "mov x20, x5",
+        "cbz x20, #+0x28",
+        "mov x20, x10",
+        "ldr w21, [x20]",
+        "mov x4, x21",
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "add x21, x20, #0x4 (4)",
+        "mov x10, x21",
+        "b #-0x28",
+        "b #+0x30",
+        "mov x20, x5",
+        "cbz x20, #+0x28",
+        "mov x20, x10",
+        "ldr w21, [x20]",
+        "mov x4, x21",
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "sub x21, x20, #0x4 (4)",
+        "mov x10, x21",
+        "b #-0x28"
       ]
     },
     "rep lodsq": {
-      "ExpectedInstructionCount": 15,
+      "ExpectedInstructionCount": 27,
       "Comment": "0xad",
       "ExpectedArm64ASM": [
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x1c",
-        "cbz x5, #+0x14",
-        "ldr x4, [x10]",
-        "sub x5, x5, #0x1 (1)",
-        "add x10, x10, #0x8 (8)",
-        "b #-0x10",
-        "b #+0x18",
-        "cbz x5, #+0x14",
-        "ldr x4, [x10]",
-        "sub x5, x5, #0x1 (1)",
-        "sub x10, x10, #0x8 (8)",
-        "b #-0x10"
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x34",
+        "mov x20, x5",
+        "cbz x20, #+0x28",
+        "mov x20, x10",
+        "ldr x21, [x20]",
+        "mov x4, x21",
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "add x21, x20, #0x8 (8)",
+        "mov x10, x21",
+        "b #-0x28",
+        "b #+0x30",
+        "mov x20, x5",
+        "cbz x20, #+0x28",
+        "mov x20, x10",
+        "ldr x21, [x20]",
+        "mov x4, x21",
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "sub x21, x20, #0x8 (8)",
+        "mov x10, x21",
+        "b #-0x28"
       ]
     },
     "scasb": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 15,
       "Comment": "0xae",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x11]",
-        "ldrsb x21, [x28, #714]",
-        "add x11, x11, x21",
-        "eor w27, w4, w20",
-        "lsl w0, w4, #24",
-        "cmp w0, w20, lsl #24",
-        "sub w26, w4, w20",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldrb w22, [x20]",
+        "ldrsb x23, [x28, #714]",
+        "add x24, x20, x23",
+        "mov x11, x24",
+        "eor w20, w21, w22",
+        "mov x27, x20",
+        "lsl w0, w21, #24",
+        "cmp w0, w22, lsl #24",
+        "sub w20, w21, w22",
+        "mov x26, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "scasw": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 15,
       "Comment": "0xaf",
       "ExpectedArm64ASM": [
-        "ldrh w20, [x11]",
-        "ldrsb x21, [x28, #714]",
-        "add x11, x11, x21, lsl #1",
-        "eor w27, w4, w20",
-        "lsl w0, w4, #16",
-        "cmp w0, w20, lsl #16",
-        "sub w26, w4, w20",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldrh w22, [x20]",
+        "ldrsb x23, [x28, #714]",
+        "add x24, x20, x23, lsl #1",
+        "mov x11, x24",
+        "eor w20, w21, w22",
+        "mov x27, x20",
+        "lsl w0, w21, #16",
+        "cmp w0, w22, lsl #16",
+        "sub w20, w21, w22",
+        "mov x26, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "scasd": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 13,
       "Comment": "0xaf",
       "ExpectedArm64ASM": [
-        "ldr w20, [x11]",
-        "ldrsb x21, [x28, #714]",
-        "add x11, x11, x21, lsl #2",
-        "eor w27, w4, w20",
-        "subs w26, w4, w20",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldr w22, [x20]",
+        "ldrsb x23, [x28, #714]",
+        "add x24, x20, x23, lsl #2",
+        "mov x11, x24",
+        "eor w20, w21, w22",
+        "mov x27, x20",
+        "subs w20, w21, w22",
+        "mov x26, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "scasq": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 13,
       "Comment": "0xaf",
       "ExpectedArm64ASM": [
-        "ldr x20, [x11]",
-        "ldrsb x21, [x28, #714]",
-        "add x11, x11, x21, lsl #3",
-        "eor w27, w4, w20",
-        "subs x26, x4, x20",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldr x22, [x20]",
+        "ldrsb x23, [x28, #714]",
+        "add x24, x20, x23, lsl #3",
+        "mov x11, x24",
+        "eor w20, w21, w22",
+        "mov x27, x20",
+        "subs x20, x21, x22",
+        "mov x26, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "repz scasb": {
-      "ExpectedInstructionCount": 29,
+      "ExpectedInstructionCount": 45,
       "Comment": "0xae",
       "ExpectedArm64ASM": [
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x38",
-        "cbz x5, #+0x30",
-        "ldrb w20, [x11]",
-        "eor w27, w4, w20",
-        "lsl w0, w4, #24",
-        "cmp w0, w20, lsl #24",
-        "sub w26, w4, w20",
-        "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20",
-        "sub x5, x5, #0x1 (1)",
-        "add x11, x11, #0x1 (1)",
-        "b.eq #-0x2c",
-        "b #+0x34",
-        "cbz x5, #+0x30",
-        "ldrb w20, [x11]",
-        "eor w27, w4, w20",
-        "lsl w0, w4, #24",
-        "cmp w0, w20, lsl #24",
-        "sub w26, w4, w20",
-        "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20",
-        "sub x5, x5, #0x1 (1)",
-        "sub x11, x11, #0x1 (1)",
-        "b.eq #-0x2c"
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x58",
+        "mov x20, x5",
+        "cbz x20, #+0x4c",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldrb w22, [x20]",
+        "eor w23, w21, w22",
+        "mov x27, x23",
+        "lsl w0, w21, #24",
+        "cmp w0, w22, lsl #24",
+        "sub w23, w21, w22",
+        "mov x26, x23",
+        "mrs x21, nzcv",
+        "eor w22, w21, #0x20000000",
+        "msr nzcv, x22",
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "add x21, x20, #0x1 (1)",
+        "mov x11, x21",
+        "b.eq #-0x4c",
+        "b #+0x54",
+        "mov x20, x5",
+        "cbz x20, #+0x4c",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldrb w22, [x20]",
+        "eor w23, w21, w22",
+        "mov x27, x23",
+        "lsl w0, w21, #24",
+        "cmp w0, w22, lsl #24",
+        "sub w23, w21, w22",
+        "mov x26, x23",
+        "mrs x21, nzcv",
+        "eor w22, w21, #0x20000000",
+        "msr nzcv, x22",
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "sub x21, x20, #0x1 (1)",
+        "mov x11, x21",
+        "b.eq #-0x4c"
       ]
     },
     "repz scasw": {
-      "ExpectedInstructionCount": 29,
+      "ExpectedInstructionCount": 45,
       "Comment": "0xaf",
       "ExpectedArm64ASM": [
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x38",
-        "cbz x5, #+0x30",
-        "ldrh w20, [x11]",
-        "eor w27, w4, w20",
-        "lsl w0, w4, #16",
-        "cmp w0, w20, lsl #16",
-        "sub w26, w4, w20",
-        "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20",
-        "sub x5, x5, #0x1 (1)",
-        "add x11, x11, #0x2 (2)",
-        "b.eq #-0x2c",
-        "b #+0x34",
-        "cbz x5, #+0x30",
-        "ldrh w20, [x11]",
-        "eor w27, w4, w20",
-        "lsl w0, w4, #16",
-        "cmp w0, w20, lsl #16",
-        "sub w26, w4, w20",
-        "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20",
-        "sub x5, x5, #0x1 (1)",
-        "sub x11, x11, #0x2 (2)",
-        "b.eq #-0x2c"
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x58",
+        "mov x20, x5",
+        "cbz x20, #+0x4c",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldrh w22, [x20]",
+        "eor w23, w21, w22",
+        "mov x27, x23",
+        "lsl w0, w21, #16",
+        "cmp w0, w22, lsl #16",
+        "sub w23, w21, w22",
+        "mov x26, x23",
+        "mrs x21, nzcv",
+        "eor w22, w21, #0x20000000",
+        "msr nzcv, x22",
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "add x21, x20, #0x2 (2)",
+        "mov x11, x21",
+        "b.eq #-0x4c",
+        "b #+0x54",
+        "mov x20, x5",
+        "cbz x20, #+0x4c",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldrh w22, [x20]",
+        "eor w23, w21, w22",
+        "mov x27, x23",
+        "lsl w0, w21, #16",
+        "cmp w0, w22, lsl #16",
+        "sub w23, w21, w22",
+        "mov x26, x23",
+        "mrs x21, nzcv",
+        "eor w22, w21, #0x20000000",
+        "msr nzcv, x22",
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "sub x21, x20, #0x2 (2)",
+        "mov x11, x21",
+        "b.eq #-0x4c"
       ]
     },
     "repz scasd": {
-      "ExpectedInstructionCount": 25,
+      "ExpectedInstructionCount": 41,
       "Comment": "0xaf",
       "ExpectedArm64ASM": [
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x30",
-        "cbz x5, #+0x28",
-        "ldr w20, [x11]",
-        "eor w27, w4, w20",
-        "subs w26, w4, w20",
-        "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20",
-        "sub x5, x5, #0x1 (1)",
-        "add x11, x11, #0x4 (4)",
-        "b.eq #-0x24",
-        "b #+0x2c",
-        "cbz x5, #+0x28",
-        "ldr w20, [x11]",
-        "eor w27, w4, w20",
-        "subs w26, w4, w20",
-        "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20",
-        "sub x5, x5, #0x1 (1)",
-        "sub x11, x11, #0x4 (4)",
-        "b.eq #-0x24"
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x50",
+        "mov x20, x5",
+        "cbz x20, #+0x44",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldr w22, [x20]",
+        "eor w23, w21, w22",
+        "mov x27, x23",
+        "subs w23, w21, w22",
+        "mov x26, x23",
+        "mrs x21, nzcv",
+        "eor w22, w21, #0x20000000",
+        "msr nzcv, x22",
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "add x21, x20, #0x4 (4)",
+        "mov x11, x21",
+        "b.eq #-0x44",
+        "b #+0x4c",
+        "mov x20, x5",
+        "cbz x20, #+0x44",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldr w22, [x20]",
+        "eor w23, w21, w22",
+        "mov x27, x23",
+        "subs w23, w21, w22",
+        "mov x26, x23",
+        "mrs x21, nzcv",
+        "eor w22, w21, #0x20000000",
+        "msr nzcv, x22",
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "sub x21, x20, #0x4 (4)",
+        "mov x11, x21",
+        "b.eq #-0x44"
       ]
     },
     "repz scasq": {
-      "ExpectedInstructionCount": 25,
+      "ExpectedInstructionCount": 41,
       "Comment": "0xaf",
       "ExpectedArm64ASM": [
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x30",
-        "cbz x5, #+0x28",
-        "ldr x20, [x11]",
-        "eor w27, w4, w20",
-        "subs x26, x4, x20",
-        "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20",
-        "sub x5, x5, #0x1 (1)",
-        "add x11, x11, #0x8 (8)",
-        "b.eq #-0x24",
-        "b #+0x2c",
-        "cbz x5, #+0x28",
-        "ldr x20, [x11]",
-        "eor w27, w4, w20",
-        "subs x26, x4, x20",
-        "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20",
-        "sub x5, x5, #0x1 (1)",
-        "sub x11, x11, #0x8 (8)",
-        "b.eq #-0x24"
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x50",
+        "mov x20, x5",
+        "cbz x20, #+0x44",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldr x22, [x20]",
+        "eor w23, w21, w22",
+        "mov x27, x23",
+        "subs x23, x21, x22",
+        "mov x26, x23",
+        "mrs x21, nzcv",
+        "eor w22, w21, #0x20000000",
+        "msr nzcv, x22",
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "add x21, x20, #0x8 (8)",
+        "mov x11, x21",
+        "b.eq #-0x44",
+        "b #+0x4c",
+        "mov x20, x5",
+        "cbz x20, #+0x44",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldr x22, [x20]",
+        "eor w23, w21, w22",
+        "mov x27, x23",
+        "subs x23, x21, x22",
+        "mov x26, x23",
+        "mrs x21, nzcv",
+        "eor w22, w21, #0x20000000",
+        "msr nzcv, x22",
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "sub x21, x20, #0x8 (8)",
+        "mov x11, x21",
+        "b.eq #-0x44"
       ]
     },
     "repnz scasb": {
-      "ExpectedInstructionCount": 29,
+      "ExpectedInstructionCount": 45,
       "Comment": "0xae",
       "ExpectedArm64ASM": [
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x38",
-        "cbz x5, #+0x30",
-        "ldrb w20, [x11]",
-        "eor w27, w4, w20",
-        "lsl w0, w4, #24",
-        "cmp w0, w20, lsl #24",
-        "sub w26, w4, w20",
-        "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20",
-        "sub x5, x5, #0x1 (1)",
-        "add x11, x11, #0x1 (1)",
-        "b.ne #-0x2c",
-        "b #+0x34",
-        "cbz x5, #+0x30",
-        "ldrb w20, [x11]",
-        "eor w27, w4, w20",
-        "lsl w0, w4, #24",
-        "cmp w0, w20, lsl #24",
-        "sub w26, w4, w20",
-        "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20",
-        "sub x5, x5, #0x1 (1)",
-        "sub x11, x11, #0x1 (1)",
-        "b.ne #-0x2c"
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x58",
+        "mov x20, x5",
+        "cbz x20, #+0x4c",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldrb w22, [x20]",
+        "eor w23, w21, w22",
+        "mov x27, x23",
+        "lsl w0, w21, #24",
+        "cmp w0, w22, lsl #24",
+        "sub w23, w21, w22",
+        "mov x26, x23",
+        "mrs x21, nzcv",
+        "eor w22, w21, #0x20000000",
+        "msr nzcv, x22",
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "add x21, x20, #0x1 (1)",
+        "mov x11, x21",
+        "b.ne #-0x4c",
+        "b #+0x54",
+        "mov x20, x5",
+        "cbz x20, #+0x4c",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldrb w22, [x20]",
+        "eor w23, w21, w22",
+        "mov x27, x23",
+        "lsl w0, w21, #24",
+        "cmp w0, w22, lsl #24",
+        "sub w23, w21, w22",
+        "mov x26, x23",
+        "mrs x21, nzcv",
+        "eor w22, w21, #0x20000000",
+        "msr nzcv, x22",
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "sub x21, x20, #0x1 (1)",
+        "mov x11, x21",
+        "b.ne #-0x4c"
       ]
     },
     "repnz scasw": {
-      "ExpectedInstructionCount": 29,
+      "ExpectedInstructionCount": 45,
       "Comment": "0xaf",
       "ExpectedArm64ASM": [
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x38",
-        "cbz x5, #+0x30",
-        "ldrh w20, [x11]",
-        "eor w27, w4, w20",
-        "lsl w0, w4, #16",
-        "cmp w0, w20, lsl #16",
-        "sub w26, w4, w20",
-        "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20",
-        "sub x5, x5, #0x1 (1)",
-        "add x11, x11, #0x2 (2)",
-        "b.ne #-0x2c",
-        "b #+0x34",
-        "cbz x5, #+0x30",
-        "ldrh w20, [x11]",
-        "eor w27, w4, w20",
-        "lsl w0, w4, #16",
-        "cmp w0, w20, lsl #16",
-        "sub w26, w4, w20",
-        "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20",
-        "sub x5, x5, #0x1 (1)",
-        "sub x11, x11, #0x2 (2)",
-        "b.ne #-0x2c"
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x58",
+        "mov x20, x5",
+        "cbz x20, #+0x4c",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldrh w22, [x20]",
+        "eor w23, w21, w22",
+        "mov x27, x23",
+        "lsl w0, w21, #16",
+        "cmp w0, w22, lsl #16",
+        "sub w23, w21, w22",
+        "mov x26, x23",
+        "mrs x21, nzcv",
+        "eor w22, w21, #0x20000000",
+        "msr nzcv, x22",
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "add x21, x20, #0x2 (2)",
+        "mov x11, x21",
+        "b.ne #-0x4c",
+        "b #+0x54",
+        "mov x20, x5",
+        "cbz x20, #+0x4c",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldrh w22, [x20]",
+        "eor w23, w21, w22",
+        "mov x27, x23",
+        "lsl w0, w21, #16",
+        "cmp w0, w22, lsl #16",
+        "sub w23, w21, w22",
+        "mov x26, x23",
+        "mrs x21, nzcv",
+        "eor w22, w21, #0x20000000",
+        "msr nzcv, x22",
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "sub x21, x20, #0x2 (2)",
+        "mov x11, x21",
+        "b.ne #-0x4c"
       ]
     },
     "repnz scasd": {
-      "ExpectedInstructionCount": 25,
+      "ExpectedInstructionCount": 41,
       "Comment": "0xaf",
       "ExpectedArm64ASM": [
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x30",
-        "cbz x5, #+0x28",
-        "ldr w20, [x11]",
-        "eor w27, w4, w20",
-        "subs w26, w4, w20",
-        "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20",
-        "sub x5, x5, #0x1 (1)",
-        "add x11, x11, #0x4 (4)",
-        "b.ne #-0x24",
-        "b #+0x2c",
-        "cbz x5, #+0x28",
-        "ldr w20, [x11]",
-        "eor w27, w4, w20",
-        "subs w26, w4, w20",
-        "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20",
-        "sub x5, x5, #0x1 (1)",
-        "sub x11, x11, #0x4 (4)",
-        "b.ne #-0x24"
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x50",
+        "mov x20, x5",
+        "cbz x20, #+0x44",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldr w22, [x20]",
+        "eor w23, w21, w22",
+        "mov x27, x23",
+        "subs w23, w21, w22",
+        "mov x26, x23",
+        "mrs x21, nzcv",
+        "eor w22, w21, #0x20000000",
+        "msr nzcv, x22",
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "add x21, x20, #0x4 (4)",
+        "mov x11, x21",
+        "b.ne #-0x44",
+        "b #+0x4c",
+        "mov x20, x5",
+        "cbz x20, #+0x44",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldr w22, [x20]",
+        "eor w23, w21, w22",
+        "mov x27, x23",
+        "subs w23, w21, w22",
+        "mov x26, x23",
+        "mrs x21, nzcv",
+        "eor w22, w21, #0x20000000",
+        "msr nzcv, x22",
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "sub x21, x20, #0x4 (4)",
+        "mov x11, x21",
+        "b.ne #-0x44"
       ]
     },
     "repnz scasq": {
-      "ExpectedInstructionCount": 25,
+      "ExpectedInstructionCount": 41,
       "Comment": "0xaf",
       "ExpectedArm64ASM": [
         "ldrsb x20, [x28, #714]",
-        "lsr x20, x20, #63",
-        "cbz x20, #+0x8",
-        "b #+0x30",
-        "cbz x5, #+0x28",
-        "ldr x20, [x11]",
-        "eor w27, w4, w20",
-        "subs x26, x4, x20",
-        "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20",
-        "sub x5, x5, #0x1 (1)",
-        "add x11, x11, #0x8 (8)",
-        "b.ne #-0x24",
-        "b #+0x2c",
-        "cbz x5, #+0x28",
-        "ldr x20, [x11]",
-        "eor w27, w4, w20",
-        "subs x26, x4, x20",
-        "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20",
-        "sub x5, x5, #0x1 (1)",
-        "sub x11, x11, #0x8 (8)",
-        "b.ne #-0x24"
+        "lsr x21, x20, #63",
+        "cbz x21, #+0x8",
+        "b #+0x50",
+        "mov x20, x5",
+        "cbz x20, #+0x44",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldr x22, [x20]",
+        "eor w23, w21, w22",
+        "mov x27, x23",
+        "subs x23, x21, x22",
+        "mov x26, x23",
+        "mrs x21, nzcv",
+        "eor w22, w21, #0x20000000",
+        "msr nzcv, x22",
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "add x21, x20, #0x8 (8)",
+        "mov x11, x21",
+        "b.ne #-0x44",
+        "b #+0x4c",
+        "mov x20, x5",
+        "cbz x20, #+0x44",
+        "mov x20, x11",
+        "mov x21, x4",
+        "ldr x22, [x20]",
+        "eor w23, w21, w22",
+        "mov x27, x23",
+        "subs x23, x21, x22",
+        "mov x26, x23",
+        "mrs x21, nzcv",
+        "eor w22, w21, #0x20000000",
+        "msr nzcv, x22",
+        "mov x21, x5",
+        "sub x22, x21, #0x1 (1)",
+        "mov x5, x22",
+        "sub x21, x20, #0x8 (8)",
+        "mov x11, x21",
+        "b.ne #-0x44"
       ]
     },
     "mov al, 0xff": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0xb0",
       "ExpectedArm64ASM": [
-        "orr x4, x4, #0xff"
+        "mov x20, x4",
+        "orr x21, x20, #0xff",
+        "mov x4, x21"
       ]
     },
     "mov al, 0x82": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0xb0",
       "ExpectedArm64ASM": [
         "mov w20, #0x82",
-        "bfxil x4, x20, #0, #8"
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "mov ax, 0xffff": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0xb8",
       "ExpectedArm64ASM": [
-        "orr x4, x4, #0xffff"
+        "mov x20, x4",
+        "orr x21, x20, #0xffff",
+        "mov x4, x21"
       ]
     },
     "mov ax, 0x4243": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0xb8",
       "ExpectedArm64ASM": [
         "mov w20, #0x4243",
-        "bfxil x4, x20, #0, #16"
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x4, x22"
       ]
     },
     "mov eax, 0xffffffff": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "movz+movk doesn't turn in to bitfield move",
         "0xb8"
       ],
       "ExpectedArm64ASM": [
-        "mov w4, #0xffffffff"
+        "mov w20, #0xffffffff",
+        "mov x4, x20"
       ]
     },
     "mov eax, 0x44454647": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "0xb8",
       "ExpectedArm64ASM": [
-        "mov w4, #0x4647",
-        "movk w4, #0x4445, lsl #16"
+        "mov w20, #0x4647",
+        "movk w20, #0x4445, lsl #16",
+        "mov x4, x20"
       ]
     },
     "mov rax, 0xffffffffffffffff": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "0xb8",
       "ExpectedArm64ASM": [
-        "mov x4, #0xffffffffffffffff"
+        "mov x20, #0xffffffffffffffff",
+        "mov x4, x20"
       ]
     },
     "mov rax, 0x5152535455565758": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "0xb8",
       "ExpectedArm64ASM": [
-        "mov x4, #0x5758",
-        "movk x4, #0x5556, lsl #16",
-        "movk x4, #0x5354, lsl #32",
-        "movk x4, #0x5152, lsl #48"
+        "mov x20, #0x5758",
+        "movk x20, #0x5556, lsl #16",
+        "movk x20, #0x5354, lsl #32",
+        "movk x20, #0x5152, lsl #48",
+        "mov x4, x20"
       ]
     },
     "xlat": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0xd7",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "ldrb w20, [x7, x20, sxtx]",
-        "bfxil x4, x20, #0, #8"
+        "mov x20, x7",
+        "mov x21, x4",
+        "uxtb w22, w21",
+        "ldrb w23, [x20, x22, sxtx]",
+        "mov x20, x21",
+        "bfxil x20, x23, #0, #8",
+        "mov x4, x20"
       ]
     },
     "cmc": {
@@ -4427,8 +5715,8 @@
       "Comment": "0xf5",
       "ExpectedArm64ASM": [
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "clc": {
@@ -4436,8 +5724,8 @@
       "Comment": "0xf8",
       "ExpectedArm64ASM": [
         "mrs x20, nzcv",
-        "and w20, w20, #0xdfffffff",
-        "msr nzcv, x20"
+        "and w21, w20, #0xdfffffff",
+        "msr nzcv, x21"
       ]
     },
     "stc": {
@@ -4445,8 +5733,8 @@
       "Comment": "0xf9",
       "ExpectedArm64ASM": [
         "mrs x20, nzcv",
-        "orr w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "orr w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "cli": {
diff --git a/unittests/InstructionCountCI/PrimaryGroup.json b/unittests/InstructionCountCI/PrimaryGroup.json
index 750f446f03..7424186cd3 100644
--- a/unittests/InstructionCountCI/PrimaryGroup.json
+++ b/unittests/InstructionCountCI/PrimaryGroup.json
@@ -15,1656 +15,1993 @@
   ],
   "Instructions": {
     "add al, 1": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP1 0x80 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "lsl w0, w27, #24",
+        "mov x21, x4",
+        "mov x27, x21",
+        "lsl w0, w21, #24",
         "cmn w0, w20, lsl #24",
-        "add w26, w27, #0x1 (1)",
-        "mov x4, x27",
-        "bfxil x4, x26, #0, #8"
+        "add w20, w21, #0x1 (1)",
+        "mov x26, x20",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "or al, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x80 /1",
       "ExpectedArm64ASM": [
-        "orr w26, w4, #0x1",
-        "bfxil x4, x26, #0, #8",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x4",
+        "orr w21, w20, #0x1",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22",
+        "mov x26, x21",
+        "cmn wzr, w21, lsl #24"
       ]
     },
     "adc al, 1": {
-      "ExpectedInstructionCount": 20,
+      "ExpectedInstructionCount": 23,
       "Comment": "GROUP1 0x80 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "cset w21, hs",
-        "adc w20, w27, w20",
-        "uxtb w26, w20",
-        "cmp w26, #0x1 (1)",
-        "cset x20, lo",
-        "cmp w26, #0x1 (1)",
-        "cset x22, ls",
-        "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #24",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "bic w21, w26, w27",
-        "ubfx x21, x21, #7, #1",
-        "orr w20, w20, w21, lsl #28",
-        "mov x4, x27",
-        "bfxil x4, x26, #0, #8",
-        "msr nzcv, x20"
+        "mov x21, x4",
+        "mov x27, x21",
+        "cset w22, hs",
+        "adc w23, w21, w20",
+        "uxtb w20, w23",
+        "cmp w20, #0x1 (1)",
+        "cset x23, lo",
+        "cmp w20, #0x1 (1)",
+        "cset x24, ls",
+        "cmp x22, #0x1 (1)",
+        "csel x25, x24, x23, eq",
+        "cmn wzr, w20, lsl #24",
+        "mrs x22, nzcv",
+        "orr w23, w22, w25, lsl #29",
+        "bic w22, w20, w21",
+        "ubfx x24, x22, #7, #1",
+        "orr w22, w23, w24, lsl #28",
+        "mov x26, x20",
+        "mov x23, x21",
+        "bfxil x23, x20, #0, #8",
+        "mov x4, x23",
+        "msr nzcv, x22"
       ]
     },
     "sbb al, 1": {
-      "ExpectedInstructionCount": 21,
+      "ExpectedInstructionCount": 24,
       "Comment": "GROUP1 0x80 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "cset w21, hs",
-        "add w20, w20, w21",
-        "sub w20, w27, w20",
-        "uxtb w26, w20",
-        "cmp x26, x27",
+        "mov x21, x4",
+        "mov x27, x21",
+        "cset w22, hs",
+        "add w23, w20, w22",
+        "sub w20, w21, w23",
+        "uxtb w23, w20",
+        "cmp x23, x21",
         "cset x20, hi",
-        "cmp x26, x27",
-        "cset x22, hs",
-        "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #24",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "bic w21, w27, w26",
-        "ubfx x21, x21, #7, #1",
-        "orr w20, w20, w21, lsl #28",
-        "mov x4, x27",
-        "bfxil x4, x26, #0, #8",
+        "cmp x23, x21",
+        "cset x24, hs",
+        "cmp x22, #0x1 (1)",
+        "csel x25, x24, x20, eq",
+        "cmn wzr, w23, lsl #24",
+        "mrs x20, nzcv",
+        "orr w22, w20, w25, lsl #29",
+        "bic w20, w21, w23",
+        "ubfx x24, x20, #7, #1",
+        "orr w20, w22, w24, lsl #28",
+        "mov x26, x23",
+        "mov x22, x21",
+        "bfxil x22, x23, #0, #8",
+        "mov x4, x22",
         "msr nzcv, x20"
       ]
     },
     "and al, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x80 /4",
       "ExpectedArm64ASM": [
-        "and w26, w4, #0x1",
-        "cmn wzr, w26, lsl #24",
-        "bfxil x4, x26, #0, #8"
+        "mov x20, x4",
+        "and w21, w20, #0x1",
+        "cmn wzr, w21, lsl #24",
+        "mov x26, x21",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22"
       ]
     },
     "sub al, 1": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 13,
       "Comment": "GROUP1 0x80 /5",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "lsl w0, w27, #24",
+        "mov x21, x4",
+        "mov x27, x21",
+        "lsl w0, w21, #24",
         "cmp w0, w20, lsl #24",
-        "sub w26, w27, #0x1 (1)",
-        "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x4, x27",
-        "bfxil x4, x26, #0, #8",
-        "msr nzcv, x20"
+        "sub w20, w21, #0x1 (1)",
+        "mov x26, x20",
+        "mrs x22, nzcv",
+        "eor w23, w22, #0x20000000",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22",
+        "msr nzcv, x23"
       ]
     },
     "xor al, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x80 /6",
       "ExpectedArm64ASM": [
-        "eor w26, w4, #0x1",
-        "bfxil x4, x26, #0, #8",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x4",
+        "eor w21, w20, #0x1",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22",
+        "mov x26, x21",
+        "cmn wzr, w21, lsl #24"
       ]
     },
     "cmp al, 1": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP1 0x80 /7",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "lsl w0, w27, #24",
+        "mov x21, x4",
+        "mov x27, x21",
+        "lsl w0, w21, #24",
         "cmp w0, w20, lsl #24",
-        "sub w26, w27, #0x1 (1)",
+        "sub w20, w21, #0x1 (1)",
+        "mov x26, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "add al, -1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP1 0x80 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
-        "mvn w27, w4",
-        "lsl w0, w4, #24",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "lsl w0, w21, #24",
         "cmn w0, w20, lsl #24",
-        "add w26, w4, #0xff (255)",
-        "bfxil x4, x26, #0, #8"
+        "add w20, w21, #0xff (255)",
+        "mov x26, x20",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "or al, -1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x80 /1",
       "ExpectedArm64ASM": [
-        "orr w26, w4, #0xff",
-        "bfxil x4, x26, #0, #8",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x4",
+        "orr w21, w20, #0xff",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22",
+        "mov x26, x21",
+        "cmn wzr, w21, lsl #24"
       ]
     },
     "adc al, -1": {
-      "ExpectedInstructionCount": 19,
+      "ExpectedInstructionCount": 24,
       "Comment": "GROUP1 0x80 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
-        "mvn w27, w4",
-        "cset w21, hs",
-        "adc w20, w4, w20",
-        "uxtb w26, w20",
-        "cmp w26, #0xff (255)",
-        "cset x20, lo",
-        "cmp w26, #0xff (255)",
-        "cset x22, ls",
-        "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #24",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "bic w21, w4, w26",
-        "ubfx x21, x21, #7, #1",
-        "orr w20, w20, w21, lsl #28",
-        "bfxil x4, x26, #0, #8",
-        "msr nzcv, x20"
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "cset w22, hs",
+        "adc w23, w21, w20",
+        "uxtb w20, w23",
+        "cmp w20, #0xff (255)",
+        "cset x23, lo",
+        "cmp w20, #0xff (255)",
+        "cset x24, ls",
+        "cmp x22, #0x1 (1)",
+        "csel x25, x24, x23, eq",
+        "cmn wzr, w20, lsl #24",
+        "mrs x22, nzcv",
+        "orr w23, w22, w25, lsl #29",
+        "bic w22, w21, w20",
+        "ubfx x24, x22, #7, #1",
+        "orr w22, w23, w24, lsl #28",
+        "mov x26, x20",
+        "mov x23, x21",
+        "bfxil x23, x20, #0, #8",
+        "mov x4, x23",
+        "msr nzcv, x22"
       ]
     },
     "sbb al, -1": {
-      "ExpectedInstructionCount": 20,
+      "ExpectedInstructionCount": 25,
       "Comment": "GROUP1 0x80 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
-        "mvn w27, w4",
-        "cset w21, hs",
-        "add w20, w20, w21",
-        "sub w20, w4, w20",
-        "uxtb w26, w20",
-        "cmp x26, x4",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "cset w22, hs",
+        "add w23, w20, w22",
+        "sub w20, w21, w23",
+        "uxtb w23, w20",
+        "cmp x23, x21",
         "cset x20, hi",
-        "cmp x26, x4",
-        "cset x22, hs",
-        "cmp x21, #0x1 (1)",
-        "csel x20, x22, x20, eq",
-        "cmn wzr, w26, lsl #24",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "bic w21, w26, w4",
-        "ubfx x21, x21, #7, #1",
-        "orr w20, w20, w21, lsl #28",
-        "bfxil x4, x26, #0, #8",
+        "cmp x23, x21",
+        "cset x24, hs",
+        "cmp x22, #0x1 (1)",
+        "csel x25, x24, x20, eq",
+        "cmn wzr, w23, lsl #24",
+        "mrs x20, nzcv",
+        "orr w22, w20, w25, lsl #29",
+        "bic w20, w23, w21",
+        "ubfx x24, x20, #7, #1",
+        "orr w20, w22, w24, lsl #28",
+        "mov x26, x23",
+        "mov x22, x21",
+        "bfxil x22, x23, #0, #8",
+        "mov x4, x22",
         "msr nzcv, x20"
       ]
     },
     "and al, -1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x80 /4",
       "ExpectedArm64ASM": [
-        "and w26, w4, #0xff",
-        "cmn wzr, w26, lsl #24",
-        "bfxil x4, x26, #0, #8"
+        "mov x20, x4",
+        "and w21, w20, #0xff",
+        "cmn wzr, w21, lsl #24",
+        "mov x26, x21",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22"
       ]
     },
     "sub al, -1": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 14,
       "Comment": "GROUP1 0x80 /5",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
-        "mvn w27, w4",
-        "lsl w0, w4, #24",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "lsl w0, w21, #24",
         "cmp w0, w20, lsl #24",
-        "sub w26, w4, #0xff (255)",
-        "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "bfxil x4, x26, #0, #8",
-        "msr nzcv, x20"
+        "sub w20, w21, #0xff (255)",
+        "mov x26, x20",
+        "mrs x22, nzcv",
+        "eor w23, w22, #0x20000000",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22",
+        "msr nzcv, x23"
       ]
     },
     "xor al, -1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x80 /6",
       "ExpectedArm64ASM": [
-        "eor w26, w4, #0xff",
-        "bfxil x4, x26, #0, #8",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x4",
+        "eor w21, w20, #0xff",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22",
+        "mov x26, x21",
+        "cmn wzr, w21, lsl #24"
       ]
     },
     "cmp al, -1": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP1 0x80 /7",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
-        "mvn w27, w4",
-        "lsl w0, w4, #24",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "lsl w0, w21, #24",
         "cmp w0, w20, lsl #24",
-        "sub w26, w4, #0xff (255)",
+        "sub w20, w21, #0xff (255)",
+        "mov x26, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "add ax, 256": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP1 0x81 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "mov x27, x4",
-        "lsl w0, w27, #16",
+        "mov x21, x4",
+        "mov x27, x21",
+        "lsl w0, w21, #16",
         "cmn w0, w20, lsl #16",
-        "add w26, w27, #0x100 (256)",
-        "mov x4, x27",
-        "bfxil x4, x26, #0, #16"
+        "add w20, w21, #0x100 (256)",
+        "mov x26, x20",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x4, x22"
       ]
     },
     "add eax, 256": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x81 /0",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "adds w26, w27, #0x100 (256)",
-        "mov x4, x26"
+        "mov x20, x4",
+        "mov x27, x20",
+        "adds w21, w20, #0x100 (256)",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "add rax, 256": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x81 /0",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "adds x26, x27, #0x100 (256)",
-        "mov x4, x26"
+        "mov x20, x4",
+        "mov x27, x20",
+        "adds x21, x20, #0x100 (256)",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "or eax, 256": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x81 /1",
       "ExpectedArm64ASM": [
-        "orr w4, w4, #0x100",
-        "mov x26, x4",
-        "tst w4, w4"
+        "mov x20, x4",
+        "orr w21, w20, #0x100",
+        "mov x4, x21",
+        "mov x26, x21",
+        "tst w21, w21"
       ]
     },
     "or rax, 256": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x81 /1",
       "ExpectedArm64ASM": [
-        "orr x4, x4, #0x100",
-        "mov x26, x4",
-        "tst x4, x4"
+        "mov x20, x4",
+        "orr x21, x20, #0x100",
+        "mov x4, x21",
+        "mov x26, x21",
+        "tst x21, x21"
       ]
     },
     "adc eax, 256": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "mov x27, x4",
-        "adcs w26, w27, w20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "mov x27, x21",
+        "adcs w22, w21, w20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "adc rax, 256": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "mov x27, x4",
-        "adcs x26, x27, x20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "mov x27, x21",
+        "adcs x22, x21, x20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "sbb eax, 256": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 12,
       "Comment": "GROUP1 0x81 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "mov x27, x4",
-        "mrs x21, nzcv",
-        "eor w21, w21, #0x20000000",
-        "msr nzcv, x21",
-        "sbcs w26, w27, w20",
+        "mov x21, x4",
+        "mov x27, x21",
+        "mrs x22, nzcv",
+        "eor w23, w22, #0x20000000",
+        "msr nzcv, x23",
+        "sbcs w22, w21, w20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x4, x26",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "mov x26, x22",
+        "mov x4, x22",
+        "msr nzcv, x21"
       ]
     },
     "sbb rax, 256": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 12,
       "Comment": "GROUP1 0x81 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0x100",
-        "mov x27, x4",
-        "mrs x21, nzcv",
-        "eor w21, w21, #0x20000000",
-        "msr nzcv, x21",
-        "sbcs x26, x27, x20",
+        "mov x21, x4",
+        "mov x27, x21",
+        "mrs x22, nzcv",
+        "eor w23, w22, #0x20000000",
+        "msr nzcv, x23",
+        "sbcs x22, x21, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x4, x26",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "mov x26, x22",
+        "mov x4, x22",
+        "msr nzcv, x21"
       ]
     },
     "and eax, 256": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP1 0x81 /4",
       "ExpectedArm64ASM": [
-        "ands w26, w4, #0x100",
-        "mov x4, x26"
+        "mov x20, x4",
+        "ands w21, w20, #0x100",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "and rax, 256": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP1 0x81 /4",
       "ExpectedArm64ASM": [
-        "ands x26, x4, #0x100",
-        "mov x4, x26"
+        "mov x20, x4",
+        "ands x21, x20, #0x100",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "sub eax, 256": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP1 0x81 /5",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "subs w26, w27, #0x100 (256)",
+        "mov x20, x4",
+        "mov x27, x20",
+        "subs w21, w20, #0x100 (256)",
+        "mov x26, x21",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x4, x26",
-        "msr nzcv, x20"
+        "eor w22, w20, #0x20000000",
+        "mov x4, x21",
+        "msr nzcv, x22"
       ]
     },
     "sub rax, 256": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP1 0x81 /5",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "subs x26, x27, #0x100 (256)",
+        "mov x20, x4",
+        "mov x27, x20",
+        "subs x21, x20, #0x100 (256)",
+        "mov x26, x21",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x4, x26",
-        "msr nzcv, x20"
+        "eor w22, w20, #0x20000000",
+        "mov x4, x21",
+        "msr nzcv, x22"
       ]
     },
     "xor eax, 256": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x81 /6",
       "ExpectedArm64ASM": [
-        "eor w4, w4, #0x100",
-        "mov x26, x4",
-        "tst w4, w4"
+        "mov x20, x4",
+        "eor w21, w20, #0x100",
+        "mov x4, x21",
+        "mov x26, x21",
+        "tst w21, w21"
       ]
     },
     "xor rax, 256": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x81 /6",
       "ExpectedArm64ASM": [
-        "eor x4, x4, #0x100",
-        "mov x26, x4",
-        "tst x4, x4"
+        "mov x20, x4",
+        "eor x21, x20, #0x100",
+        "mov x4, x21",
+        "mov x26, x21",
+        "tst x21, x21"
       ]
     },
     "cmp eax, 256": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x81 /7",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "subs w26, w27, #0x100 (256)",
+        "mov x20, x4",
+        "mov x27, x20",
+        "subs w21, w20, #0x100 (256)",
+        "mov x26, x21",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "cmp rax, 256": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x81 /7",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "subs x26, x27, #0x100 (256)",
+        "mov x20, x4",
+        "mov x27, x20",
+        "subs x21, x20, #0x100 (256)",
+        "mov x26, x21",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "add ax, -256": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP1 0x81 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0xff00",
-        "mov x27, x4",
-        "lsl w0, w27, #16",
+        "mov x21, x4",
+        "mov x27, x21",
+        "lsl w0, w21, #16",
         "cmn w0, w20, lsl #16",
-        "add w26, w27, w20",
-        "mov x4, x27",
-        "bfxil x4, x26, #0, #16"
+        "add w22, w21, w20",
+        "mov x26, x22",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #16",
+        "mov x4, x20"
       ]
     },
     "add eax, -256": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffff00",
-        "mov x27, x4",
-        "adds w26, w27, w20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "mov x27, x21",
+        "adds w22, w21, w20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "add rax, -256": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x81 /0",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "subs x26, x27, #0x100 (256)",
-        "mov x4, x26"
+        "mov x20, x4",
+        "mov x27, x20",
+        "subs x21, x20, #0x100 (256)",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "or eax, -256": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x81 /1",
       "ExpectedArm64ASM": [
-        "orr w4, w4, #0xffffff00",
-        "mov x26, x4",
-        "tst w4, w4"
+        "mov x20, x4",
+        "orr w21, w20, #0xffffff00",
+        "mov x4, x21",
+        "mov x26, x21",
+        "tst w21, w21"
       ]
     },
     "or rax, -256": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x81 /1",
       "ExpectedArm64ASM": [
-        "orr x4, x4, #0xffffffffffffff00",
-        "mov x26, x4",
-        "tst x4, x4"
+        "mov x20, x4",
+        "orr x21, x20, #0xffffffffffffff00",
+        "mov x4, x21",
+        "mov x26, x21",
+        "tst x21, x21"
       ]
     },
     "adc eax, -256": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffff00",
-        "mov x27, x4",
-        "adcs w26, w27, w20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "mov x27, x21",
+        "adcs w22, w21, w20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "adc rax, -256": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x81 /2",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffff00",
-        "mov x27, x4",
-        "adcs x26, x27, x20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "mov x27, x21",
+        "adcs x22, x21, x20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "sbb eax, -256": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 12,
       "Comment": "GROUP1 0x81 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffff00",
-        "mov x27, x4",
-        "mrs x21, nzcv",
-        "eor w21, w21, #0x20000000",
-        "msr nzcv, x21",
-        "sbcs w26, w27, w20",
+        "mov x21, x4",
+        "mov x27, x21",
+        "mrs x22, nzcv",
+        "eor w23, w22, #0x20000000",
+        "msr nzcv, x23",
+        "sbcs w22, w21, w20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x4, x26",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "mov x26, x22",
+        "mov x4, x22",
+        "msr nzcv, x21"
       ]
     },
     "sbb rax, -256": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 12,
       "Comment": "GROUP1 0x81 /3",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffff00",
-        "mov x27, x4",
-        "mrs x21, nzcv",
-        "eor w21, w21, #0x20000000",
-        "msr nzcv, x21",
-        "sbcs x26, x27, x20",
+        "mov x21, x4",
+        "mov x27, x21",
+        "mrs x22, nzcv",
+        "eor w23, w22, #0x20000000",
+        "msr nzcv, x23",
+        "sbcs x22, x21, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x4, x26",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "mov x26, x22",
+        "mov x4, x22",
+        "msr nzcv, x21"
       ]
     },
     "and eax, -256": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP1 0x81 /4",
       "ExpectedArm64ASM": [
-        "ands w26, w4, #0xffffff00",
-        "mov x4, x26"
+        "mov x20, x4",
+        "ands w21, w20, #0xffffff00",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "and rax, -256": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP1 0x81 /4",
       "ExpectedArm64ASM": [
-        "ands x26, x4, #0xffffffffffffff00",
-        "mov x4, x26"
+        "mov x20, x4",
+        "ands x21, x20, #0xffffffffffffff00",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "sub eax, -256": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP1 0x81 /5",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffff00",
-        "mov x27, x4",
-        "subs w26, w27, w20",
+        "mov x21, x4",
+        "mov x27, x21",
+        "subs w22, w21, w20",
+        "mov x26, x22",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x4, x26",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "mov x4, x22",
+        "msr nzcv, x21"
       ]
     },
     "sub rax, -256": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP1 0x81 /5",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "adds x26, x27, #0x100 (256)",
+        "mov x20, x4",
+        "mov x27, x20",
+        "adds x21, x20, #0x100 (256)",
+        "mov x26, x21",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x4, x26",
-        "msr nzcv, x20"
+        "eor w22, w20, #0x20000000",
+        "mov x4, x21",
+        "msr nzcv, x22"
       ]
     },
     "xor eax, -256": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x81 /6",
       "ExpectedArm64ASM": [
-        "eor w4, w4, #0xffffff00",
-        "mov x26, x4",
-        "tst w4, w4"
+        "mov x20, x4",
+        "eor w21, w20, #0xffffff00",
+        "mov x4, x21",
+        "mov x26, x21",
+        "tst w21, w21"
       ]
     },
     "xor rax, -256": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x81 /6",
       "ExpectedArm64ASM": [
-        "eor x4, x4, #0xffffffffffffff00",
-        "mov x26, x4",
-        "tst x4, x4"
+        "mov x20, x4",
+        "eor x21, x20, #0xffffffffffffff00",
+        "mov x4, x21",
+        "mov x26, x21",
+        "tst x21, x21"
       ]
     },
     "cmp eax, -256": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP1 0x81 /7",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffff00",
-        "mov x27, x4",
-        "subs w26, w27, w20",
+        "mov x21, x4",
+        "mov x27, x21",
+        "subs w22, w21, w20",
+        "mov x26, x22",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "cmp rax, -256": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x81 /7",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "adds x26, x27, #0x100 (256)",
+        "mov x20, x4",
+        "mov x27, x20",
+        "adds x21, x20, #0x100 (256)",
+        "mov x26, x21",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "add ax, 1": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP1 0x83 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "lsl w0, w27, #16",
+        "mov x21, x4",
+        "mov x27, x21",
+        "lsl w0, w21, #16",
         "cmn w0, w20, lsl #16",
-        "add w26, w27, #0x1 (1)",
-        "mov x4, x27",
-        "bfxil x4, x26, #0, #16"
+        "add w20, w21, #0x1 (1)",
+        "mov x26, x20",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x4, x22"
       ]
     },
     "add eax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x83 /0",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "adds w26, w27, #0x1 (1)",
-        "mov x4, x26"
+        "mov x20, x4",
+        "mov x27, x20",
+        "adds w21, w20, #0x1 (1)",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "add rax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x83 /0",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "adds x26, x27, #0x1 (1)",
-        "mov x4, x26"
+        "mov x20, x4",
+        "mov x27, x20",
+        "adds x21, x20, #0x1 (1)",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "or eax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x83 /1",
       "ExpectedArm64ASM": [
-        "orr w4, w4, #0x1",
-        "mov x26, x4",
-        "tst w4, w4"
+        "mov x20, x4",
+        "orr w21, w20, #0x1",
+        "mov x4, x21",
+        "mov x26, x21",
+        "tst w21, w21"
       ]
     },
     "or rax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x83 /1",
       "ExpectedArm64ASM": [
-        "orr x4, x4, #0x1",
-        "mov x26, x4",
-        "tst x4, x4"
+        "mov x20, x4",
+        "orr x21, x20, #0x1",
+        "mov x4, x21",
+        "mov x26, x21",
+        "tst x21, x21"
       ]
     },
     "adc eax, 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "adcs w26, w27, w20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "mov x27, x21",
+        "adcs w22, w21, w20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "adc rax, 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "adcs x26, x27, x20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "mov x27, x21",
+        "adcs x22, x21, x20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "sbb eax, 1": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 12,
       "Comment": "GROUP1 0x83 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "mrs x21, nzcv",
-        "eor w21, w21, #0x20000000",
-        "msr nzcv, x21",
-        "sbcs w26, w27, w20",
+        "mov x21, x4",
+        "mov x27, x21",
+        "mrs x22, nzcv",
+        "eor w23, w22, #0x20000000",
+        "msr nzcv, x23",
+        "sbcs w22, w21, w20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x4, x26",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "mov x26, x22",
+        "mov x4, x22",
+        "msr nzcv, x21"
       ]
     },
     "sbb rax, 1": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 12,
       "Comment": "GROUP1 0x83 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "mov x27, x4",
-        "mrs x21, nzcv",
-        "eor w21, w21, #0x20000000",
-        "msr nzcv, x21",
-        "sbcs x26, x27, x20",
+        "mov x21, x4",
+        "mov x27, x21",
+        "mrs x22, nzcv",
+        "eor w23, w22, #0x20000000",
+        "msr nzcv, x23",
+        "sbcs x22, x21, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x4, x26",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "mov x26, x22",
+        "mov x4, x22",
+        "msr nzcv, x21"
       ]
     },
     "and eax, 1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP1 0x83 /4",
       "ExpectedArm64ASM": [
-        "ands w26, w4, #0x1",
-        "mov x4, x26"
+        "mov x20, x4",
+        "ands w21, w20, #0x1",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "and rax, 1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP1 0x83 /4",
       "ExpectedArm64ASM": [
-        "ands x26, x4, #0x1",
-        "mov x4, x26"
+        "mov x20, x4",
+        "ands x21, x20, #0x1",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "sub eax, 1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP1 0x83 /5",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "subs w26, w27, #0x1 (1)",
+        "mov x20, x4",
+        "mov x27, x20",
+        "subs w21, w20, #0x1 (1)",
+        "mov x26, x21",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x4, x26",
-        "msr nzcv, x20"
+        "eor w22, w20, #0x20000000",
+        "mov x4, x21",
+        "msr nzcv, x22"
       ]
     },
     "sub rax, 1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP1 0x83 /5",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "subs x26, x27, #0x1 (1)",
+        "mov x20, x4",
+        "mov x27, x20",
+        "subs x21, x20, #0x1 (1)",
+        "mov x26, x21",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x4, x26",
-        "msr nzcv, x20"
+        "eor w22, w20, #0x20000000",
+        "mov x4, x21",
+        "msr nzcv, x22"
       ]
     },
     "xor eax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x83 /6",
       "ExpectedArm64ASM": [
-        "eor w4, w4, #0x1",
-        "mov x26, x4",
-        "tst w4, w4"
+        "mov x20, x4",
+        "eor w21, w20, #0x1",
+        "mov x4, x21",
+        "mov x26, x21",
+        "tst w21, w21"
       ]
     },
     "xor rax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x83 /6",
       "ExpectedArm64ASM": [
-        "eor x4, x4, #0x1",
-        "mov x26, x4",
-        "tst x4, x4"
+        "mov x20, x4",
+        "eor x21, x20, #0x1",
+        "mov x4, x21",
+        "mov x26, x21",
+        "tst x21, x21"
       ]
     },
     "cmp eax, 1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x83 /7",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "subs w26, w27, #0x1 (1)",
+        "mov x20, x4",
+        "mov x27, x20",
+        "subs w21, w20, #0x1 (1)",
+        "mov x26, x21",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "cmp rax, 1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x83 /7",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "subs x26, x27, #0x1 (1)",
+        "mov x20, x4",
+        "mov x27, x20",
+        "subs x21, x20, #0x1 (1)",
+        "mov x26, x21",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "add ax, -1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP1 0x83 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0xffff",
-        "mvn w27, w4",
-        "lsl w0, w4, #16",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "lsl w0, w21, #16",
         "cmn w0, w20, lsl #16",
-        "add w26, w4, w20",
-        "bfxil x4, x26, #0, #16"
+        "add w22, w21, w20",
+        "mov x26, x22",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #16",
+        "mov x4, x20"
       ]
     },
     "add eax, -1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x83 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "mvn w27, w4",
-        "adds w26, w4, w20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "adds w22, w21, w20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "add rax, -1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /0",
       "ExpectedArm64ASM": [
-        "mvn w27, w4",
-        "subs x26, x4, #0x1 (1)",
-        "mov x4, x26"
+        "mov x20, x4",
+        "mvn w21, w20",
+        "mov x27, x21",
+        "subs x21, x20, #0x1 (1)",
+        "mov x26, x21",
+        "mov x4, x21"
       ]
     },
     "or eax, -1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /-1",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "orr w4, w4, w20",
-        "mov x26, x4",
-        "tst w4, w4"
+        "mov x21, x4",
+        "orr w22, w21, w20",
+        "mov x4, x22",
+        "mov x26, x22",
+        "tst w22, w22"
       ]
     },
     "or rax, -1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /-1",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
-        "orr x4, x4, x20",
-        "mov x26, x4",
-        "tst x4, x4"
+        "mov x21, x4",
+        "orr x22, x21, x20",
+        "mov x4, x22",
+        "mov x26, x22",
+        "tst x22, x22"
       ]
     },
     "adc eax, -1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x83 /2",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "mvn w27, w4",
-        "adcs w26, w4, w20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "adcs w22, w21, w20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "adc rax, -1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP1 0x83 /2",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
-        "mvn w27, w4",
-        "adcs x26, x4, x20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "adcs x22, x21, x20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "sbb eax, -1": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 13,
       "Comment": "GROUP1 0x83 /3",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "mvn w27, w4",
-        "mrs x21, nzcv",
-        "eor w21, w21, #0x20000000",
-        "msr nzcv, x21",
-        "sbcs w26, w4, w20",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "mrs x22, nzcv",
+        "eor w23, w22, #0x20000000",
+        "msr nzcv, x23",
+        "sbcs w22, w21, w20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x4, x26",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "mov x26, x22",
+        "mov x4, x22",
+        "msr nzcv, x21"
       ]
     },
     "sbb rax, -1": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 13,
       "Comment": "GROUP1 0x83 /3",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
-        "mvn w27, w4",
-        "mrs x21, nzcv",
-        "eor w21, w21, #0x20000000",
-        "msr nzcv, x21",
-        "sbcs x26, x4, x20",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "mrs x22, nzcv",
+        "eor w23, w22, #0x20000000",
+        "msr nzcv, x23",
+        "sbcs x22, x21, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x4, x26",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "mov x26, x22",
+        "mov x4, x22",
+        "msr nzcv, x21"
       ]
     },
     "and eax, -1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x83 /4",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "ands w26, w4, w20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "ands w22, w21, w20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "and rax, -1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP1 0x83 /4",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
-        "ands x26, x4, x20",
-        "mov x4, x26"
+        "mov x21, x4",
+        "ands x22, x21, x20",
+        "mov x26, x22",
+        "mov x4, x22"
       ]
     },
     "sub eax, -1": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP1 0x83 /5",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "mvn w27, w4",
-        "subs w26, w4, w20",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "subs w22, w21, w20",
+        "mov x26, x22",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x4, x26",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "mov x4, x22",
+        "msr nzcv, x21"
       ]
     },
     "sub rax, -1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP1 0x83 /5",
       "ExpectedArm64ASM": [
-        "mvn w27, w4",
-        "adds x26, x4, #0x1 (1)",
+        "mov x20, x4",
+        "mvn w21, w20",
+        "mov x27, x21",
+        "adds x21, x20, #0x1 (1)",
+        "mov x26, x21",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x4, x26",
-        "msr nzcv, x20"
+        "eor w22, w20, #0x20000000",
+        "mov x4, x21",
+        "msr nzcv, x22"
       ]
     },
     "xor eax, -1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /6",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "eor w4, w4, w20",
-        "mov x26, x4",
-        "tst w4, w4"
+        "mov x21, x4",
+        "eor w22, w21, w20",
+        "mov x4, x22",
+        "mov x26, x22",
+        "tst w22, w22"
       ]
     },
     "xor rax, -1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP1 0x83 /6",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
-        "eor x4, x4, x20",
-        "mov x26, x4",
-        "tst x4, x4"
+        "mov x21, x4",
+        "eor x22, x21, x20",
+        "mov x4, x22",
+        "mov x26, x22",
+        "tst x22, x22"
       ]
     },
     "cmp eax, -1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP1 0x83 /7",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "mvn w27, w4",
-        "subs w26, w4, w20",
+        "mov x21, x4",
+        "mvn w22, w21",
+        "mov x27, x22",
+        "subs w22, w21, w20",
+        "mov x26, x22",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "cmp rax, -1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP1 0x83 /7",
       "ExpectedArm64ASM": [
-        "mvn w27, w4",
-        "adds x26, x4, #0x1 (1)",
+        "mov x20, x4",
+        "mvn w21, w20",
+        "mov x27, x21",
+        "adds x21, x20, #0x1 (1)",
+        "mov x26, x21",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "rol al, 2": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 14,
       "Comment": "GROUP2 0xC0 /0",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "bfi w20, w4, #8, #8",
-        "bfi w20, w20, #16, #16",
-        "ror w20, w20, #30",
-        "bfxil x4, x20, #0, #8",
-        "mrs x21, nzcv",
-        "and w21, w21, #0xc0000000",
-        "ubfx x20, x20, #0, #1",
-        "orr w20, w21, w20, lsl #29",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "mov w21, w20",
+        "bfi w21, w20, #8, #8",
+        "mov w22, w21",
+        "bfi w22, w21, #16, #16",
+        "ror w21, w22, #30",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22",
+        "mrs x20, nzcv",
+        "and w22, w20, #0xc0000000",
+        "ubfx x20, x21, #0, #1",
+        "orr w21, w22, w20, lsl #29",
+        "msr nzcv, x21"
       ]
     },
     "ror al, 2": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 12,
       "Comment": "GROUP2 0xC0 /1",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "bfi w20, w4, #8, #8",
-        "ror w20, w20, #2",
-        "bfxil x4, x20, #0, #8",
-        "mrs x21, nzcv",
-        "and w21, w21, #0xc0000000",
-        "ubfx x20, x20, #7, #1",
-        "orr w20, w21, w20, lsl #29",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "mov w21, w20",
+        "bfi w21, w20, #8, #8",
+        "ror w22, w21, #2",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #8",
+        "mov x4, x21",
+        "mrs x20, nzcv",
+        "and w21, w20, #0xc0000000",
+        "ubfx x20, x22, #7, #1",
+        "orr w22, w21, w20, lsl #29",
+        "msr nzcv, x22"
       ]
     },
     "rcl al, 2": {
-      "ExpectedInstructionCount": 25,
+      "ExpectedInstructionCount": 36,
       "Comment": "GROUP2 0xC0 /2",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "mov w21, #0x0",
-        "cset w22, hs",
-        "bfi x21, x20, #55, #8",
-        "bfi x21, x22, #63, #1",
-        "bfi x21, x20, #46, #8",
-        "bfi x21, x22, #54, #1",
-        "bfi x21, x20, #37, #8",
-        "bfi x21, x22, #45, #1",
-        "bfi x21, x20, #28, #8",
-        "bfi x21, x22, #36, #1",
-        "bfi x21, x20, #19, #8",
-        "bfi x21, x22, #27, #1",
-        "mov x0, x21",
-        "bfxil x0, x20, #0, #8",
-        "mov x20, x0",
-        "ror x21, x20, #62",
-        "bfxil x4, x21, #0, #8",
-        "ror x20, x20, #61",
-        "ubfx x20, x20, #0, #1",
-        "mrs x21, nzcv",
-        "mov w0, w21",
-        "bfi w0, w20, #29, #1",
-        "mov w20, w0",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "uxtb w21, w20",
+        "mov w22, #0x0",
+        "cset w23, hs",
+        "mov x24, x22",
+        "bfi x24, x21, #55, #8",
+        "mov x22, x24",
+        "bfi x22, x23, #63, #1",
+        "mov x24, x22",
+        "bfi x24, x21, #46, #8",
+        "mov x22, x24",
+        "bfi x22, x23, #54, #1",
+        "mov x24, x22",
+        "bfi x24, x21, #37, #8",
+        "mov x22, x24",
+        "bfi x22, x23, #45, #1",
+        "mov x24, x22",
+        "bfi x24, x21, #28, #8",
+        "mov x22, x24",
+        "bfi x22, x23, #36, #1",
+        "mov x24, x22",
+        "bfi x24, x21, #19, #8",
+        "mov x22, x24",
+        "bfi x22, x23, #27, #1",
+        "mov x23, x22",
+        "bfxil x23, x21, #0, #8",
+        "ror x21, x23, #62",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22",
+        "ror x20, x23, #61",
+        "ubfx x21, x20, #0, #1",
+        "mrs x20, nzcv",
+        "mov w22, w20",
+        "bfi w22, w21, #29, #1",
+        "msr nzcv, x22"
       ]
     },
     "rcr al, 2": {
-      "ExpectedInstructionCount": 16,
+      "ExpectedInstructionCount": 20,
       "Comment": "GROUP2 0xC0 /3",
       "ExpectedArm64ASM": [
         "cset w20, hs",
-        "uxtb w21, w4",
-        "mov x0, x21",
-        "bfi x0, x20, #8, #1",
-        "mov x20, x0",
-        "bfi x20, x20, #9, #9",
-        "bfi x20, x20, #18, #18",
-        "bfi x20, x20, #36, #9",
-        "lsr w21, w20, #2",
-        "bfxil x4, x21, #0, #8",
-        "ubfx x20, x20, #1, #1",
-        "mrs x21, nzcv",
-        "mov w0, w21",
-        "bfi w0, w20, #29, #1",
-        "mov w20, w0",
-        "msr nzcv, x20"
+        "mov x21, x4",
+        "uxtb w22, w21",
+        "mov x23, x22",
+        "bfi x23, x20, #8, #1",
+        "mov x20, x23",
+        "bfi x20, x23, #9, #9",
+        "mov x22, x20",
+        "bfi x22, x20, #18, #18",
+        "mov x20, x22",
+        "bfi x20, x22, #36, #9",
+        "lsr w22, w20, #2",
+        "mov x23, x21",
+        "bfxil x23, x22, #0, #8",
+        "mov x4, x23",
+        "ubfx x21, x20, #1, #1",
+        "mrs x20, nzcv",
+        "mov w22, w20",
+        "bfi w22, w21, #29, #1",
+        "msr nzcv, x22"
       ]
     },
     "shl al, 2": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 12,
       "Comment": "GROUP2 0xC0 /4",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "lsl w26, w20, #2",
-        "bfxil x4, x26, #0, #8",
-        "cmn wzr, w26, lsl #24",
-        "ubfx x20, x20, #6, #1",
+        "mov x20, x4",
+        "uxtb w21, w20",
+        "lsl w22, w21, #2",
+        "mov x23, x20",
+        "bfxil x23, x22, #0, #8",
+        "mov x4, x23",
+        "cmn wzr, w22, lsl #24",
+        "ubfx x20, x21, #6, #1",
         "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "msr nzcv, x20"
+        "orr w23, w21, w20, lsl #29",
+        "mov x26, x22",
+        "msr nzcv, x23"
       ]
     },
     "shr al, 2": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 12,
       "Comment": "GROUP2 0xC0 /5",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "lsr w26, w20, #2",
-        "bfxil x4, x26, #0, #8",
-        "cmn wzr, w26, lsl #24",
-        "ubfx x20, x20, #1, #1",
+        "mov x20, x4",
+        "uxtb w21, w20",
+        "lsr w22, w21, #2",
+        "mov x23, x20",
+        "bfxil x23, x22, #0, #8",
+        "mov x4, x23",
+        "cmn wzr, w22, lsl #24",
+        "ubfx x20, x21, #1, #1",
         "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "msr nzcv, x20"
+        "orr w23, w21, w20, lsl #29",
+        "mov x26, x22",
+        "msr nzcv, x23"
       ]
     },
     "sar al, 2": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 13,
       "Comment": "GROUP2 0xC0 /7",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "sxtb x20, w20",
-        "asr x26, x20, #2",
-        "bfxil x4, x26, #0, #8",
-        "cmn wzr, w26, lsl #24",
-        "ubfx x20, x20, #1, #1",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "uxtb w21, w20",
+        "sxtb x22, w21",
+        "asr x21, x22, #2",
+        "mov x23, x20",
+        "bfxil x23, x21, #0, #8",
+        "mov x4, x23",
+        "cmn wzr, w21, lsl #24",
+        "ubfx x20, x22, #1, #1",
+        "mrs x22, nzcv",
+        "orr w23, w22, w20, lsl #29",
+        "mov x26, x21",
+        "msr nzcv, x23"
       ]
     },
     "rol ax, 2": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 12,
       "Comment": "GROUP2 0xC1 /0",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "bfi w20, w4, #16, #16",
-        "ror w20, w20, #30",
-        "bfxil x4, x20, #0, #16",
-        "mrs x21, nzcv",
-        "and w21, w21, #0xc0000000",
-        "ubfx x20, x20, #0, #1",
-        "orr w20, w21, w20, lsl #29",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "mov w21, w20",
+        "bfi w21, w20, #16, #16",
+        "ror w22, w21, #30",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21",
+        "mrs x20, nzcv",
+        "and w21, w20, #0xc0000000",
+        "ubfx x20, x22, #0, #1",
+        "orr w22, w21, w20, lsl #29",
+        "msr nzcv, x22"
       ]
     },
     "rol eax, 2": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP2 0xC1 /0",
       "ExpectedArm64ASM": [
-        "ror w4, w4, #30",
+        "mov x20, x4",
+        "ror w21, w20, #30",
+        "mov x4, x21",
         "mrs x20, nzcv",
-        "and w20, w20, #0xc0000000",
-        "ubfx x21, x4, #0, #1",
-        "orr w20, w20, w21, lsl #29",
-        "msr nzcv, x20"
+        "and w22, w20, #0xc0000000",
+        "ubfx x20, x21, #0, #1",
+        "orr w21, w22, w20, lsl #29",
+        "msr nzcv, x21"
       ]
     },
     "rol rax, 2": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP2 0xC1 /0",
       "ExpectedArm64ASM": [
-        "ror x4, x4, #62",
+        "mov x20, x4",
+        "ror x21, x20, #62",
+        "mov x4, x21",
         "mrs x20, nzcv",
-        "and w20, w20, #0xc0000000",
-        "ubfx x21, x4, #0, #1",
-        "orr w20, w20, w21, lsl #29",
-        "msr nzcv, x20"
+        "and w22, w20, #0xc0000000",
+        "ubfx x20, x21, #0, #1",
+        "orr w21, w22, w20, lsl #29",
+        "msr nzcv, x21"
       ]
     },
     "ror ax, 2": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 12,
       "Comment": "GROUP2 0xC1 /1",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "bfi w20, w4, #16, #16",
-        "ror w20, w20, #2",
-        "bfxil x4, x20, #0, #16",
-        "mrs x21, nzcv",
-        "and w21, w21, #0xc0000000",
-        "ubfx x20, x20, #15, #1",
-        "orr w20, w21, w20, lsl #29",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "mov w21, w20",
+        "bfi w21, w20, #16, #16",
+        "ror w22, w21, #2",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21",
+        "mrs x20, nzcv",
+        "and w21, w20, #0xc0000000",
+        "ubfx x20, x22, #15, #1",
+        "orr w22, w21, w20, lsl #29",
+        "msr nzcv, x22"
       ]
     },
     "ror eax, 2": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP2 0xC1 /1",
       "ExpectedArm64ASM": [
-        "ror w4, w4, #2",
+        "mov x20, x4",
+        "ror w21, w20, #2",
+        "mov x4, x21",
         "mrs x20, nzcv",
-        "and w20, w20, #0xc0000000",
-        "ubfx x21, x4, #31, #1",
-        "orr w20, w20, w21, lsl #29",
-        "msr nzcv, x20"
+        "and w22, w20, #0xc0000000",
+        "ubfx x20, x21, #31, #1",
+        "orr w21, w22, w20, lsl #29",
+        "msr nzcv, x21"
       ]
     },
     "ror rax, 2": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP2 0xC1 /1",
       "ExpectedArm64ASM": [
-        "ror x4, x4, #2",
+        "mov x20, x4",
+        "ror x21, x20, #2",
+        "mov x4, x21",
         "mrs x20, nzcv",
-        "and w20, w20, #0xc0000000",
-        "lsr x21, x4, #63",
-        "orr w20, w20, w21, lsl #29",
-        "msr nzcv, x20"
+        "and w22, w20, #0xc0000000",
+        "lsr x20, x21, #63",
+        "orr w21, w22, w20, lsl #29",
+        "msr nzcv, x21"
       ]
     },
     "rcl ax, 2": {
-      "ExpectedInstructionCount": 21,
+      "ExpectedInstructionCount": 28,
       "Comment": "GROUP2 0xC1 /2",
       "ExpectedArm64ASM": [
-        "uxth w20, w4",
-        "mov w21, #0x0",
-        "cset w22, hs",
-        "bfi x21, x20, #47, #16",
-        "bfi x21, x22, #63, #1",
-        "bfi x21, x20, #30, #16",
-        "bfi x21, x22, #46, #1",
-        "bfi x21, x20, #13, #16",
-        "bfi x21, x22, #29, #1",
-        "mov x0, x21",
-        "bfxil x0, x20, #0, #16",
-        "mov x20, x0",
-        "ror x21, x20, #62",
-        "bfxil x4, x21, #0, #16",
-        "ror x20, x20, #61",
-        "ubfx x20, x20, #0, #1",
-        "mrs x21, nzcv",
-        "mov w0, w21",
-        "bfi w0, w20, #29, #1",
-        "mov w20, w0",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "uxth w21, w20",
+        "mov w22, #0x0",
+        "cset w23, hs",
+        "mov x24, x22",
+        "bfi x24, x21, #47, #16",
+        "mov x22, x24",
+        "bfi x22, x23, #63, #1",
+        "mov x24, x22",
+        "bfi x24, x21, #30, #16",
+        "mov x22, x24",
+        "bfi x22, x23, #46, #1",
+        "mov x24, x22",
+        "bfi x24, x21, #13, #16",
+        "mov x22, x24",
+        "bfi x22, x23, #29, #1",
+        "mov x23, x22",
+        "bfxil x23, x21, #0, #16",
+        "ror x21, x23, #62",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22",
+        "ror x20, x23, #61",
+        "ubfx x21, x20, #0, #1",
+        "mrs x20, nzcv",
+        "mov w22, w20",
+        "bfi w22, w21, #29, #1",
+        "msr nzcv, x22"
       ]
     },
     "rcl eax, 2": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP2 0xC1 /2",
       "ExpectedArm64ASM": [
-        "lsl w20, w4, #2",
-        "cset w21, hs",
-        "orr w20, w20, w4, lsr #31",
-        "ubfx x22, x4, #30, #1",
-        "lsl x22, x22, #29",
-        "orr w4, w20, w21, lsl #1",
-        "msr nzcv, x22"
+        "mov x20, x4",
+        "lsl w21, w20, #2",
+        "cset w22, hs",
+        "orr w23, w21, w20, lsr #31",
+        "ubfx x21, x20, #30, #1",
+        "lsl x20, x21, #29",
+        "orr w21, w23, w22, lsl #1",
+        "mov x4, x21",
+        "msr nzcv, x20"
       ]
     },
     "rcl rax, 2": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP2 0xC1 /2",
       "ExpectedArm64ASM": [
-        "lsl x20, x4, #2",
-        "cset w21, hs",
-        "orr x20, x20, x4, lsr #63",
-        "ubfx x22, x4, #62, #1",
-        "lsl x22, x22, #29",
-        "orr x4, x20, x21, lsl #1",
-        "msr nzcv, x22"
+        "mov x20, x4",
+        "lsl x21, x20, #2",
+        "cset w22, hs",
+        "orr x23, x21, x20, lsr #63",
+        "ubfx x21, x20, #62, #1",
+        "lsl x20, x21, #29",
+        "orr x21, x23, x22, lsl #1",
+        "mov x4, x21",
+        "msr nzcv, x20"
       ]
     },
     "rcr ax, 2": {
-      "ExpectedInstructionCount": 15,
+      "ExpectedInstructionCount": 18,
       "Comment": "GROUP2 0xC1 /3",
       "ExpectedArm64ASM": [
         "cset w20, hs",
-        "uxth w21, w4",
-        "mov x0, x21",
-        "bfi x0, x20, #16, #1",
-        "mov x20, x0",
-        "bfi x20, x20, #17, #17",
-        "bfi x20, x20, #34, #17",
-        "lsr w21, w20, #2",
-        "bfxil x4, x21, #0, #16",
-        "ubfx x20, x20, #1, #1",
+        "mov x21, x4",
+        "uxth w22, w21",
+        "mov x23, x22",
+        "bfi x23, x20, #16, #1",
+        "mov x20, x23",
+        "bfi x20, x23, #17, #17",
+        "mov x22, x20",
+        "bfi x22, x20, #34, #17",
+        "lsr w20, w22, #2",
+        "mov x23, x21",
+        "bfxil x23, x20, #0, #16",
+        "mov x4, x23",
+        "ubfx x20, x22, #1, #1",
         "mrs x21, nzcv",
-        "mov w0, w21",
-        "bfi w0, w20, #29, #1",
-        "mov w20, w0",
-        "msr nzcv, x20"
+        "mov w22, w21",
+        "bfi w22, w20, #29, #1",
+        "msr nzcv, x22"
       ]
     },
     "rcr eax, 2": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP2 0xC1 /3",
       "ExpectedArm64ASM": [
-        "lsr w20, w4, #2",
-        "cset w21, hs",
-        "orr w20, w20, w4, lsl #31",
-        "ubfx x22, x4, #1, #1",
-        "lsl x22, x22, #29",
-        "orr w4, w20, w21, lsl #30",
-        "msr nzcv, x22"
+        "mov x20, x4",
+        "lsr w21, w20, #2",
+        "cset w22, hs",
+        "orr w23, w21, w20, lsl #31",
+        "ubfx x21, x20, #1, #1",
+        "lsl x20, x21, #29",
+        "orr w21, w23, w22, lsl #30",
+        "mov x4, x21",
+        "msr nzcv, x20"
       ]
     },
     "rcr rax, 2": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP2 0xC1 /3",
       "ExpectedArm64ASM": [
-        "lsr x20, x4, #2",
-        "cset w21, hs",
-        "orr x20, x20, x4, lsl #63",
-        "ubfx x22, x4, #1, #1",
-        "lsl x22, x22, #29",
-        "orr x4, x20, x21, lsl #62",
-        "msr nzcv, x22"
+        "mov x20, x4",
+        "lsr x21, x20, #2",
+        "cset w22, hs",
+        "orr x23, x21, x20, lsl #63",
+        "ubfx x21, x20, #1, #1",
+        "lsl x20, x21, #29",
+        "orr x21, x23, x22, lsl #62",
+        "mov x4, x21",
+        "msr nzcv, x20"
       ]
     },
     "shl ax, 2": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 12,
       "Comment": "GROUP2 0xC1 /4",
       "ExpectedArm64ASM": [
-        "uxth w20, w4",
-        "lsl w26, w20, #2",
-        "bfxil x4, x26, #0, #16",
-        "cmn wzr, w26, lsl #16",
-        "ubfx x20, x20, #14, #1",
+        "mov x20, x4",
+        "uxth w21, w20",
+        "lsl w22, w21, #2",
+        "mov x23, x20",
+        "bfxil x23, x22, #0, #16",
+        "mov x4, x23",
+        "cmn wzr, w22, lsl #16",
+        "ubfx x20, x21, #14, #1",
         "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "msr nzcv, x20"
+        "orr w23, w21, w20, lsl #29",
+        "mov x26, x22",
+        "msr nzcv, x23"
       ]
     },
     "shl eax, 2": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP2 0xC1 /4",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "lsl w4, w20, #2",
-        "tst w4, w4",
-        "ubfx x20, x20, #30, #1",
+        "mov x20, x4",
+        "mov w21, w20",
+        "lsl w20, w21, #2",
+        "mov x4, x20",
+        "tst w20, w20",
+        "ubfx x22, x21, #30, #1",
         "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "mov x26, x4",
-        "msr nzcv, x20"
+        "orr w23, w21, w22, lsl #29",
+        "mov x26, x20",
+        "msr nzcv, x23"
       ]
     },
     "shl rax, 2": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP2 0xC1 /4",
       "ExpectedArm64ASM": [
         "mov x20, x4",
-        "lsl x4, x20, #2",
-        "tst x4, x4",
-        "ubfx x20, x20, #62, #1",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "mov x26, x4",
-        "msr nzcv, x20"
+        "lsl x21, x20, #2",
+        "mov x4, x21",
+        "tst x21, x21",
+        "ubfx x22, x20, #62, #1",
+        "mrs x20, nzcv",
+        "orr w23, w20, w22, lsl #29",
+        "mov x26, x21",
+        "msr nzcv, x23"
       ]
     },
     "shr ax, 2": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 12,
       "Comment": "GROUP2 0xC1 /5",
       "ExpectedArm64ASM": [
-        "uxth w20, w4",
-        "lsr w26, w20, #2",
-        "bfxil x4, x26, #0, #16",
-        "cmn wzr, w26, lsl #16",
-        "ubfx x20, x20, #1, #1",
+        "mov x20, x4",
+        "uxth w21, w20",
+        "lsr w22, w21, #2",
+        "mov x23, x20",
+        "bfxil x23, x22, #0, #16",
+        "mov x4, x23",
+        "cmn wzr, w22, lsl #16",
+        "ubfx x20, x21, #1, #1",
         "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "msr nzcv, x20"
+        "orr w23, w21, w20, lsl #29",
+        "mov x26, x22",
+        "msr nzcv, x23"
       ]
     },
     "shr eax, 2": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP2 0xC1 /5",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "lsr w4, w20, #2",
-        "tst w4, w4",
-        "ubfx x20, x20, #1, #1",
+        "mov x20, x4",
+        "mov w21, w20",
+        "lsr w20, w21, #2",
+        "mov x4, x20",
+        "tst w20, w20",
+        "ubfx x22, x21, #1, #1",
         "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "mov x26, x4",
-        "msr nzcv, x20"
+        "orr w23, w21, w22, lsl #29",
+        "mov x26, x20",
+        "msr nzcv, x23"
       ]
     },
     "shr rax, 2": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP2 0xC1 /5",
       "ExpectedArm64ASM": [
         "mov x20, x4",
-        "lsr x4, x20, #2",
-        "tst x4, x4",
-        "ubfx x20, x20, #1, #1",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "mov x26, x4",
-        "msr nzcv, x20"
+        "lsr x21, x20, #2",
+        "mov x4, x21",
+        "tst x21, x21",
+        "ubfx x22, x20, #1, #1",
+        "mrs x20, nzcv",
+        "orr w23, w20, w22, lsl #29",
+        "mov x26, x21",
+        "msr nzcv, x23"
       ]
     },
     "sar ax, 2": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 13,
       "Comment": "GROUP2 0xC1 /7",
       "ExpectedArm64ASM": [
-        "uxth w20, w4",
-        "sxth x20, w20",
-        "asr x26, x20, #2",
-        "bfxil x4, x26, #0, #16",
-        "cmn wzr, w26, lsl #16",
-        "ubfx x20, x20, #1, #1",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "uxth w21, w20",
+        "sxth x22, w21",
+        "asr x21, x22, #2",
+        "mov x23, x20",
+        "bfxil x23, x21, #0, #16",
+        "mov x4, x23",
+        "cmn wzr, w21, lsl #16",
+        "ubfx x20, x22, #1, #1",
+        "mrs x22, nzcv",
+        "orr w23, w22, w20, lsl #29",
+        "mov x26, x21",
+        "msr nzcv, x23"
       ]
     },
     "sar eax, 2": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP2 0xC1 /7",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "asr w4, w20, #2",
-        "tst w4, w4",
-        "ubfx x20, x20, #1, #1",
+        "mov x20, x4",
+        "mov w21, w20",
+        "asr w20, w21, #2",
+        "mov x4, x20",
+        "tst w20, w20",
+        "ubfx x22, x21, #1, #1",
         "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "mov x26, x4",
-        "msr nzcv, x20"
+        "orr w23, w21, w22, lsl #29",
+        "mov x26, x20",
+        "msr nzcv, x23"
       ]
     },
     "sar rax, 2": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP2 0xC1 /7",
       "ExpectedArm64ASM": [
         "mov x20, x4",
-        "asr x4, x20, #2",
-        "tst x4, x4",
-        "ubfx x20, x20, #1, #1",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "mov x26, x4",
-        "msr nzcv, x20"
+        "asr x21, x20, #2",
+        "mov x4, x21",
+        "tst x21, x21",
+        "ubfx x22, x20, #1, #1",
+        "mrs x20, nzcv",
+        "orr w23, w20, w22, lsl #29",
+        "mov x26, x21",
+        "msr nzcv, x23"
       ]
     },
     "rol al, 1": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 17,
       "Comment": "GROUP2 0xd0 /0",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "bfi w20, w4, #8, #8",
-        "bfi w20, w20, #16, #16",
-        "ror w20, w20, #31",
-        "bfxil x4, x20, #0, #8",
-        "mrs x21, nzcv",
-        "and w21, w21, #0xc0000000",
-        "ubfx x22, x20, #0, #1",
-        "orr w21, w21, w22, lsl #29",
-        "eor w20, w20, w20, lsr #7",
-        "ubfx x20, x20, #0, #1",
-        "orr w20, w21, w20, lsl #28",
+        "mov x20, x4",
+        "mov w21, w20",
+        "bfi w21, w20, #8, #8",
+        "mov w22, w21",
+        "bfi w22, w21, #16, #16",
+        "ror w21, w22, #31",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22",
+        "mrs x20, nzcv",
+        "and w22, w20, #0xc0000000",
+        "ubfx x20, x21, #0, #1",
+        "orr w23, w22, w20, lsl #29",
+        "eor w20, w21, w21, lsr #7",
+        "ubfx x21, x20, #0, #1",
+        "orr w20, w23, w21, lsl #28",
         "msr nzcv, x20"
       ]
     },
     "ror al, 1": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 15,
       "Comment": "GROUP2 0xd0 /1",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "bfi w20, w4, #8, #8",
-        "ror w20, w20, #1",
-        "bfxil x4, x20, #0, #8",
-        "mrs x21, nzcv",
-        "and w21, w21, #0xc0000000",
-        "ubfx x22, x20, #7, #1",
-        "orr w21, w21, w22, lsl #29",
-        "eor w20, w20, w20, lsr #1",
-        "ubfx x20, x20, #6, #1",
-        "orr w20, w21, w20, lsl #28",
+        "mov x20, x4",
+        "mov w21, w20",
+        "bfi w21, w20, #8, #8",
+        "ror w22, w21, #1",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #8",
+        "mov x4, x21",
+        "mrs x20, nzcv",
+        "and w21, w20, #0xc0000000",
+        "ubfx x20, x22, #7, #1",
+        "orr w23, w21, w20, lsl #29",
+        "eor w20, w22, w22, lsr #1",
+        "ubfx x21, x20, #6, #1",
+        "orr w20, w23, w21, lsl #28",
         "msr nzcv, x20"
       ]
     },
     "rcl al, 1": {
-      "ExpectedInstructionCount": 15,
+      "ExpectedInstructionCount": 16,
       "Comment": "GROUP2 0xd0 /2",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "cset w21, hs",
-        "orr w21, w21, w20, lsl #1",
-        "bfxil x4, x21, #0, #8",
-        "ubfx x22, x20, #7, #1",
-        "mrs x23, nzcv",
-        "mov w0, w23",
-        "bfi w0, w22, #29, #1",
-        "mov w22, w0",
-        "eor w20, w21, w20",
-        "ubfx x20, x20, #7, #1",
-        "mov w0, w22",
-        "bfi w0, w20, #28, #1",
-        "mov w20, w0",
+        "mov x20, x4",
+        "uxtb w21, w20",
+        "cset w22, hs",
+        "orr w23, w22, w21, lsl #1",
+        "mov x22, x20",
+        "bfxil x22, x23, #0, #8",
+        "mov x4, x22",
+        "ubfx x20, x21, #7, #1",
+        "mrs x22, nzcv",
+        "mov w24, w22",
+        "bfi w24, w20, #29, #1",
+        "eor w20, w23, w21",
+        "ubfx x21, x20, #7, #1",
+        "mov w20, w24",
+        "bfi w20, w21, #28, #1",
         "msr nzcv, x20"
       ]
     },
     "rcr al, 1": {
-      "ExpectedInstructionCount": 16,
+      "ExpectedInstructionCount": 18,
       "Comment": "GROUP2 0xd0 /3",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "cset w21, hs",
-        "ubfx x22, x20, #0, #1",
-        "mrs x23, nzcv",
-        "mov w0, w23",
-        "bfi w0, w22, #29, #1",
-        "mov w22, w0",
-        "ubfx w20, w20, #1, #7",
-        "bfi w20, w21, #7, #1",
-        "bfxil x4, x20, #0, #8",
-        "eor w20, w20, w20, lsr #1",
-        "ubfx x20, x20, #6, #1",
-        "mov w0, w22",
-        "bfi w0, w20, #28, #1",
-        "mov w20, w0",
+        "mov x20, x4",
+        "uxtb w21, w20",
+        "cset w22, hs",
+        "ubfx x23, x21, #0, #1",
+        "mrs x24, nzcv",
+        "mov w25, w24",
+        "bfi w25, w23, #29, #1",
+        "ubfx w23, w21, #1, #7",
+        "mov w21, w23",
+        "bfi w21, w22, #7, #1",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22",
+        "eor w20, w21, w21, lsr #1",
+        "ubfx x21, x20, #6, #1",
+        "mov w20, w25",
+        "bfi w20, w21, #28, #1",
         "msr nzcv, x20"
       ]
     },
     "shl al, 1": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 15,
       "Comment": "GROUP2 0xd0 /4",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "lsl w26, w20, #1",
-        "bfxil x4, x26, #0, #8",
-        "cmn wzr, w26, lsl #24",
+        "mov x20, x4",
+        "uxtb w21, w20",
+        "lsl w22, w21, #1",
+        "mov x23, x20",
+        "bfxil x23, x22, #0, #8",
+        "mov x4, x23",
+        "cmn wzr, w22, lsl #24",
+        "ubfx x20, x21, #7, #1",
+        "mrs x23, nzcv",
+        "orr w24, w23, w20, lsl #29",
+        "mov x26, x22",
+        "eor w20, w22, w21",
         "ubfx x21, x20, #7, #1",
-        "mrs x22, nzcv",
-        "orr w21, w22, w21, lsl #29",
-        "eor w20, w26, w20",
-        "ubfx x20, x20, #7, #1",
-        "orr w20, w21, w20, lsl #28",
+        "orr w20, w24, w21, lsl #28",
         "msr nzcv, x20"
       ]
     },
     "shr al, 1": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 14,
       "Comment": "GROUP2 0xd0 /5",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "lsr w26, w20, #1",
-        "bfxil x4, x26, #0, #8",
-        "cmn wzr, w26, lsl #24",
-        "ubfx x21, x20, #0, #1",
-        "mrs x22, nzcv",
-        "orr w21, w22, w21, lsl #29",
-        "ubfx x20, x20, #7, #1",
-        "orr w20, w21, w20, lsl #28",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "uxtb w21, w20",
+        "lsr w22, w21, #1",
+        "mov x23, x20",
+        "bfxil x23, x22, #0, #8",
+        "mov x4, x23",
+        "cmn wzr, w22, lsl #24",
+        "ubfx x20, x21, #0, #1",
+        "mrs x23, nzcv",
+        "orr w24, w23, w20, lsl #29",
+        "mov x26, x22",
+        "ubfx x20, x21, #7, #1",
+        "orr w21, w24, w20, lsl #28",
+        "msr nzcv, x21"
       ]
     },
     "sar al, 1": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 13,
       "Comment": "GROUP2 0xd0 /7",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "sxtb x20, w20",
-        "asr x26, x20, #1",
-        "bfxil x4, x26, #0, #8",
-        "cmn wzr, w26, lsl #24",
-        "ubfx x20, x20, #0, #1",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "uxtb w21, w20",
+        "sxtb x22, w21",
+        "asr x21, x22, #1",
+        "mov x23, x20",
+        "bfxil x23, x21, #0, #8",
+        "mov x4, x23",
+        "cmn wzr, w21, lsl #24",
+        "ubfx x20, x22, #0, #1",
+        "mrs x22, nzcv",
+        "orr w23, w22, w20, lsl #29",
+        "mov x26, x21",
+        "msr nzcv, x23"
       ]
     },
     "rol ax, 1": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 15,
       "Comment": "GROUP2 0xd1 /0",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "bfi w20, w4, #16, #16",
-        "ror w20, w20, #31",
-        "bfxil x4, x20, #0, #16",
-        "mrs x21, nzcv",
-        "and w21, w21, #0xc0000000",
-        "ubfx x22, x20, #0, #1",
-        "orr w21, w21, w22, lsl #29",
-        "eor w20, w20, w20, lsr #15",
-        "ubfx x20, x20, #0, #1",
-        "orr w20, w21, w20, lsl #28",
+        "mov x20, x4",
+        "mov w21, w20",
+        "bfi w21, w20, #16, #16",
+        "ror w22, w21, #31",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21",
+        "mrs x20, nzcv",
+        "and w21, w20, #0xc0000000",
+        "ubfx x20, x22, #0, #1",
+        "orr w23, w21, w20, lsl #29",
+        "eor w20, w22, w22, lsr #15",
+        "ubfx x21, x20, #0, #1",
+        "orr w20, w23, w21, lsl #28",
         "msr nzcv, x20"
       ]
     },
     "rol eax, 1": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP2 0xd1 /0",
       "ExpectedArm64ASM": [
-        "ror w4, w4, #31",
+        "mov x20, x4",
+        "ror w21, w20, #31",
+        "mov x4, x21",
         "mrs x20, nzcv",
-        "and w20, w20, #0xc0000000",
-        "ubfx x21, x4, #0, #1",
-        "orr w20, w20, w21, lsl #29",
-        "eor w21, w4, w4, lsr #31",
-        "ubfx x21, x21, #0, #1",
-        "orr w20, w20, w21, lsl #28",
+        "and w22, w20, #0xc0000000",
+        "ubfx x20, x21, #0, #1",
+        "orr w23, w22, w20, lsl #29",
+        "eor w20, w21, w21, lsr #31",
+        "ubfx x21, x20, #0, #1",
+        "orr w20, w23, w21, lsl #28",
         "msr nzcv, x20"
       ]
     },
     "rol rax, 1": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP2 0xd1 /0",
       "ExpectedArm64ASM": [
-        "ror x4, x4, #63",
+        "mov x20, x4",
+        "ror x21, x20, #63",
+        "mov x4, x21",
         "mrs x20, nzcv",
-        "and w20, w20, #0xc0000000",
-        "ubfx x21, x4, #0, #1",
-        "orr w20, w20, w21, lsl #29",
-        "eor x21, x4, x4, lsr #63",
-        "ubfx x21, x21, #0, #1",
-        "orr w20, w20, w21, lsl #28",
+        "and w22, w20, #0xc0000000",
+        "ubfx x20, x21, #0, #1",
+        "orr w23, w22, w20, lsl #29",
+        "eor x20, x21, x21, lsr #63",
+        "ubfx x21, x20, #0, #1",
+        "orr w20, w23, w21, lsl #28",
         "msr nzcv, x20"
       ]
     },
     "ror ax, 1": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 15,
       "Comment": "GROUP2 0xd1 /1",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "bfi w20, w4, #16, #16",
-        "ror w20, w20, #1",
-        "bfxil x4, x20, #0, #16",
-        "mrs x21, nzcv",
-        "and w21, w21, #0xc0000000",
-        "ubfx x22, x20, #15, #1",
-        "orr w21, w21, w22, lsl #29",
-        "eor w20, w20, w20, lsr #1",
-        "ubfx x20, x20, #14, #1",
-        "orr w20, w21, w20, lsl #28",
+        "mov x20, x4",
+        "mov w21, w20",
+        "bfi w21, w20, #16, #16",
+        "ror w22, w21, #1",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21",
+        "mrs x20, nzcv",
+        "and w21, w20, #0xc0000000",
+        "ubfx x20, x22, #15, #1",
+        "orr w23, w21, w20, lsl #29",
+        "eor w20, w22, w22, lsr #1",
+        "ubfx x21, x20, #14, #1",
+        "orr w20, w23, w21, lsl #28",
         "msr nzcv, x20"
       ]
     },
     "ror eax, 1": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP2 0xd1 /1",
       "ExpectedArm64ASM": [
-        "ror w4, w4, #1",
+        "mov x20, x4",
+        "ror w21, w20, #1",
+        "mov x4, x21",
         "mrs x20, nzcv",
-        "and w20, w20, #0xc0000000",
-        "ubfx x21, x4, #31, #1",
-        "orr w20, w20, w21, lsl #29",
-        "eor w21, w4, w4, lsr #1",
-        "ubfx x21, x21, #30, #1",
-        "orr w20, w20, w21, lsl #28",
+        "and w22, w20, #0xc0000000",
+        "ubfx x20, x21, #31, #1",
+        "orr w23, w22, w20, lsl #29",
+        "eor w20, w21, w21, lsr #1",
+        "ubfx x21, x20, #30, #1",
+        "orr w20, w23, w21, lsl #28",
         "msr nzcv, x20"
       ]
     },
     "ror rax, 1": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP2 0xd1 /1",
       "ExpectedArm64ASM": [
-        "ror x4, x4, #1",
+        "mov x20, x4",
+        "ror x21, x20, #1",
+        "mov x4, x21",
         "mrs x20, nzcv",
-        "and w20, w20, #0xc0000000",
-        "lsr x21, x4, #63",
-        "orr w20, w20, w21, lsl #29",
-        "eor x21, x4, x4, lsr #1",
-        "ubfx x21, x21, #62, #1",
-        "orr w20, w20, w21, lsl #28",
+        "and w22, w20, #0xc0000000",
+        "lsr x20, x21, #63",
+        "orr w23, w22, w20, lsl #29",
+        "eor x20, x21, x21, lsr #1",
+        "ubfx x21, x20, #62, #1",
+        "orr w20, w23, w21, lsl #28",
         "msr nzcv, x20"
       ]
     },
     "rcl ax, 1": {
-      "ExpectedInstructionCount": 15,
+      "ExpectedInstructionCount": 16,
       "Comment": "GROUP2 0xd1 /2",
       "ExpectedArm64ASM": [
-        "uxth w20, w4",
-        "cset w21, hs",
-        "orr w21, w21, w20, lsl #1",
-        "bfxil x4, x21, #0, #16",
-        "ubfx x22, x20, #15, #1",
-        "mrs x23, nzcv",
-        "mov w0, w23",
-        "bfi w0, w22, #29, #1",
-        "mov w22, w0",
-        "eor w20, w21, w20",
-        "ubfx x20, x20, #15, #1",
-        "mov w0, w22",
-        "bfi w0, w20, #28, #1",
-        "mov w20, w0",
+        "mov x20, x4",
+        "uxth w21, w20",
+        "cset w22, hs",
+        "orr w23, w22, w21, lsl #1",
+        "mov x22, x20",
+        "bfxil x22, x23, #0, #16",
+        "mov x4, x22",
+        "ubfx x20, x21, #15, #1",
+        "mrs x22, nzcv",
+        "mov w24, w22",
+        "bfi w24, w20, #29, #1",
+        "eor w20, w23, w21",
+        "ubfx x21, x20, #15, #1",
+        "mov w20, w24",
+        "bfi w20, w21, #28, #1",
         "msr nzcv, x20"
       ]
     },
@@ -1672,60 +2009,60 @@
       "ExpectedInstructionCount": 14,
       "Comment": "GROUP2 0xd1 /2",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "cset w21, hs",
-        "orr w4, w21, w20, lsl #1",
+        "mov x20, x4",
+        "mov w21, w20",
+        "cset w20, hs",
+        "orr w22, w20, w21, lsl #1",
+        "mov x4, x22",
+        "ubfx x20, x21, #31, #1",
+        "mrs x23, nzcv",
+        "mov w24, w23",
+        "bfi w24, w20, #29, #1",
+        "eor w20, w22, w21",
         "ubfx x21, x20, #31, #1",
-        "mrs x22, nzcv",
-        "mov w0, w22",
-        "bfi w0, w21, #29, #1",
-        "mov w21, w0",
-        "eor w20, w4, w20",
-        "ubfx x20, x20, #31, #1",
-        "mov w0, w21",
-        "bfi w0, w20, #28, #1",
-        "mov w20, w0",
+        "mov w20, w24",
+        "bfi w20, w21, #28, #1",
         "msr nzcv, x20"
       ]
     },
     "rcl rax, 1": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 13,
       "Comment": "GROUP2 0xd1 /2",
       "ExpectedArm64ASM": [
         "mov x20, x4",
         "cset w21, hs",
-        "orr x4, x21, x20, lsl #1",
+        "orr x22, x21, x20, lsl #1",
+        "mov x4, x22",
         "lsr x21, x20, #63",
-        "mrs x22, nzcv",
-        "mov w0, w22",
-        "bfi w0, w21, #29, #1",
-        "mov w21, w0",
-        "eor x20, x4, x20",
-        "lsr x20, x20, #63",
-        "mov w0, w21",
-        "bfi w0, w20, #28, #1",
-        "mov w20, w0",
-        "msr nzcv, x20"
+        "mrs x23, nzcv",
+        "mov w24, w23",
+        "bfi w24, w21, #29, #1",
+        "eor x21, x22, x20",
+        "lsr x20, x21, #63",
+        "mov w21, w24",
+        "bfi w21, w20, #28, #1",
+        "msr nzcv, x21"
       ]
     },
     "rcr ax, 1": {
-      "ExpectedInstructionCount": 15,
+      "ExpectedInstructionCount": 16,
       "Comment": "GROUP2 0xd1 /3",
       "ExpectedArm64ASM": [
-        "cset w20, hs",
-        "ubfx x21, x4, #0, #1",
-        "mrs x22, nzcv",
-        "mov w0, w22",
-        "bfi w0, w21, #29, #1",
-        "mov w21, w0",
-        "ubfx w22, w4, #1, #15",
-        "orr w20, w22, w20, lsl #15",
-        "bfxil x4, x20, #0, #16",
-        "eor x20, x20, x20, lsr #1",
-        "ubfx x20, x20, #14, #1",
-        "mov w0, w21",
-        "bfi w0, w20, #28, #1",
-        "mov w20, w0",
+        "mov x20, x4",
+        "cset w21, hs",
+        "ubfx x22, x20, #0, #1",
+        "mrs x23, nzcv",
+        "mov w24, w23",
+        "bfi w24, w22, #29, #1",
+        "ubfx w22, w20, #1, #15",
+        "orr w23, w22, w21, lsl #15",
+        "mov x21, x20",
+        "bfxil x21, x23, #0, #16",
+        "mov x4, x21",
+        "eor x20, x23, x23, lsr #1",
+        "ubfx x21, x20, #14, #1",
+        "mov w20, w24",
+        "bfi w20, w21, #28, #1",
         "msr nzcv, x20"
       ]
     },
@@ -1733,18 +2070,18 @@
       "ExpectedInstructionCount": 13,
       "Comment": "GROUP2 0xd1 /3",
       "ExpectedArm64ASM": [
-        "cset w20, hs",
-        "ubfx x21, x4, #0, #1",
-        "mrs x22, nzcv",
-        "mov w0, w22",
-        "bfi w0, w21, #29, #1",
-        "mov w21, w0",
-        "extr w4, w20, w4, #1",
-        "eor x20, x4, x4, lsr #1",
-        "ubfx x20, x20, #30, #1",
-        "mov w0, w21",
-        "bfi w0, w20, #28, #1",
-        "mov w20, w0",
+        "mov x20, x4",
+        "cset w21, hs",
+        "ubfx x22, x20, #0, #1",
+        "mrs x23, nzcv",
+        "mov w24, w23",
+        "bfi w24, w22, #29, #1",
+        "extr w22, w21, w20, #1",
+        "mov x4, x22",
+        "eor x20, x22, x22, lsr #1",
+        "ubfx x21, x20, #30, #1",
+        "mov w20, w24",
+        "bfi w20, w21, #28, #1",
         "msr nzcv, x20"
       ]
     },
@@ -1752,1449 +2089,1781 @@
       "ExpectedInstructionCount": 13,
       "Comment": "GROUP2 0xd1 /3",
       "ExpectedArm64ASM": [
-        "cset w20, hs",
-        "ubfx x21, x4, #0, #1",
-        "mrs x22, nzcv",
-        "mov w0, w22",
-        "bfi w0, w21, #29, #1",
-        "mov w21, w0",
-        "extr x4, x20, x4, #1",
-        "eor x20, x4, x4, lsr #1",
-        "ubfx x20, x20, #62, #1",
-        "mov w0, w21",
-        "bfi w0, w20, #28, #1",
-        "mov w20, w0",
+        "mov x20, x4",
+        "cset w21, hs",
+        "ubfx x22, x20, #0, #1",
+        "mrs x23, nzcv",
+        "mov w24, w23",
+        "bfi w24, w22, #29, #1",
+        "extr x22, x21, x20, #1",
+        "mov x4, x22",
+        "eor x20, x22, x22, lsr #1",
+        "ubfx x21, x20, #62, #1",
+        "mov w20, w24",
+        "bfi w20, w21, #28, #1",
         "msr nzcv, x20"
       ]
     },
     "shl ax, 1": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 15,
       "Comment": "GROUP2 0xd1 /4",
       "ExpectedArm64ASM": [
-        "uxth w20, w4",
-        "lsl w26, w20, #1",
-        "bfxil x4, x26, #0, #16",
-        "cmn wzr, w26, lsl #16",
+        "mov x20, x4",
+        "uxth w21, w20",
+        "lsl w22, w21, #1",
+        "mov x23, x20",
+        "bfxil x23, x22, #0, #16",
+        "mov x4, x23",
+        "cmn wzr, w22, lsl #16",
+        "ubfx x20, x21, #15, #1",
+        "mrs x23, nzcv",
+        "orr w24, w23, w20, lsl #29",
+        "mov x26, x22",
+        "eor w20, w22, w21",
         "ubfx x21, x20, #15, #1",
-        "mrs x22, nzcv",
-        "orr w21, w22, w21, lsl #29",
-        "eor w20, w26, w20",
-        "ubfx x20, x20, #15, #1",
-        "orr w20, w21, w20, lsl #28",
+        "orr w20, w24, w21, lsl #28",
         "msr nzcv, x20"
       ]
     },
     "shl eax, 1": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 13,
       "Comment": "GROUP2 0xd1 /4",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "lsl w4, w20, #1",
-        "tst w4, w4",
-        "ubfx x21, x20, #31, #1",
-        "mrs x22, nzcv",
-        "orr w21, w22, w21, lsl #29",
-        "mov x26, x4",
-        "eor w20, w4, w20",
-        "ubfx x20, x20, #31, #1",
-        "orr w20, w21, w20, lsl #28",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "mov w21, w20",
+        "lsl w20, w21, #1",
+        "mov x4, x20",
+        "tst w20, w20",
+        "ubfx x22, x21, #31, #1",
+        "mrs x23, nzcv",
+        "orr w24, w23, w22, lsl #29",
+        "mov x26, x20",
+        "eor w22, w20, w21",
+        "ubfx x20, x22, #31, #1",
+        "orr w21, w24, w20, lsl #28",
+        "msr nzcv, x21"
       ]
     },
     "shl rax, 1": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 12,
       "Comment": "GROUP2 0xd1 /4",
       "ExpectedArm64ASM": [
         "mov x20, x4",
-        "lsl x4, x20, #1",
-        "tst x4, x4",
-        "lsr x21, x20, #63",
-        "mrs x22, nzcv",
-        "orr w21, w22, w21, lsl #29",
-        "mov x26, x4",
-        "eor x20, x4, x20",
-        "lsr x20, x20, #63",
-        "orr w20, w21, w20, lsl #28",
-        "msr nzcv, x20"
+        "lsl x21, x20, #1",
+        "mov x4, x21",
+        "tst x21, x21",
+        "lsr x22, x20, #63",
+        "mrs x23, nzcv",
+        "orr w24, w23, w22, lsl #29",
+        "mov x26, x21",
+        "eor x22, x21, x20",
+        "lsr x20, x22, #63",
+        "orr w21, w24, w20, lsl #28",
+        "msr nzcv, x21"
       ]
     },
     "shr ax, 1": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 14,
       "Comment": "GROUP2 0xd1 /5",
       "ExpectedArm64ASM": [
-        "uxth w20, w4",
-        "lsr w26, w20, #1",
-        "bfxil x4, x26, #0, #16",
-        "cmn wzr, w26, lsl #16",
-        "ubfx x21, x20, #0, #1",
-        "mrs x22, nzcv",
-        "orr w21, w22, w21, lsl #29",
-        "ubfx x20, x20, #15, #1",
-        "orr w20, w21, w20, lsl #28",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "uxth w21, w20",
+        "lsr w22, w21, #1",
+        "mov x23, x20",
+        "bfxil x23, x22, #0, #16",
+        "mov x4, x23",
+        "cmn wzr, w22, lsl #16",
+        "ubfx x20, x21, #0, #1",
+        "mrs x23, nzcv",
+        "orr w24, w23, w20, lsl #29",
+        "mov x26, x22",
+        "ubfx x20, x21, #15, #1",
+        "orr w21, w24, w20, lsl #28",
+        "msr nzcv, x21"
       ]
     },
     "shr eax, 1": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 12,
       "Comment": "GROUP2 0xd1 /5",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "lsr w4, w20, #1",
-        "tst w4, w4",
-        "ubfx x21, x20, #0, #1",
-        "mrs x22, nzcv",
-        "orr w21, w22, w21, lsl #29",
-        "mov x26, x4",
-        "ubfx x20, x20, #31, #1",
-        "orr w20, w21, w20, lsl #28",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "mov w21, w20",
+        "lsr w20, w21, #1",
+        "mov x4, x20",
+        "tst w20, w20",
+        "ubfx x22, x21, #0, #1",
+        "mrs x23, nzcv",
+        "orr w24, w23, w22, lsl #29",
+        "mov x26, x20",
+        "ubfx x20, x21, #31, #1",
+        "orr w21, w24, w20, lsl #28",
+        "msr nzcv, x21"
       ]
     },
     "shr rax, 1": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP2 0xd1 /5",
       "ExpectedArm64ASM": [
         "mov x20, x4",
-        "lsr x4, x20, #1",
-        "tst x4, x4",
-        "ubfx x21, x20, #0, #1",
-        "mrs x22, nzcv",
-        "orr w21, w22, w21, lsl #29",
-        "mov x26, x4",
-        "lsr x20, x20, #63",
-        "orr w20, w21, w20, lsl #28",
+        "lsr x21, x20, #1",
+        "mov x4, x21",
+        "tst x21, x21",
+        "ubfx x22, x20, #0, #1",
+        "mrs x23, nzcv",
+        "orr w24, w23, w22, lsl #29",
+        "mov x26, x21",
+        "lsr x21, x20, #63",
+        "orr w20, w24, w21, lsl #28",
         "msr nzcv, x20"
       ]
     },
     "sar ax, 1": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 13,
       "Comment": "GROUP2 0xd1 /7",
       "ExpectedArm64ASM": [
-        "uxth w20, w4",
-        "sxth x20, w20",
-        "asr x26, x20, #1",
-        "bfxil x4, x26, #0, #16",
-        "cmn wzr, w26, lsl #16",
-        "ubfx x20, x20, #0, #1",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "uxth w21, w20",
+        "sxth x22, w21",
+        "asr x21, x22, #1",
+        "mov x23, x20",
+        "bfxil x23, x21, #0, #16",
+        "mov x4, x23",
+        "cmn wzr, w21, lsl #16",
+        "ubfx x20, x22, #0, #1",
+        "mrs x22, nzcv",
+        "orr w23, w22, w20, lsl #29",
+        "mov x26, x21",
+        "msr nzcv, x23"
       ]
     },
     "sar eax, 1": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP2 0xd1 /7",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "asr w4, w20, #1",
-        "tst w4, w4",
-        "ubfx x20, x20, #0, #1",
+        "mov x20, x4",
+        "mov w21, w20",
+        "asr w20, w21, #1",
+        "mov x4, x20",
+        "tst w20, w20",
+        "ubfx x22, x21, #0, #1",
         "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "mov x26, x4",
-        "msr nzcv, x20"
+        "orr w23, w21, w22, lsl #29",
+        "mov x26, x20",
+        "msr nzcv, x23"
       ]
     },
     "sar rax, 1": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP2 0xd1 /7",
       "ExpectedArm64ASM": [
         "mov x20, x4",
-        "asr x4, x20, #1",
-        "tst x4, x4",
-        "ubfx x20, x20, #0, #1",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "mov x26, x4",
-        "msr nzcv, x20"
+        "asr x21, x20, #1",
+        "mov x4, x21",
+        "tst x21, x21",
+        "ubfx x22, x20, #0, #1",
+        "mrs x20, nzcv",
+        "orr w23, w20, w22, lsl #29",
+        "mov x26, x21",
+        "msr nzcv, x23"
       ]
     },
     "rol al, cl": {
-      "ExpectedInstructionCount": 16,
+      "ExpectedInstructionCount": 22,
       "Comment": "GROUP2 0xd2 /0",
       "ExpectedArm64ASM": [
-        "and x20, x5, #0x1f",
-        "cbz x20, #+0x3c",
-        "mov w20, w4",
-        "bfi w20, w4, #8, #8",
-        "bfi w20, w20, #16, #16",
-        "neg w21, w5",
-        "ror w20, w20, w21",
-        "bfxil x4, x20, #0, #8",
+        "mov x20, x5",
+        "and x21, x20, #0x1f",
+        "cbz x21, #+0x50",
+        "mov x20, x5",
+        "mov x21, x4",
+        "mov w22, w21",
+        "bfi w22, w21, #8, #8",
+        "mov w23, w22",
+        "bfi w23, w22, #16, #16",
+        "neg w22, w20",
+        "ror w20, w23, w22",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22",
         "mrs x21, nzcv",
-        "and w21, w21, #0xc0000000",
-        "ubfx x22, x20, #0, #1",
-        "orr w21, w21, w22, lsl #29",
-        "eor w20, w20, w20, lsr #7",
-        "ubfx x20, x20, #0, #1",
-        "orr w20, w21, w20, lsl #28",
-        "msr nzcv, x20"
+        "and w22, w21, #0xc0000000",
+        "ubfx x21, x20, #0, #1",
+        "orr w23, w22, w21, lsl #29",
+        "eor w21, w20, w20, lsr #7",
+        "ubfx x20, x21, #0, #1",
+        "orr w21, w23, w20, lsl #28",
+        "msr nzcv, x21"
       ]
     },
     "ror al, cl": {
-      "ExpectedInstructionCount": 15,
+      "ExpectedInstructionCount": 21,
       "Comment": "GROUP2 0xd2 /1",
       "ExpectedArm64ASM": [
-        "and x20, x5, #0x1f",
-        "cbz x20, #+0x38",
-        "mov w20, w4",
-        "bfi w20, w4, #8, #8",
-        "bfi w20, w20, #16, #16",
-        "ror w20, w20, w5",
-        "bfxil x4, x20, #0, #8",
-        "mrs x21, nzcv",
-        "and w21, w21, #0xc0000000",
-        "ubfx x22, x20, #7, #1",
-        "orr w21, w21, w22, lsl #29",
-        "eor w20, w20, w20, lsr #1",
-        "ubfx x20, x20, #6, #1",
-        "orr w20, w21, w20, lsl #28",
+        "mov x20, x5",
+        "and x21, x20, #0x1f",
+        "cbz x21, #+0x4c",
+        "mov x20, x5",
+        "mov x21, x4",
+        "mov w22, w21",
+        "bfi w22, w21, #8, #8",
+        "mov w23, w22",
+        "bfi w23, w22, #16, #16",
+        "ror w22, w23, w20",
+        "mov x20, x21",
+        "bfxil x20, x22, #0, #8",
+        "mov x4, x20",
+        "mrs x20, nzcv",
+        "and w21, w20, #0xc0000000",
+        "ubfx x20, x22, #7, #1",
+        "orr w23, w21, w20, lsl #29",
+        "eor w20, w22, w22, lsr #1",
+        "ubfx x21, x20, #6, #1",
+        "orr w20, w23, w21, lsl #28",
         "msr nzcv, x20"
       ]
     },
     "rcl al, cl": {
-      "ExpectedInstructionCount": 36,
+      "ExpectedInstructionCount": 48,
       "Comment": "GROUP2 0xd2 /2",
       "ExpectedArm64ASM": [
-        "and w20, w5, #0x1f",
-        "cbz x20, #+0x8c",
-        "and w20, w5, #0x1f",
-        "uxtb w21, w4",
-        "mov w22, #0x0",
-        "cset w23, hs",
-        "bfi x22, x21, #55, #8",
-        "bfi x22, x23, #63, #1",
-        "bfi x22, x21, #46, #8",
-        "bfi x22, x23, #54, #1",
-        "bfi x22, x21, #37, #8",
-        "bfi x22, x23, #45, #1",
-        "bfi x22, x21, #28, #8",
-        "bfi x22, x23, #36, #1",
-        "bfi x22, x21, #19, #8",
-        "bfi x22, x23, #27, #1",
-        "mov x0, x22",
-        "bfxil x0, x21, #0, #8",
-        "mov x21, x0",
-        "neg w22, w20",
-        "ror x22, x21, x22",
-        "bfxil x4, x22, #0, #8",
-        "mov w23, #0x3f",
-        "sub x20, x23, x20",
-        "ror x20, x21, x20",
+        "mov x20, x5",
+        "and w21, w20, #0x1f",
+        "cbz x21, #+0xb8",
+        "mov x20, x5",
+        "and w21, w20, #0x1f",
+        "mov x20, x4",
+        "uxtb w22, w20",
+        "mov w23, #0x0",
+        "cset w24, hs",
+        "mov x25, x23",
+        "bfi x25, x22, #55, #8",
+        "mov x23, x25",
+        "bfi x23, x24, #63, #1",
+        "mov x25, x23",
+        "bfi x25, x22, #46, #8",
+        "mov x23, x25",
+        "bfi x23, x24, #54, #1",
+        "mov x25, x23",
+        "bfi x25, x22, #37, #8",
+        "mov x23, x25",
+        "bfi x23, x24, #45, #1",
+        "mov x25, x23",
+        "bfi x25, x22, #28, #8",
+        "mov x23, x25",
+        "bfi x23, x24, #36, #1",
+        "mov x25, x23",
+        "bfi x25, x22, #19, #8",
+        "mov x23, x25",
+        "bfi x23, x24, #27, #1",
+        "mov x24, x23",
+        "bfxil x24, x22, #0, #8",
+        "neg w22, w21",
+        "ror x23, x24, x22",
+        "mov x22, x20",
+        "bfxil x22, x23, #0, #8",
+        "mov x4, x22",
+        "mov w20, #0x3f",
+        "sub x22, x20, x21",
+        "ror x20, x24, x22",
         "ubfx x21, x20, #0, #1",
-        "mrs x23, nzcv",
-        "mov w0, w23",
-        "bfi w0, w21, #29, #1",
-        "mov w21, w0",
-        "eor x20, x20, x22, lsr #7",
-        "ubfx x20, x20, #0, #1",
-        "mov w0, w21",
-        "bfi w0, w20, #28, #1",
-        "mov w20, w0",
-        "msr nzcv, x20"
+        "mrs x22, nzcv",
+        "mov w24, w22",
+        "bfi w24, w21, #29, #1",
+        "eor x21, x20, x23, lsr #7",
+        "ubfx x20, x21, #0, #1",
+        "mov w21, w24",
+        "bfi w21, w20, #28, #1",
+        "msr nzcv, x21"
       ]
     },
     "rcr al, cl": {
-      "ExpectedInstructionCount": 23,
+      "ExpectedInstructionCount": 30,
       "Comment": "GROUP2 0xd2 /3",
       "ExpectedArm64ASM": [
-        "and w20, w5, #0x1f",
-        "cbz x20, #+0x58",
-        "cset w20, hs",
-        "uxtb w21, w4",
-        "mov x0, x21",
-        "bfi x0, x20, #8, #1",
-        "mov x20, x0",
-        "bfi x20, x20, #9, #9",
-        "bfi x20, x20, #18, #18",
-        "bfi x20, x20, #36, #9",
-        "lsr w21, w20, w5",
-        "bfxil x4, x21, #0, #8",
-        "sub w22, w5, #0x1 (1)",
-        "lsr w20, w20, w22",
-        "ubfx x20, x20, #0, #1",
-        "mrs x22, nzcv",
-        "mov w0, w22",
-        "bfi w0, w20, #29, #1",
-        "mov w20, w0",
-        "eor w21, w21, w21, lsr #1",
-        "ubfx x21, x21, #6, #1",
+        "mov x20, x5",
+        "and w21, w20, #0x1f",
+        "cbz x21, #+0x70",
+        "mov x20, x5",
+        "cset w21, hs",
+        "mov x22, x4",
+        "uxtb w23, w22",
+        "mov x24, x23",
+        "bfi x24, x21, #8, #1",
+        "mov x21, x24",
+        "bfi x21, x24, #9, #9",
+        "mov x23, x21",
+        "bfi x23, x21, #18, #18",
+        "mov x21, x23",
+        "bfi x21, x23, #36, #9",
+        "lsr w23, w21, w20",
+        "mov x24, x22",
+        "bfxil x24, x23, #0, #8",
+        "mov x4, x24",
+        "sub w22, w20, #0x1 (1)",
+        "lsr w20, w21, w22",
+        "ubfx x21, x20, #0, #1",
+        "mrs x20, nzcv",
+        "mov w22, w20",
+        "bfi w22, w21, #29, #1",
+        "eor w20, w23, w23, lsr #1",
+        "ubfx x21, x20, #6, #1",
+        "mov w20, w22",
         "bfi w20, w21, #28, #1",
         "msr nzcv, x20"
       ]
     },
     "shl al, cl": {
-      "ExpectedInstructionCount": 16,
+      "ExpectedInstructionCount": 23,
       "Comment": "GROUP2 0xd2 /4",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "uxtb w21, w5",
-        "lsl w22, w20, w21",
-        "bfxil x4, x22, #0, #8",
-        "cbz w21, #+0x30",
+        "mov x20, x4",
+        "uxtb w21, w20",
+        "mov x22, x5",
+        "uxtb w23, w22",
+        "lsl w22, w21, w23",
+        "mov x24, x20",
+        "bfxil x24, x22, #0, #8",
+        "mov x4, x24",
+        "mov x20, x26",
+        "mov x24, x20",
+        "cbz w23, #+0x30",
         "cmn wzr, w22, lsl #24",
-        "mov x26, x22",
+        "mov x24, x22",
         "mov w0, #0x8",
-        "sub w0, w0, w21",
-        "lsr w0, w20, w0",
-        "eor w2, w20, w22",
+        "sub w0, w0, w23",
+        "lsr w0, w21, w0",
+        "eor w2, w21, w22",
         "mrs x1, nzcv",
         "bfi w1, w0, #29, #1",
         "lsr w2, w2, #7",
         "bfi w1, w2, #28, #1",
-        "msr nzcv, x1"
+        "msr nzcv, x1",
+        "mov x26, x24"
       ]
     },
     "shr al, cl": {
-      "ExpectedInstructionCount": 15,
+      "ExpectedInstructionCount": 22,
       "Comment": "GROUP2 0xd2 /5",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "uxtb w21, w5",
-        "lsr w22, w20, w21",
-        "bfxil x4, x22, #0, #8",
-        "cbz w21, #+0x2c",
+        "mov x20, x4",
+        "uxtb w21, w20",
+        "mov x22, x5",
+        "uxtb w23, w22",
+        "lsr w22, w21, w23",
+        "mov x24, x20",
+        "bfxil x24, x22, #0, #8",
+        "mov x4, x24",
+        "mov x20, x26",
+        "mov x24, x20",
+        "cbz w23, #+0x2c",
         "cmn wzr, w22, lsl #24",
-        "mov x26, x22",
-        "sub x0, x21, #0x1 (1)",
-        "lsr w0, w20, w0",
-        "eor w2, w20, w22",
+        "mov x24, x22",
+        "sub x0, x23, #0x1 (1)",
+        "lsr w0, w21, w0",
+        "eor w2, w21, w22",
         "mrs x1, nzcv",
         "bfi w1, w0, #29, #1",
         "lsr w2, w2, #7",
         "bfi w1, w2, #28, #1",
-        "msr nzcv, x1"
+        "msr nzcv, x1",
+        "mov x26, x24"
       ]
     },
     "sar al, cl": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 20,
       "Comment": "GROUP2 0xd2 /7",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "uxtb w21, w5",
-        "sxtb x20, w20",
-        "asr w22, w20, w21",
-        "bfxil x4, x22, #0, #8",
-        "cbz w21, #+0x20",
-        "cmn wzr, w22, lsl #24",
-        "mov x26, x22",
-        "sub x0, x21, #0x1 (1)",
-        "lsr w0, w20, w0",
+        "mov x20, x4",
+        "uxtb w21, w20",
+        "mov x22, x5",
+        "uxtb w23, w22",
+        "sxtb x22, w21",
+        "asr w21, w22, w23",
+        "mov x24, x20",
+        "bfxil x24, x21, #0, #8",
+        "mov x4, x24",
+        "mov x20, x26",
+        "mov x24, x20",
+        "cbz w23, #+0x20",
+        "cmn wzr, w21, lsl #24",
+        "mov x24, x21",
+        "sub x0, x23, #0x1 (1)",
+        "lsr w0, w22, w0",
         "mrs x1, nzcv",
         "bfi w1, w0, #29, #1",
-        "msr nzcv, x1"
+        "msr nzcv, x1",
+        "mov x26, x24"
       ]
     },
     "rol ax, cl": {
-      "ExpectedInstructionCount": 15,
+      "ExpectedInstructionCount": 20,
       "Comment": "GROUP2 0xd3 /0",
       "ExpectedArm64ASM": [
-        "and x20, x5, #0x1f",
-        "cbz x20, #+0x38",
-        "mov w20, w4",
-        "bfi w20, w4, #16, #16",
-        "neg w21, w5",
-        "ror w20, w20, w21",
-        "bfxil x4, x20, #0, #16",
-        "mrs x21, nzcv",
-        "and w21, w21, #0xc0000000",
-        "ubfx x22, x20, #0, #1",
-        "orr w21, w21, w22, lsl #29",
-        "eor w20, w20, w20, lsr #15",
-        "ubfx x20, x20, #0, #1",
-        "orr w20, w21, w20, lsl #28",
-        "msr nzcv, x20"
+        "mov x20, x5",
+        "and x21, x20, #0x1f",
+        "cbz x21, #+0x48",
+        "mov x20, x5",
+        "mov x21, x4",
+        "mov w22, w21",
+        "bfi w22, w21, #16, #16",
+        "neg w23, w20",
+        "ror w20, w22, w23",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x4, x22",
+        "mrs x21, nzcv",
+        "and w22, w21, #0xc0000000",
+        "ubfx x21, x20, #0, #1",
+        "orr w23, w22, w21, lsl #29",
+        "eor w21, w20, w20, lsr #15",
+        "ubfx x20, x21, #0, #1",
+        "orr w21, w23, w20, lsl #28",
+        "msr nzcv, x21"
       ]
     },
     "rol eax, cl": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 16,
       "Comment": "GROUP2 0xd3 /0",
       "ExpectedArm64ASM": [
-        "and x20, x5, #0x1f",
-        "cbz x20, #+0x2c",
-        "neg w20, w5",
-        "ror w4, w4, w20",
-        "mrs x20, nzcv",
-        "and w20, w20, #0xc0000000",
-        "ubfx x21, x4, #0, #1",
-        "orr w20, w20, w21, lsl #29",
-        "eor w21, w4, w4, lsr #31",
-        "ubfx x21, x21, #0, #1",
-        "orr w20, w20, w21, lsl #28",
-        "msr nzcv, x20"
+        "mov x20, x5",
+        "and x21, x20, #0x1f",
+        "cbz x21, #+0x38",
+        "mov x20, x5",
+        "mov x21, x4",
+        "neg w22, w20",
+        "ror w20, w21, w22",
+        "mov x4, x20",
+        "mrs x21, nzcv",
+        "and w22, w21, #0xc0000000",
+        "ubfx x21, x20, #0, #1",
+        "orr w23, w22, w21, lsl #29",
+        "eor w21, w20, w20, lsr #31",
+        "ubfx x20, x21, #0, #1",
+        "orr w21, w23, w20, lsl #28",
+        "msr nzcv, x21"
       ]
     },
     "rol rax, cl": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 16,
       "Comment": "GROUP2 0xd3 /0",
       "ExpectedArm64ASM": [
-        "and x20, x5, #0x3f",
-        "cbz x20, #+0x2c",
-        "neg x20, x5",
-        "ror x4, x4, x20",
-        "mrs x20, nzcv",
-        "and w20, w20, #0xc0000000",
-        "ubfx x21, x4, #0, #1",
-        "orr w20, w20, w21, lsl #29",
-        "eor x21, x4, x4, lsr #63",
-        "ubfx x21, x21, #0, #1",
-        "orr w20, w20, w21, lsl #28",
-        "msr nzcv, x20"
+        "mov x20, x5",
+        "and x21, x20, #0x3f",
+        "cbz x21, #+0x38",
+        "mov x20, x5",
+        "mov x21, x4",
+        "neg x22, x20",
+        "ror x20, x21, x22",
+        "mov x4, x20",
+        "mrs x21, nzcv",
+        "and w22, w21, #0xc0000000",
+        "ubfx x21, x20, #0, #1",
+        "orr w23, w22, w21, lsl #29",
+        "eor x21, x20, x20, lsr #63",
+        "ubfx x20, x21, #0, #1",
+        "orr w21, w23, w20, lsl #28",
+        "msr nzcv, x21"
       ]
     },
     "ror ax, cl": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 19,
       "Comment": "GROUP2 0xd3 /1",
       "ExpectedArm64ASM": [
-        "and x20, x5, #0x1f",
-        "cbz x20, #+0x34",
-        "mov w20, w4",
-        "bfi w20, w4, #16, #16",
-        "ror w20, w20, w5",
-        "bfxil x4, x20, #0, #16",
-        "mrs x21, nzcv",
-        "and w21, w21, #0xc0000000",
-        "ubfx x22, x20, #15, #1",
-        "orr w21, w21, w22, lsl #29",
-        "eor w20, w20, w20, lsr #1",
-        "ubfx x20, x20, #14, #1",
-        "orr w20, w21, w20, lsl #28",
+        "mov x20, x5",
+        "and x21, x20, #0x1f",
+        "cbz x21, #+0x44",
+        "mov x20, x5",
+        "mov x21, x4",
+        "mov w22, w21",
+        "bfi w22, w21, #16, #16",
+        "ror w23, w22, w20",
+        "mov x20, x21",
+        "bfxil x20, x23, #0, #16",
+        "mov x4, x20",
+        "mrs x20, nzcv",
+        "and w21, w20, #0xc0000000",
+        "ubfx x20, x23, #15, #1",
+        "orr w22, w21, w20, lsl #29",
+        "eor w20, w23, w23, lsr #1",
+        "ubfx x21, x20, #14, #1",
+        "orr w20, w22, w21, lsl #28",
         "msr nzcv, x20"
       ]
     },
     "ror eax, cl": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 15,
       "Comment": "GROUP2 0xd3 /1",
       "ExpectedArm64ASM": [
-        "and x20, x5, #0x1f",
-        "cbz x20, #+0x28",
-        "ror w4, w4, w5",
+        "mov x20, x5",
+        "and x21, x20, #0x1f",
+        "cbz x21, #+0x34",
+        "mov x20, x5",
+        "mov x21, x4",
+        "ror w22, w21, w20",
+        "mov x4, x22",
         "mrs x20, nzcv",
-        "and w20, w20, #0xc0000000",
-        "ubfx x21, x4, #31, #1",
-        "orr w20, w20, w21, lsl #29",
-        "eor w21, w4, w4, lsr #1",
-        "ubfx x21, x21, #30, #1",
-        "orr w20, w20, w21, lsl #28",
+        "and w21, w20, #0xc0000000",
+        "ubfx x20, x22, #31, #1",
+        "orr w23, w21, w20, lsl #29",
+        "eor w20, w22, w22, lsr #1",
+        "ubfx x21, x20, #30, #1",
+        "orr w20, w23, w21, lsl #28",
         "msr nzcv, x20"
       ]
     },
     "ror rax, cl": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 15,
       "Comment": "GROUP2 0xd3 /1",
       "ExpectedArm64ASM": [
-        "and x20, x5, #0x3f",
-        "cbz x20, #+0x28",
-        "ror x4, x4, x5",
+        "mov x20, x5",
+        "and x21, x20, #0x3f",
+        "cbz x21, #+0x34",
+        "mov x20, x5",
+        "mov x21, x4",
+        "ror x22, x21, x20",
+        "mov x4, x22",
         "mrs x20, nzcv",
-        "and w20, w20, #0xc0000000",
-        "lsr x21, x4, #63",
-        "orr w20, w20, w21, lsl #29",
-        "eor x21, x4, x4, lsr #1",
-        "ubfx x21, x21, #62, #1",
-        "orr w20, w20, w21, lsl #28",
+        "and w21, w20, #0xc0000000",
+        "lsr x20, x22, #63",
+        "orr w23, w21, w20, lsl #29",
+        "eor x20, x22, x22, lsr #1",
+        "ubfx x21, x20, #62, #1",
+        "orr w20, w23, w21, lsl #28",
         "msr nzcv, x20"
       ]
     },
     "rcl ax, cl": {
-      "ExpectedInstructionCount": 32,
+      "ExpectedInstructionCount": 40,
       "Comment": "GROUP2 0xd3 /2",
       "ExpectedArm64ASM": [
-        "and w20, w5, #0x1f",
-        "cbz x20, #+0x7c",
-        "and w20, w5, #0x1f",
-        "uxth w21, w4",
-        "mov w22, #0x0",
-        "cset w23, hs",
-        "bfi x22, x21, #47, #16",
-        "bfi x22, x23, #63, #1",
-        "bfi x22, x21, #30, #16",
-        "bfi x22, x23, #46, #1",
-        "bfi x22, x21, #13, #16",
-        "bfi x22, x23, #29, #1",
-        "mov x0, x22",
-        "bfxil x0, x21, #0, #16",
-        "mov x21, x0",
-        "neg w22, w20",
-        "ror x22, x21, x22",
-        "bfxil x4, x22, #0, #16",
-        "mov w23, #0x3f",
-        "sub x20, x23, x20",
-        "ror x20, x21, x20",
+        "mov x20, x5",
+        "and w21, w20, #0x1f",
+        "cbz x21, #+0x98",
+        "mov x20, x5",
+        "and w21, w20, #0x1f",
+        "mov x20, x4",
+        "uxth w22, w20",
+        "mov w23, #0x0",
+        "cset w24, hs",
+        "mov x25, x23",
+        "bfi x25, x22, #47, #16",
+        "mov x23, x25",
+        "bfi x23, x24, #63, #1",
+        "mov x25, x23",
+        "bfi x25, x22, #30, #16",
+        "mov x23, x25",
+        "bfi x23, x24, #46, #1",
+        "mov x25, x23",
+        "bfi x25, x22, #13, #16",
+        "mov x23, x25",
+        "bfi x23, x24, #29, #1",
+        "mov x24, x23",
+        "bfxil x24, x22, #0, #16",
+        "neg w22, w21",
+        "ror x23, x24, x22",
+        "mov x22, x20",
+        "bfxil x22, x23, #0, #16",
+        "mov x4, x22",
+        "mov w20, #0x3f",
+        "sub x22, x20, x21",
+        "ror x20, x24, x22",
         "ubfx x21, x20, #0, #1",
-        "mrs x23, nzcv",
-        "mov w0, w23",
-        "bfi w0, w21, #29, #1",
-        "mov w21, w0",
-        "eor x20, x20, x22, lsr #15",
-        "ubfx x20, x20, #0, #1",
-        "mov w0, w21",
-        "bfi w0, w20, #28, #1",
-        "mov w20, w0",
-        "msr nzcv, x20"
+        "mrs x22, nzcv",
+        "mov w24, w22",
+        "bfi w24, w21, #29, #1",
+        "eor x21, x20, x23, lsr #15",
+        "ubfx x20, x21, #0, #1",
+        "mov w21, w24",
+        "bfi w21, w20, #28, #1",
+        "msr nzcv, x21"
       ]
     },
     "rcl eax, cl": {
-      "ExpectedInstructionCount": 22,
+      "ExpectedInstructionCount": 24,
       "Comment": "GROUP2 0xd3 /2",
       "ExpectedArm64ASM": [
-        "and w20, w5, #0x1f",
-        "cbz x20, #+0x54",
-        "lsl w20, w4, w5",
-        "cset w21, hs",
-        "neg w22, w5",
-        "lsr w23, w4, w22",
-        "orr w20, w20, w23, lsr #1",
-        "lsr w22, w4, w22",
-        "ubfx x23, x22, #0, #1",
+        "mov x20, x5",
+        "and w21, w20, #0x1f",
+        "cbz x21, #+0x58",
+        "mov x20, x5",
+        "mov x21, x4",
+        "lsl w22, w21, w20",
+        "cset w23, hs",
+        "neg w24, w20",
+        "lsr w25, w21, w24",
+        "orr w30, w22, w25, lsr #1",
+        "lsr w22, w21, w24",
+        "ubfx x21, x22, #0, #1",
         "mrs x24, nzcv",
-        "mov w0, w24",
-        "bfi w0, w23, #29, #1",
-        "mov w23, w0",
-        "sub w24, w5, #0x1 (1)",
-        "lsl w21, w21, w24",
-        "orr w4, w20, w21",
-        "eor w20, w4, w22, lsl #31",
-        "ubfx x20, x20, #31, #1",
-        "mov w0, w23",
-        "bfi w0, w20, #28, #1",
-        "mov w20, w0",
+        "mov w25, w24",
+        "bfi w25, w21, #29, #1",
+        "sub w21, w20, #0x1 (1)",
+        "lsl w20, w23, w21",
+        "orr w21, w30, w20",
+        "eor w20, w21, w22, lsl #31",
+        "ubfx x22, x20, #31, #1",
+        "mov w20, w25",
+        "bfi w20, w22, #28, #1",
+        "mov x4, x21",
         "msr nzcv, x20"
       ]
     },
     "rcl rax, cl": {
-      "ExpectedInstructionCount": 22,
+      "ExpectedInstructionCount": 24,
       "Comment": "GROUP2 0xd3 /2",
       "ExpectedArm64ASM": [
-        "and x20, x5, #0x3f",
-        "cbz x20, #+0x54",
-        "lsl x20, x4, x5",
-        "cset w21, hs",
-        "neg x22, x5",
-        "lsr x23, x4, x22",
-        "orr x20, x20, x23, lsr #1",
-        "lsr x22, x4, x22",
-        "ubfx x23, x22, #0, #1",
+        "mov x20, x5",
+        "and x21, x20, #0x3f",
+        "cbz x21, #+0x58",
+        "mov x20, x5",
+        "mov x21, x4",
+        "lsl x22, x21, x20",
+        "cset w23, hs",
+        "neg x24, x20",
+        "lsr x25, x21, x24",
+        "orr x30, x22, x25, lsr #1",
+        "lsr x22, x21, x24",
+        "ubfx x21, x22, #0, #1",
         "mrs x24, nzcv",
-        "mov w0, w24",
-        "bfi w0, w23, #29, #1",
-        "mov w23, w0",
-        "sub x24, x5, #0x1 (1)",
-        "lsl x21, x21, x24",
-        "orr x4, x20, x21",
-        "eor x20, x4, x22, lsl #63",
-        "lsr x20, x20, #63",
-        "mov w0, w23",
-        "bfi w0, w20, #28, #1",
-        "mov w20, w0",
+        "mov w25, w24",
+        "bfi w25, w21, #29, #1",
+        "sub x21, x20, #0x1 (1)",
+        "lsl x20, x23, x21",
+        "orr x21, x30, x20",
+        "eor x20, x21, x22, lsl #63",
+        "lsr x22, x20, #63",
+        "mov w20, w25",
+        "bfi w20, w22, #28, #1",
+        "mov x4, x21",
         "msr nzcv, x20"
       ]
     },
     "rcr ax, cl": {
-      "ExpectedInstructionCount": 22,
+      "ExpectedInstructionCount": 28,
       "Comment": "GROUP2 0xd3 /3",
       "ExpectedArm64ASM": [
-        "and w20, w5, #0x1f",
-        "cbz x20, #+0x54",
-        "cset w20, hs",
-        "uxth w21, w4",
-        "mov x0, x21",
-        "bfi x0, x20, #16, #1",
-        "mov x20, x0",
-        "bfi x20, x20, #17, #17",
-        "bfi x20, x20, #34, #17",
-        "lsr w21, w20, w5",
-        "bfxil x4, x21, #0, #16",
-        "sub w22, w5, #0x1 (1)",
-        "lsr w20, w20, w22",
-        "ubfx x20, x20, #0, #1",
-        "mrs x22, nzcv",
-        "mov w0, w22",
-        "bfi w0, w20, #29, #1",
-        "mov w20, w0",
-        "eor w21, w21, w21, lsr #1",
-        "ubfx x21, x21, #14, #1",
+        "mov x20, x5",
+        "and w21, w20, #0x1f",
+        "cbz x21, #+0x68",
+        "mov x20, x5",
+        "cset w21, hs",
+        "mov x22, x4",
+        "uxth w23, w22",
+        "mov x24, x23",
+        "bfi x24, x21, #16, #1",
+        "mov x21, x24",
+        "bfi x21, x24, #17, #17",
+        "mov x23, x21",
+        "bfi x23, x21, #34, #17",
+        "lsr w21, w23, w20",
+        "mov x24, x22",
+        "bfxil x24, x21, #0, #16",
+        "mov x4, x24",
+        "sub w22, w20, #0x1 (1)",
+        "lsr w20, w23, w22",
+        "ubfx x22, x20, #0, #1",
+        "mrs x20, nzcv",
+        "mov w23, w20",
+        "bfi w23, w22, #29, #1",
+        "eor w20, w21, w21, lsr #1",
+        "ubfx x21, x20, #14, #1",
+        "mov w20, w23",
         "bfi w20, w21, #28, #1",
         "msr nzcv, x20"
       ]
     },
     "rcr eax, cl": {
-      "ExpectedInstructionCount": 22,
+      "ExpectedInstructionCount": 24,
       "Comment": "GROUP2 0xd3 /3",
       "ExpectedArm64ASM": [
-        "and w20, w5, #0x1f",
-        "cbz x20, #+0x54",
-        "lsr w20, w4, w5",
-        "cset w21, hs",
-        "neg w22, w5",
-        "lsl w23, w4, w22",
-        "orr w20, w20, w23, lsl #1",
-        "sub w23, w5, #0x1 (1)",
-        "lsr w23, w4, w23",
-        "ubfx x23, x23, #0, #1",
-        "mrs x24, nzcv",
-        "mov w0, w24",
-        "bfi w0, w23, #29, #1",
-        "mov w23, w0",
-        "lsl w21, w21, w22",
-        "orr w4, w20, w21",
-        "eor w20, w4, w4, lsr #1",
-        "ubfx x20, x20, #30, #1",
-        "mov w0, w23",
-        "bfi w0, w20, #28, #1",
-        "mov w20, w0",
+        "mov x20, x5",
+        "and w21, w20, #0x1f",
+        "cbz x21, #+0x58",
+        "mov x20, x5",
+        "mov x21, x4",
+        "lsr w22, w21, w20",
+        "cset w23, hs",
+        "neg w24, w20",
+        "lsl w25, w21, w24",
+        "orr w30, w22, w25, lsl #1",
+        "sub w22, w20, #0x1 (1)",
+        "lsr w20, w21, w22",
+        "ubfx x21, x20, #0, #1",
+        "mrs x20, nzcv",
+        "mov w22, w20",
+        "bfi w22, w21, #29, #1",
+        "lsl w20, w23, w24",
+        "orr w21, w30, w20",
+        "eor w20, w21, w21, lsr #1",
+        "ubfx x23, x20, #30, #1",
+        "mov w20, w22",
+        "bfi w20, w23, #28, #1",
+        "mov x4, x21",
         "msr nzcv, x20"
       ]
     },
     "rcr rax, cl": {
-      "ExpectedInstructionCount": 22,
+      "ExpectedInstructionCount": 24,
       "Comment": "GROUP2 0xd3 /3",
       "ExpectedArm64ASM": [
-        "and x20, x5, #0x3f",
-        "cbz x20, #+0x54",
-        "lsr x20, x4, x5",
-        "cset w21, hs",
-        "neg x22, x5",
-        "lsl x23, x4, x22",
-        "orr x20, x20, x23, lsl #1",
-        "sub x23, x5, #0x1 (1)",
-        "lsr x23, x4, x23",
-        "ubfx x23, x23, #0, #1",
-        "mrs x24, nzcv",
-        "mov w0, w24",
-        "bfi w0, w23, #29, #1",
-        "mov w23, w0",
-        "lsl x21, x21, x22",
-        "orr x4, x20, x21",
-        "eor x20, x4, x4, lsr #1",
-        "ubfx x20, x20, #62, #1",
-        "mov w0, w23",
-        "bfi w0, w20, #28, #1",
-        "mov w20, w0",
+        "mov x20, x5",
+        "and x21, x20, #0x3f",
+        "cbz x21, #+0x58",
+        "mov x20, x5",
+        "mov x21, x4",
+        "lsr x22, x21, x20",
+        "cset w23, hs",
+        "neg x24, x20",
+        "lsl x25, x21, x24",
+        "orr x30, x22, x25, lsl #1",
+        "sub x22, x20, #0x1 (1)",
+        "lsr x20, x21, x22",
+        "ubfx x21, x20, #0, #1",
+        "mrs x20, nzcv",
+        "mov w22, w20",
+        "bfi w22, w21, #29, #1",
+        "lsl x20, x23, x24",
+        "orr x21, x30, x20",
+        "eor x20, x21, x21, lsr #1",
+        "ubfx x23, x20, #62, #1",
+        "mov w20, w22",
+        "bfi w20, w23, #28, #1",
+        "mov x4, x21",
         "msr nzcv, x20"
       ]
     },
     "shl ax, cl": {
-      "ExpectedInstructionCount": 16,
+      "ExpectedInstructionCount": 23,
       "Comment": "GROUP2 0xd3 /4",
       "ExpectedArm64ASM": [
-        "uxth w20, w4",
-        "uxth w21, w5",
-        "lsl w22, w20, w21",
-        "bfxil x4, x22, #0, #16",
-        "cbz w21, #+0x30",
+        "mov x20, x4",
+        "uxth w21, w20",
+        "mov x22, x5",
+        "uxth w23, w22",
+        "lsl w22, w21, w23",
+        "mov x24, x20",
+        "bfxil x24, x22, #0, #16",
+        "mov x4, x24",
+        "mov x20, x26",
+        "mov x24, x20",
+        "cbz w23, #+0x30",
         "cmn wzr, w22, lsl #16",
-        "mov x26, x22",
+        "mov x24, x22",
         "mov w0, #0x10",
-        "sub w0, w0, w21",
-        "lsr w0, w20, w0",
-        "eor w2, w20, w22",
+        "sub w0, w0, w23",
+        "lsr w0, w21, w0",
+        "eor w2, w21, w22",
         "mrs x1, nzcv",
         "bfi w1, w0, #29, #1",
         "lsr w2, w2, #15",
         "bfi w1, w2, #28, #1",
-        "msr nzcv, x1"
+        "msr nzcv, x1",
+        "mov x26, x24"
       ]
     },
     "shl eax, cl": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 19,
       "Comment": "GROUP2 0xd3 /4",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "mov w21, w5",
-        "lsl w4, w20, w21",
-        "cbz w21, #+0x28",
-        "ands w26, w4, w4",
-        "neg w0, w21",
-        "lsr w0, w20, w0",
-        "eor w2, w20, w4",
+        "mov x20, x4",
+        "mov w21, w20",
+        "mov x20, x5",
+        "mov w22, w20",
+        "lsl w20, w21, w22",
+        "mov x4, x20",
+        "mov x23, x26",
+        "mov x24, x23",
+        "cbz w22, #+0x28",
+        "ands w24, w20, w20",
+        "neg w0, w22",
+        "lsr w0, w21, w0",
+        "eor w2, w21, w20",
         "mrs x1, nzcv",
         "bfi w1, w0, #29, #1",
         "lsr w2, w2, #31",
         "bfi w1, w2, #28, #1",
-        "msr nzcv, x1"
+        "msr nzcv, x1",
+        "mov x26, x24"
       ]
     },
     "shl rax, cl": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 17,
       "Comment": "GROUP2 0xd3 /4",
       "ExpectedArm64ASM": [
         "mov x20, x4",
-        "lsl x4, x20, x5",
-        "cbz x5, #+0x28",
-        "ands x26, x4, x4",
-        "neg x0, x5",
+        "mov x21, x5",
+        "lsl x22, x20, x21",
+        "mov x4, x22",
+        "mov x23, x26",
+        "mov x24, x23",
+        "cbz x21, #+0x28",
+        "ands x24, x22, x22",
+        "neg x0, x21",
         "lsr x0, x20, x0",
-        "eor x2, x20, x4",
+        "eor x2, x20, x22",
         "mrs x1, nzcv",
         "bfi w1, w0, #29, #1",
         "lsr x2, x2, #63",
         "bfi w1, w2, #28, #1",
-        "msr nzcv, x1"
+        "msr nzcv, x1",
+        "mov x26, x24"
       ]
     },
     "shr ax, cl": {
-      "ExpectedInstructionCount": 15,
+      "ExpectedInstructionCount": 22,
       "Comment": "GROUP2 0xd3 /5",
       "ExpectedArm64ASM": [
-        "uxth w20, w4",
-        "uxth w21, w5",
-        "lsr w22, w20, w21",
-        "bfxil x4, x22, #0, #16",
-        "cbz w21, #+0x2c",
+        "mov x20, x4",
+        "uxth w21, w20",
+        "mov x22, x5",
+        "uxth w23, w22",
+        "lsr w22, w21, w23",
+        "mov x24, x20",
+        "bfxil x24, x22, #0, #16",
+        "mov x4, x24",
+        "mov x20, x26",
+        "mov x24, x20",
+        "cbz w23, #+0x2c",
         "cmn wzr, w22, lsl #16",
-        "mov x26, x22",
-        "sub x0, x21, #0x1 (1)",
-        "lsr w0, w20, w0",
-        "eor w2, w20, w22",
+        "mov x24, x22",
+        "sub x0, x23, #0x1 (1)",
+        "lsr w0, w21, w0",
+        "eor w2, w21, w22",
         "mrs x1, nzcv",
         "bfi w1, w0, #29, #1",
         "lsr w2, w2, #15",
         "bfi w1, w2, #28, #1",
-        "msr nzcv, x1"
+        "msr nzcv, x1",
+        "mov x26, x24"
       ]
     },
     "shr eax, cl": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 19,
       "Comment": "GROUP2 0xd3 /5",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "mov w21, w5",
-        "lsr w4, w20, w21",
-        "cbz w21, #+0x28",
-        "ands w26, w4, w4",
-        "sub x0, x21, #0x1 (1)",
-        "lsr w0, w20, w0",
-        "eor w2, w20, w4",
+        "mov x20, x4",
+        "mov w21, w20",
+        "mov x20, x5",
+        "mov w22, w20",
+        "lsr w20, w21, w22",
+        "mov x4, x20",
+        "mov x23, x26",
+        "mov x24, x23",
+        "cbz w22, #+0x28",
+        "ands w24, w20, w20",
+        "sub x0, x22, #0x1 (1)",
+        "lsr w0, w21, w0",
+        "eor w2, w21, w20",
         "mrs x1, nzcv",
         "bfi w1, w0, #29, #1",
         "lsr w2, w2, #31",
         "bfi w1, w2, #28, #1",
-        "msr nzcv, x1"
+        "msr nzcv, x1",
+        "mov x26, x24"
       ]
     },
     "shr rax, cl": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 17,
       "Comment": "GROUP2 0xd3 /5",
       "ExpectedArm64ASM": [
         "mov x20, x4",
-        "lsr x4, x20, x5",
-        "cbz x5, #+0x28",
-        "ands x26, x4, x4",
-        "sub x0, x5, #0x1 (1)",
+        "mov x21, x5",
+        "lsr x22, x20, x21",
+        "mov x4, x22",
+        "mov x23, x26",
+        "mov x24, x23",
+        "cbz x21, #+0x28",
+        "ands x24, x22, x22",
+        "sub x0, x21, #0x1 (1)",
         "lsr x0, x20, x0",
-        "eor x2, x20, x4",
+        "eor x2, x20, x22",
         "mrs x1, nzcv",
         "bfi w1, w0, #29, #1",
         "lsr x2, x2, #63",
         "bfi w1, w2, #28, #1",
-        "msr nzcv, x1"
+        "msr nzcv, x1",
+        "mov x26, x24"
       ]
     },
     "sar ax, cl": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 20,
       "Comment": "GROUP2 0xd3 /7",
       "ExpectedArm64ASM": [
-        "uxth w20, w4",
-        "uxth w21, w5",
-        "sxth x20, w20",
-        "asr w22, w20, w21",
-        "bfxil x4, x22, #0, #16",
-        "cbz w21, #+0x20",
-        "cmn wzr, w22, lsl #16",
-        "mov x26, x22",
-        "sub x0, x21, #0x1 (1)",
-        "lsr w0, w20, w0",
+        "mov x20, x4",
+        "uxth w21, w20",
+        "mov x22, x5",
+        "uxth w23, w22",
+        "sxth x22, w21",
+        "asr w21, w22, w23",
+        "mov x24, x20",
+        "bfxil x24, x21, #0, #16",
+        "mov x4, x24",
+        "mov x20, x26",
+        "mov x24, x20",
+        "cbz w23, #+0x20",
+        "cmn wzr, w21, lsl #16",
+        "mov x24, x21",
+        "sub x0, x23, #0x1 (1)",
+        "lsr w0, w22, w0",
         "mrs x1, nzcv",
         "bfi w1, w0, #29, #1",
-        "msr nzcv, x1"
+        "msr nzcv, x1",
+        "mov x26, x24"
       ]
     },
     "sar eax, cl": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 16,
       "Comment": "GROUP2 0xd3 /7",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "mov w21, w5",
-        "asr w4, w20, w21",
-        "cbz w21, #+0x1c",
-        "ands w26, w4, w4",
-        "sub x0, x21, #0x1 (1)",
-        "lsr w0, w20, w0",
+        "mov x20, x4",
+        "mov w21, w20",
+        "mov x20, x5",
+        "mov w22, w20",
+        "asr w20, w21, w22",
+        "mov x4, x20",
+        "mov x23, x26",
+        "mov x24, x23",
+        "cbz w22, #+0x1c",
+        "ands w24, w20, w20",
+        "sub x0, x22, #0x1 (1)",
+        "lsr w0, w21, w0",
         "mrs x1, nzcv",
         "bfi w1, w0, #29, #1",
-        "msr nzcv, x1"
+        "msr nzcv, x1",
+        "mov x26, x24"
       ]
     },
     "sar rax, cl": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 14,
       "Comment": "GROUP2 0xd3 /7",
       "ExpectedArm64ASM": [
         "mov x20, x4",
-        "asr x4, x20, x5",
-        "cbz x5, #+0x1c",
-        "ands x26, x4, x4",
-        "sub x0, x5, #0x1 (1)",
+        "mov x21, x5",
+        "asr x22, x20, x21",
+        "mov x4, x22",
+        "mov x23, x26",
+        "mov x24, x23",
+        "cbz x21, #+0x1c",
+        "ands x24, x22, x22",
+        "sub x0, x21, #0x1 (1)",
         "lsr x0, x20, x0",
         "mrs x1, nzcv",
         "bfi w1, w0, #29, #1",
-        "msr nzcv, x1"
+        "msr nzcv, x1",
+        "mov x26, x24"
       ]
     },
     "test bl, 1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP2 0xf6 /0",
       "ExpectedArm64ASM": [
-        "and w26, w7, #0x1",
-        "cmn wzr, w26, lsl #24"
+        "mov x20, x7",
+        "and w21, w20, #0x1",
+        "cmn wzr, w21, lsl #24",
+        "mov x26, x21"
       ]
     },
     "not bl": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP2 0xf6 /2",
       "ExpectedArm64ASM": [
-        "eor x7, x7, #0xff"
+        "mov x20, x7",
+        "eor x21, x20, #0xff",
+        "mov x7, x21"
       ]
     },
     "not bh": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP2 0xf6 /2",
       "ExpectedArm64ASM": [
-        "eor x7, x7, #0xff00"
+        "mov x20, x7",
+        "eor x21, x20, #0xff00",
+        "mov x7, x21"
       ]
     },
     "neg bl": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP2 0xf6 /3",
       "ExpectedArm64ASM": [
-        "mov x27, x7",
-        "cmp wzr, w27, lsl #24",
-        "neg w26, w27",
-        "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x7, x27",
-        "bfxil x7, x26, #0, #8",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "mov x27, x20",
+        "cmp wzr, w20, lsl #24",
+        "neg w21, w20",
+        "mov x26, x21",
+        "mrs x22, nzcv",
+        "eor w23, w22, #0x20000000",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x7, x22",
+        "msr nzcv, x23"
       ]
     },
     "mul bl": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP2 0xf6 /4",
       "ExpectedArm64ASM": [
-        "uxtb x20, w7",
-        "uxtb x21, w4",
-        "mul x20, x20, x21",
-        "bfxil x4, x20, #0, #16",
-        "ubfx x20, x20, #8, #8",
+        "mov x20, x7",
+        "mov x21, x4",
+        "uxtb x22, w20",
+        "uxtb x20, w21",
+        "mul x23, x22, x20",
+        "mov x20, x21",
+        "bfxil x20, x23, #0, #16",
+        "mov x4, x20",
+        "ubfx x20, x23, #8, #8",
         "cmp x20, #0x0 (0)",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "imul bl": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 12,
       "Comment": "GROUP2 0xf6 /5",
       "ExpectedArm64ASM": [
-        "sxtb x20, w7",
-        "sxtb x21, w4",
-        "mul x20, x20, x21",
-        "bfxil x4, x20, #0, #16",
-        "sbfx x21, x20, #8, #8",
-        "sbfx x20, x20, #7, #1",
-        "cmp x21, x20",
+        "mov x20, x7",
+        "mov x21, x4",
+        "sxtb x22, w20",
+        "sxtb x20, w21",
+        "mul x23, x22, x20",
+        "mov x20, x21",
+        "bfxil x20, x23, #0, #16",
+        "mov x4, x20",
+        "sbfx x20, x23, #8, #8",
+        "sbfx x21, x23, #7, #1",
+        "cmp x20, x21",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "div bl": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 16,
       "Comment": "GROUP2 0xf6 /6",
       "ExpectedArm64ASM": [
-        "uxtb w20, w7",
-        "uxth w21, w4",
-        "uxth w0, w21",
-        "uxth w1, w20",
-        "udiv w22, w0, w1",
-        "uxth w0, w21",
-        "uxth w1, w20",
+        "mov x20, x7",
+        "uxtb w21, w20",
+        "mov x20, x4",
+        "uxth w22, w20",
+        "uxth w0, w22",
+        "uxth w1, w21",
+        "udiv w23, w0, w1",
+        "uxth w0, w22",
+        "uxth w1, w21",
         "udiv w2, w0, w1",
-        "msub w20, w2, w1, w0",
-        "mov x0, x22",
-        "bfi x0, x20, #8, #8",
-        "mov x20, x0",
-        "bfxil x4, x20, #0, #16"
+        "msub w24, w2, w1, w0",
+        "mov x21, x23",
+        "bfi x21, x24, #8, #8",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22"
       ]
     },
     "idiv bl": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 14,
       "Comment": "GROUP2 0xf6 /7",
       "ExpectedArm64ASM": [
-        "uxtb w20, w7",
-        "uxth w21, w4",
-        "sxth x21, w21",
-        "sxtb x20, w20",
-        "sdiv x22, x21, x20",
-        "sdiv x0, x21, x20",
-        "msub x20, x0, x20, x21",
-        "mov x0, x22",
-        "bfi x0, x20, #8, #8",
-        "mov x20, x0",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x7",
+        "uxtb w21, w20",
+        "mov x20, x4",
+        "uxth w22, w20",
+        "sxth x23, w22",
+        "sxtb x22, w21",
+        "sdiv x21, x23, x22",
+        "sdiv x0, x23, x22",
+        "msub x24, x0, x22, x23",
+        "mov x22, x21",
+        "bfi x22, x24, #8, #8",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21"
       ]
     },
     "test bx, 1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP2 0xf7 /0",
       "ExpectedArm64ASM": [
-        "and w26, w7, #0x1",
-        "cmn wzr, w26, lsl #16"
+        "mov x20, x7",
+        "and w21, w20, #0x1",
+        "cmn wzr, w21, lsl #16",
+        "mov x26, x21"
       ]
     },
     "test ebx, 1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP2 0xf7 /0",
       "ExpectedArm64ASM": [
-        "ands w26, w7, #0x1"
+        "mov x20, x7",
+        "ands w21, w20, #0x1",
+        "mov x26, x21"
       ]
     },
     "test rbx, 1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP2 0xf7 /0",
       "ExpectedArm64ASM": [
-        "ands x26, x7, #0x1"
+        "mov x20, x7",
+        "ands x21, x20, #0x1",
+        "mov x26, x21"
       ]
     },
     "test bx, -1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP2 0xf7 /0",
       "ExpectedArm64ASM": [
-        "mov x26, x7",
-        "cmn wzr, w26, lsl #16"
+        "mov x20, x7",
+        "mov x21, x20",
+        "cmn wzr, w21, lsl #16",
+        "mov x26, x21"
       ]
     },
     "test ebx, -1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP2 0xf7 /0",
       "ExpectedArm64ASM": [
-        "ands w26, w7, w7"
+        "mov x20, x7",
+        "ands w21, w20, w20",
+        "mov x26, x21"
       ]
     },
     "test rbx, -1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP2 0xf7 /0",
       "ExpectedArm64ASM": [
-        "ands x26, x7, x7"
+        "mov x20, x7",
+        "ands x21, x20, x20",
+        "mov x26, x21"
       ]
     },
     "not bx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP2 0xf7 /1",
       "ExpectedArm64ASM": [
-        "eor x7, x7, #0xffff"
+        "mov x20, x7",
+        "eor x21, x20, #0xffff",
+        "mov x7, x21"
       ]
     },
     "not ebx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP2 0xf7 /1",
       "ExpectedArm64ASM": [
-        "mvn w7, w7"
+        "mov x20, x7",
+        "mvn w21, w20",
+        "mov x7, x21"
       ]
     },
     "not rbx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP2 0xf7 /1",
       "ExpectedArm64ASM": [
-        "mvn x7, x7"
+        "mov x20, x7",
+        "mvn x21, x20",
+        "mov x7, x21"
       ]
     },
     "neg bx": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP2 0xf7 /2",
       "ExpectedArm64ASM": [
-        "mov x27, x7",
-        "cmp wzr, w27, lsl #16",
-        "neg w26, w27",
-        "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x7, x27",
-        "bfxil x7, x26, #0, #16",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "mov x27, x20",
+        "cmp wzr, w20, lsl #16",
+        "neg w21, w20",
+        "mov x26, x21",
+        "mrs x22, nzcv",
+        "eor w23, w22, #0x20000000",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x7, x22",
+        "msr nzcv, x23"
       ]
     },
     "neg ebx": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP2 0xf7 /2",
       "ExpectedArm64ASM": [
-        "mov x27, x7",
-        "negs w26, w27",
+        "mov x20, x7",
+        "mov x27, x20",
+        "negs w21, w20",
+        "mov x26, x21",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x7, x26",
-        "msr nzcv, x20"
+        "eor w22, w20, #0x20000000",
+        "mov x7, x21",
+        "msr nzcv, x22"
       ]
     },
     "neg rbx": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP2 0xf7 /2",
       "ExpectedArm64ASM": [
-        "mov x27, x7",
-        "negs x26, x27",
+        "mov x20, x7",
+        "mov x27, x20",
+        "negs x21, x20",
+        "mov x26, x21",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "mov x7, x26",
-        "msr nzcv, x20"
+        "eor w22, w20, #0x20000000",
+        "mov x7, x21",
+        "msr nzcv, x22"
       ]
     },
     "mul bx": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 15,
       "Comment": "GROUP2 0xf7 /3",
       "ExpectedArm64ASM": [
-        "uxth x20, w7",
-        "uxth x21, w4",
-        "mul x20, x20, x21",
-        "bfxil x4, x20, #0, #16",
-        "ubfx x20, x20, #16, #16",
-        "bfxil x6, x20, #0, #16",
+        "mov x20, x7",
+        "mov x21, x4",
+        "uxth x22, w20",
+        "uxth x20, w21",
+        "mul x23, x22, x20",
+        "mov x20, x21",
+        "bfxil x20, x23, #0, #16",
+        "mov x4, x20",
+        "ubfx x20, x23, #16, #16",
+        "mov x21, x6",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x6, x22",
         "cmp x20, #0x0 (0)",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "mul ebx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP2 0xf7 /3",
       "ExpectedArm64ASM": [
-        "mov w20, w7",
-        "mov w21, w4",
-        "mul x20, x20, x21",
-        "mov w4, w20",
-        "lsr x6, x20, #32",
-        "cmp x6, #0x0 (0)",
+        "mov x20, x7",
+        "mov x21, x4",
+        "mov w22, w20",
+        "mov w20, w21",
+        "mul x21, x22, x20",
+        "mov w20, w21",
+        "lsr x22, x21, #32",
+        "mov x4, x20",
+        "mov x6, x22",
+        "cmp x22, #0x0 (0)",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "mul rbx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP2 0xf7 /3",
       "ExpectedArm64ASM": [
-        "mov x20, x4",
-        "mul x4, x7, x20",
-        "umulh x6, x7, x20",
-        "cmp x6, #0x0 (0)",
+        "mov x20, x7",
+        "mov x21, x4",
+        "mul x22, x20, x21",
+        "umulh x23, x20, x21",
+        "mov x4, x22",
+        "mov x6, x23",
+        "cmp x23, #0x0 (0)",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "imul bx": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 16,
       "Comment": "GROUP2 0xf7 /5",
       "ExpectedArm64ASM": [
-        "sxth x20, w7",
-        "sxth x21, w4",
-        "mul x20, x20, x21",
-        "bfxil x4, x20, #0, #16",
-        "sbfx x21, x20, #16, #16",
-        "bfxil x6, x21, #0, #16",
-        "sbfx x20, x20, #15, #1",
-        "cmp x21, x20",
+        "mov x20, x7",
+        "mov x21, x4",
+        "sxth x22, w20",
+        "sxth x20, w21",
+        "mul x23, x22, x20",
+        "mov x20, x21",
+        "bfxil x20, x23, #0, #16",
+        "mov x4, x20",
+        "sbfx x20, x23, #16, #16",
+        "mov x21, x6",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x6, x22",
+        "sbfx x21, x23, #15, #1",
+        "cmp x20, x21",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "imul ebx": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 14,
       "Comment": "GROUP2 0xf7 /5",
       "ExpectedArm64ASM": [
-        "sxtw x20, w7",
-        "sxtw x21, w4",
-        "mul x20, x20, x21",
-        "mov w4, w20",
-        "lsr x6, x20, #32",
-        "asr x21, x20, #32",
-        "sxtw x20, w20",
-        "sbfx x20, x20, #31, #1",
-        "cmp x21, x20",
+        "mov x20, x7",
+        "mov x21, x4",
+        "sxtw x22, w20",
+        "sxtw x20, w21",
+        "mul x21, x22, x20",
+        "mov w20, w21",
+        "lsr x22, x21, #32",
+        "asr x23, x21, #32",
+        "sxtw x24, w21",
+        "mov x4, x20",
+        "mov x6, x22",
+        "sbfx x20, x24, #31, #1",
+        "cmp x23, x20",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "imul rbx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP2 0xf7 /5",
       "ExpectedArm64ASM": [
-        "smulh x6, x7, x4",
-        "mul x4, x7, x4",
-        "asr x20, x4, #63",
-        "cmp x6, x20",
+        "mov x20, x7",
+        "mov x21, x4",
+        "smulh x22, x20, x21",
+        "mul x23, x20, x21",
+        "mov x4, x23",
+        "mov x6, x22",
+        "asr x20, x23, #63",
+        "cmp x22, x20",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "div bx": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 19,
       "Comment": "GROUP2 0xf7 /6",
       "ExpectedArm64ASM": [
-        "uxth w20, w7",
-        "uxth w21, w4",
-        "uxth w22, w6",
-        "uxth w0, w21",
-        "bfi w0, w22, #16, #16",
-        "udiv w23, w0, w20",
-        "uxth w0, w21",
-        "bfi w0, w22, #16, #16",
-        "udiv w1, w0, w20",
-        "msub w20, w1, w20, w0",
-        "bfxil x4, x23, #0, #16",
-        "bfxil x6, x20, #0, #16"
+        "mov x20, x7",
+        "uxth w21, w20",
+        "mov x20, x4",
+        "uxth w22, w20",
+        "mov x23, x6",
+        "uxth w24, w23",
+        "uxth w0, w22",
+        "bfi w0, w24, #16, #16",
+        "udiv w25, w0, w21",
+        "uxth w0, w22",
+        "bfi w0, w24, #16, #16",
+        "udiv w1, w0, w21",
+        "msub w30, w1, w21, w0",
+        "mov x21, x20",
+        "bfxil x21, x25, #0, #16",
+        "mov x4, x21",
+        "mov x20, x23",
+        "bfxil x20, x30, #0, #16",
+        "mov x6, x20"
       ]
     },
     "div ebx": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 17,
       "Comment": "GROUP2 0xf7 /6",
       "ExpectedArm64ASM": [
-        "mov w20, w7",
-        "mov w21, w4",
-        "mov w22, w6",
-        "mov x0, x21",
-        "bfi x0, x22, #32, #32",
-        "udiv x23, x0, x20",
-        "mov w4, w23",
-        "mov x0, x21",
-        "bfi x0, x22, #32, #32",
-        "udiv x1, x0, x20",
-        "msub x20, x1, x20, x0",
-        "mov w6, w20"
+        "mov x20, x7",
+        "mov w21, w20",
+        "mov x20, x4",
+        "mov w22, w20",
+        "mov x20, x6",
+        "mov w23, w20",
+        "mov x0, x22",
+        "bfi x0, x23, #32, #32",
+        "udiv x20, x0, x21",
+        "mov w24, w20",
+        "mov x0, x22",
+        "bfi x0, x23, #32, #32",
+        "udiv x1, x0, x21",
+        "msub x20, x1, x21, x0",
+        "mov w21, w20",
+        "mov x4, x24",
+        "mov x6, x21"
       ]
     },
     "div rbx": {
-      "ExpectedInstructionCount": 24,
+      "ExpectedInstructionCount": 28,
       "Comment": "GROUP2 0xf7 /6",
       "ExpectedArm64ASM": [
-        "mov x20, x4",
-        "cbz x6, #+0x28",
-        "mov x0, x6",
-        "mov x1, x20",
-        "mov x2, x7",
+        "mov x20, x7",
+        "mov x21, x4",
+        "mov x22, x6",
+        "cbz x22, #+0x28",
+        "mov x0, x22",
+        "mov x1, x21",
+        "mov x2, x20",
         "ldr x3, [x28, #2432]",
         "str x30, [sp, #-16]!",
         "blr x3",
         "ldr x30, [sp], #16",
-        "mov x4, x0",
+        "mov x23, x0",
         "b #+0x8",
-        "udiv x4, x20, x7",
-        "cbz x6, #+0x28",
-        "mov x0, x6",
-        "mov x1, x20",
-        "mov x2, x7",
+        "udiv x23, x21, x20",
+        "cbz x22, #+0x28",
+        "mov x0, x22",
+        "mov x1, x21",
+        "mov x2, x20",
         "ldr x3, [x28, #2448]",
         "str x30, [sp, #-16]!",
         "blr x3",
         "ldr x30, [sp], #16",
-        "mov x6, x0",
+        "mov x24, x0",
         "b #+0xc",
-        "udiv x0, x20, x7",
-        "msub x6, x0, x7, x20"
+        "udiv x0, x21, x20",
+        "msub x24, x0, x20, x21",
+        "mov x4, x23",
+        "mov x6, x24"
       ]
     },
     "idiv bx": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 21,
       "Comment": "GROUP2 0xf7 /7",
       "ExpectedArm64ASM": [
-        "uxth w20, w7",
-        "uxth w21, w4",
-        "uxth w22, w6",
-        "uxth w0, w21",
-        "bfi w0, w22, #16, #16",
-        "sxth w1, w20",
-        "sdiv w23, w0, w1",
-        "uxth w0, w21",
-        "bfi w0, w22, #16, #16",
-        "sxth w1, w20",
+        "mov x20, x7",
+        "uxth w21, w20",
+        "mov x20, x4",
+        "uxth w22, w20",
+        "mov x23, x6",
+        "uxth w24, w23",
+        "uxth w0, w22",
+        "bfi w0, w24, #16, #16",
+        "sxth w1, w21",
+        "sdiv w25, w0, w1",
+        "uxth w0, w22",
+        "bfi w0, w24, #16, #16",
+        "sxth w1, w21",
         "sdiv w2, w0, w1",
-        "msub w20, w2, w1, w0",
-        "bfxil x4, x23, #0, #16",
-        "bfxil x6, x20, #0, #16"
+        "msub w30, w2, w1, w0",
+        "mov x21, x20",
+        "bfxil x21, x25, #0, #16",
+        "mov x4, x21",
+        "mov x20, x23",
+        "bfxil x20, x30, #0, #16",
+        "mov x6, x20"
       ]
     },
     "idiv ebx": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 19,
       "Comment": "GROUP2 0xf7 /7",
       "ExpectedArm64ASM": [
-        "mov w20, w7",
-        "mov w21, w4",
-        "mov w22, w6",
-        "mov x0, x21",
-        "bfi x0, x22, #32, #32",
-        "sxtw x1, w20",
-        "sdiv x23, x0, x1",
-        "mov w4, w23",
-        "mov x0, x21",
-        "bfi x0, x22, #32, #32",
-        "sxtw x2, w20",
+        "mov x20, x7",
+        "mov w21, w20",
+        "mov x20, x4",
+        "mov w22, w20",
+        "mov x20, x6",
+        "mov w23, w20",
+        "mov x0, x22",
+        "bfi x0, x23, #32, #32",
+        "sxtw x1, w21",
+        "sdiv x20, x0, x1",
+        "mov w24, w20",
+        "mov x0, x22",
+        "bfi x0, x23, #32, #32",
+        "sxtw x2, w21",
         "sdiv x1, x0, x2",
         "msub x20, x1, x2, x0",
-        "mov w6, w20"
+        "mov w21, w20",
+        "mov x4, x24",
+        "mov x6, x21"
       ]
     },
     "idiv rbx": {
-      "ExpectedInstructionCount": 28,
+      "ExpectedInstructionCount": 32,
       "Comment": "GROUP2 0xf7 /7",
       "ExpectedArm64ASM": [
-        "mov x20, x4",
-        "asr x0, x20, #63",
-        "eor x0, x0, x6",
+        "mov x20, x7",
+        "mov x21, x4",
+        "mov x22, x6",
+        "asr x0, x21, #63",
+        "eor x0, x0, x22",
         "cbz x0, #+0x28",
-        "mov x0, x6",
-        "mov x1, x20",
-        "mov x2, x7",
+        "mov x0, x22",
+        "mov x1, x21",
+        "mov x2, x20",
         "ldr x3, [x28, #2440]",
         "str x30, [sp, #-16]!",
         "blr x3",
         "ldr x30, [sp], #16",
-        "mov x4, x0",
+        "mov x23, x0",
         "b #+0x8",
-        "sdiv x4, x20, x7",
-        "asr x0, x20, #63",
-        "eor x0, x0, x6",
+        "sdiv x23, x21, x20",
+        "asr x0, x21, #63",
+        "eor x0, x0, x22",
         "cbz x0, #+0x28",
-        "mov x0, x6",
-        "mov x1, x20",
-        "mov x2, x7",
+        "mov x0, x22",
+        "mov x1, x21",
+        "mov x2, x20",
         "ldr x3, [x28, #2456]",
         "str x30, [sp, #-16]!",
         "blr x3",
         "ldr x30, [sp], #16",
-        "mov x6, x0",
+        "mov x24, x0",
         "b #+0xc",
-        "sdiv x0, x20, x7",
-        "msub x6, x0, x7, x20"
+        "sdiv x0, x21, x20",
+        "msub x24, x0, x20, x21",
+        "mov x4, x23",
+        "mov x6, x24"
       ]
     },
     "inc al": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 16,
       "Comment": "GROUP3 0xfe /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "uxtb w27, w4",
-        "cset w21, hs",
-        "lsl w0, w27, #24",
+        "mov x21, x4",
+        "uxtb w22, w21",
+        "cset w23, hs",
+        "mov x27, x22",
+        "lsl w0, w22, #24",
         "cmn w0, w20, lsl #24",
-        "add w26, w27, #0x1 (1)",
-        "mrs x20, nzcv",
-        "bfi w20, w21, #29, #1",
-        "bfxil x4, x26, #0, #8",
-        "msr nzcv, x20"
+        "add w20, w22, #0x1 (1)",
+        "mov x26, x20",
+        "mrs x22, nzcv",
+        "mov w24, w22",
+        "bfi w24, w23, #29, #1",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22",
+        "msr nzcv, x24"
       ]
     },
     "dec al": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 16,
       "Comment": "GROUP3 0xfe /1",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "uxtb w27, w4",
-        "cset w21, hs",
-        "lsl w0, w27, #24",
+        "mov x21, x4",
+        "uxtb w22, w21",
+        "cset w23, hs",
+        "mov x27, x22",
+        "lsl w0, w22, #24",
         "cmp w0, w20, lsl #24",
-        "sub w26, w27, #0x1 (1)",
-        "mrs x20, nzcv",
-        "bfi w20, w21, #29, #1",
-        "bfxil x4, x26, #0, #8",
-        "msr nzcv, x20"
+        "sub w20, w22, #0x1 (1)",
+        "mov x26, x20",
+        "mrs x22, nzcv",
+        "mov w24, w22",
+        "bfi w24, w23, #29, #1",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22",
+        "msr nzcv, x24"
       ]
     },
     "inc ax": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 16,
       "Comment": "GROUP4 0xfe /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "uxth w27, w4",
-        "cset w21, hs",
-        "lsl w0, w27, #16",
+        "mov x21, x4",
+        "uxth w22, w21",
+        "cset w23, hs",
+        "mov x27, x22",
+        "lsl w0, w22, #16",
         "cmn w0, w20, lsl #16",
-        "add w26, w27, #0x1 (1)",
-        "mrs x20, nzcv",
-        "bfi w20, w21, #29, #1",
-        "bfxil x4, x26, #0, #16",
-        "msr nzcv, x20"
+        "add w20, w22, #0x1 (1)",
+        "mov x26, x20",
+        "mrs x22, nzcv",
+        "mov w24, w22",
+        "bfi w24, w23, #29, #1",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x4, x22",
+        "msr nzcv, x24"
       ]
     },
     "inc eax": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP4 0xfe /0",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "cset w20, hs",
-        "adds w26, w27, #0x1 (1)",
-        "mrs x21, nzcv",
-        "mov w0, w21",
-        "bfi w0, w20, #29, #1",
-        "mov w20, w0",
-        "mov x4, x26",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "cset w21, hs",
+        "mov x27, x20",
+        "adds w22, w20, #0x1 (1)",
+        "mov x26, x22",
+        "mrs x20, nzcv",
+        "mov w23, w20",
+        "bfi w23, w21, #29, #1",
+        "mov x4, x22",
+        "msr nzcv, x23"
       ]
     },
     "inc rax": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP4 0xfe /0",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "cset w20, hs",
-        "adds x26, x27, #0x1 (1)",
-        "mrs x21, nzcv",
-        "mov w0, w21",
-        "bfi w0, w20, #29, #1",
-        "mov w20, w0",
-        "mov x4, x26",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "cset w21, hs",
+        "mov x27, x20",
+        "adds x22, x20, #0x1 (1)",
+        "mov x26, x22",
+        "mrs x20, nzcv",
+        "mov w23, w20",
+        "bfi w23, w21, #29, #1",
+        "mov x4, x22",
+        "msr nzcv, x23"
       ]
     },
     "dec ax": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 16,
       "Comment": "GROUP4 0xfe /1",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "uxth w27, w4",
-        "cset w21, hs",
-        "lsl w0, w27, #16",
+        "mov x21, x4",
+        "uxth w22, w21",
+        "cset w23, hs",
+        "mov x27, x22",
+        "lsl w0, w22, #16",
         "cmp w0, w20, lsl #16",
-        "sub w26, w27, #0x1 (1)",
-        "mrs x20, nzcv",
-        "bfi w20, w21, #29, #1",
-        "bfxil x4, x26, #0, #16",
-        "msr nzcv, x20"
+        "sub w20, w22, #0x1 (1)",
+        "mov x26, x20",
+        "mrs x22, nzcv",
+        "mov w24, w22",
+        "bfi w24, w23, #29, #1",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x4, x22",
+        "msr nzcv, x24"
       ]
     },
     "dec eax": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP4 0xfe /1",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "cset w20, hs",
-        "subs w26, w27, #0x1 (1)",
-        "mrs x21, nzcv",
-        "mov w0, w21",
-        "bfi w0, w20, #29, #1",
-        "mov w20, w0",
-        "mov x4, x26",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "cset w21, hs",
+        "mov x27, x20",
+        "subs w22, w20, #0x1 (1)",
+        "mov x26, x22",
+        "mrs x20, nzcv",
+        "mov w23, w20",
+        "bfi w23, w21, #29, #1",
+        "mov x4, x22",
+        "msr nzcv, x23"
       ]
     },
     "dec rax": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP4 0xfe /1",
       "ExpectedArm64ASM": [
-        "mov x27, x4",
-        "cset w20, hs",
-        "subs x26, x27, #0x1 (1)",
-        "mrs x21, nzcv",
-        "mov w0, w21",
-        "bfi w0, w20, #29, #1",
-        "mov w20, w0",
-        "mov x4, x26",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "cset w21, hs",
+        "mov x27, x20",
+        "subs x22, x20, #0x1 (1)",
+        "mov x26, x22",
+        "mrs x20, nzcv",
+        "mov w23, w20",
+        "bfi w23, w21, #29, #1",
+        "mov x4, x22",
+        "msr nzcv, x23"
       ]
     },
     "push ax": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP4 0xff /6",
       "ExpectedArm64ASM": [
-        "strh w4, [x8, #-2]!"
+        "mov x20, x4",
+        "mov x21, x8",
+        "mov x22, x21",
+        "strh w20, [x22, #-2]!",
+        "mov x8, x22"
       ]
     },
     "push rax": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP4 0xff /6",
       "ExpectedArm64ASM": [
-        "str x4, [x8, #-8]!"
+        "mov x20, x4",
+        "mov x21, x8",
+        "mov x22, x21",
+        "str x20, [x22, #-8]!",
+        "mov x8, x22"
       ]
     },
     "mov byte [rax], 0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP11 0xc6 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x0",
-        "strb w20, [x4]"
+        "mov x21, x4",
+        "strb w20, [x21]"
       ]
     },
     "mov word [rax], 0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP11 0xc7 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x0",
-        "strh w20, [x4]"
+        "mov x21, x4",
+        "strh w20, [x21]"
       ]
     },
     "mov dword [rax], 0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP11 0xc7 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x0",
-        "str w20, [x4]"
+        "mov x21, x4",
+        "str w20, [x21]"
       ]
     },
     "mov qword [rax], 0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP11 0xc7 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x0",
-        "str x20, [x4]"
+        "mov x21, x4",
+        "str x20, [x21]"
       ]
     },
     "mov byte [rax], 1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP11 0xc6 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "strb w20, [x4]"
+        "mov x21, x4",
+        "strb w20, [x21]"
       ]
     },
     "mov word [rax], 1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP11 0xc7 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "strh w20, [x4]"
+        "mov x21, x4",
+        "strh w20, [x21]"
       ]
     },
     "mov dword [rax], 1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP11 0xc7 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "str w20, [x4]"
+        "mov x21, x4",
+        "str w20, [x21]"
       ]
     },
     "mov qword [rax], 1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP11 0xc7 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "str x20, [x4]"
+        "mov x21, x4",
+        "str x20, [x21]"
       ]
     },
     "mov byte [rax], -1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP11 0xc6 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0xff",
-        "strb w20, [x4]"
+        "mov x21, x4",
+        "strb w20, [x21]"
       ]
     },
     "mov word [rax], -1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP11 0xc7 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0xffff",
-        "strh w20, [x4]"
+        "mov x21, x4",
+        "strh w20, [x21]"
       ]
     },
     "mov dword [rax], -1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP11 0xc7 /0",
       "ExpectedArm64ASM": [
         "mov w20, #0xffffffff",
-        "str w20, [x4]"
+        "mov x21, x4",
+        "str w20, [x21]"
       ]
     },
     "mov qword [rax], -1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP11 0xc7 /0",
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
-        "str x20, [x4]"
+        "mov x21, x4",
+        "str x20, [x21]"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/Primary_32Bit.json b/unittests/InstructionCountCI/Primary_32Bit.json
index cf8b6df87f..4de497d130 100644
--- a/unittests/InstructionCountCI/Primary_32Bit.json
+++ b/unittests/InstructionCountCI/Primary_32Bit.json
@@ -11,390 +11,523 @@
   },
   "Instructions": {
     "push es": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x06",
       "ExpectedArm64ASM": [
-        "ldrh w20, [x28, #136]",
-        "str w20, [x8, #-4]!"
+        "mov w20, w8",
+        "ldrh w21, [x28, #136]",
+        "mov w22, w20",
+        "str w21, [x22, #-4]!",
+        "mov w8, w22"
       ]
     },
     "pop es": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x07",
       "ExpectedArm64ASM": [
-        "ldr w20, [x8]",
-        "add x8, x8, #0x4 (4)",
-        "strh w20, [x28, #136]",
-        "ubfx w20, w20, #3, #13",
+        "mov w20, w8",
+        "ldr w21, [x20]",
+        "add x22, x20, #0x4 (4)",
+        "mov w8, w22",
+        "strh w21, [x28, #136]",
+        "ubfx w20, w21, #3, #13",
         "add x0, x28, x20, lsl #2",
-        "ldr w20, [x0, #896]",
-        "str w20, [x28, #152]"
+        "ldr w21, [x0, #896]",
+        "str w21, [x28, #152]"
       ]
     },
     "push cs": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0e",
       "ExpectedArm64ASM": [
-        "ldrh w20, [x28, #138]",
-        "str w20, [x8, #-4]!"
+        "mov w20, w8",
+        "ldrh w21, [x28, #138]",
+        "mov w22, w20",
+        "str w21, [x22, #-4]!",
+        "mov w8, w22"
       ]
     },
     "push ss": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x16",
       "ExpectedArm64ASM": [
-        "ldrh w20, [x28, #140]",
-        "str w20, [x8, #-4]!"
+        "mov w20, w8",
+        "ldrh w21, [x28, #140]",
+        "mov w22, w20",
+        "str w21, [x22, #-4]!",
+        "mov w8, w22"
       ]
     },
     "pop ss": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x17",
       "ExpectedArm64ASM": [
-        "ldr w20, [x8]",
-        "add x8, x8, #0x4 (4)",
-        "strh w20, [x28, #140]",
-        "ubfx w20, w20, #3, #13",
+        "mov w20, w8",
+        "ldr w21, [x20]",
+        "add x22, x20, #0x4 (4)",
+        "mov w8, w22",
+        "strh w21, [x28, #140]",
+        "ubfx w20, w21, #3, #13",
         "add x0, x28, x20, lsl #2",
-        "ldr w20, [x0, #896]",
-        "str w20, [x28, #160]"
+        "ldr w21, [x0, #896]",
+        "str w21, [x28, #160]"
       ]
     },
     "push ds": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x1e",
       "ExpectedArm64ASM": [
-        "ldrh w20, [x28, #142]",
-        "str w20, [x8, #-4]!"
+        "mov w20, w8",
+        "ldrh w21, [x28, #142]",
+        "mov w22, w20",
+        "str w21, [x22, #-4]!",
+        "mov w8, w22"
       ]
     },
     "pop ds": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x1f",
       "ExpectedArm64ASM": [
-        "ldr w20, [x8]",
-        "add x8, x8, #0x4 (4)",
-        "strh w20, [x28, #142]",
-        "ubfx w20, w20, #3, #13",
+        "mov w20, w8",
+        "ldr w21, [x20]",
+        "add x22, x20, #0x4 (4)",
+        "mov w8, w22",
+        "strh w21, [x28, #142]",
+        "ubfx w20, w21, #3, #13",
         "add x0, x28, x20, lsl #2",
-        "ldr w20, [x0, #896]",
-        "str w20, [x28, #164]"
+        "ldr w21, [x0, #896]",
+        "str w21, [x28, #164]"
       ]
     },
     "daa": {
-      "ExpectedInstructionCount": 23,
+      "ExpectedInstructionCount": 30,
       "Comment": "0x27",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "cset w21, hs",
-        "and x22, x20, #0xf",
-        "cmp x22, #0x9 (9)",
-        "cset x22, hi",
-        "eor w23, w27, w26",
-        "ubfx w23, w23, #4, #1",
-        "orr x22, x23, x22",
-        "cmp x20, #0x99 (153)",
+        "mov w20, w4",
+        "uxtb w21, w20",
+        "cset w22, hs",
+        "and x23, x21, #0xf",
+        "cmp x23, #0x9 (9)",
+        "cset x24, hi",
+        "mov w23, w27",
+        "mov w25, w26",
+        "eor w12, w23, w25",
+        "ubfx w23, w12, #4, #1",
+        "orr x25, x23, x24",
+        "cmp x21, #0x99 (153)",
         "cset x23, hi",
-        "orr x21, x21, x23",
-        "add x23, x20, #0x6 (6)",
-        "cmp x22, #0x0 (0)",
-        "csel x20, x23, x20, ne",
-        "add x23, x20, #0x60 (96)",
-        "cmp x21, #0x0 (0)",
-        "csel x26, x23, x20, ne",
-        "bfxil w4, w26, #0, #8",
-        "cmn wzr, w26, lsl #24",
+        "orr x24, x22, x23",
+        "add x22, x21, #0x6 (6)",
+        "cmp x25, #0x0 (0)",
+        "csel x23, x22, x21, ne",
+        "add x21, x23, #0x60 (96)",
+        "cmp x24, #0x0 (0)",
+        "csel x22, x21, x23, ne",
+        "mov w21, w20",
+        "bfxil w21, w22, #0, #8",
+        "mov w4, w21",
+        "cmn wzr, w22, lsl #24",
         "mrs x20, nzcv",
-        "orr w20, w20, w21, lsl #29",
-        "eor w27, w26, w22, lsl #4",
-        "msr nzcv, x20"
+        "orr w21, w20, w24, lsl #29",
+        "mov w26, w22",
+        "eor w20, w22, w25, lsl #4",
+        "mov w27, w20",
+        "msr nzcv, x21"
       ]
     },
     "das": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 33,
       "Comment": "0x2f",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "cset w21, hs",
-        "and x22, x20, #0xf",
-        "cmp x22, #0x9 (9)",
-        "cset x22, hi",
-        "eor w23, w27, w26",
-        "ubfx w23, w23, #4, #1",
-        "orr x22, x23, x22",
-        "cmp x20, #0x99 (153)",
+        "mov w20, w4",
+        "uxtb w21, w20",
+        "cset w22, hs",
+        "and x23, x21, #0xf",
+        "cmp x23, #0x9 (9)",
+        "cset x24, hi",
+        "mov w23, w27",
+        "mov w25, w26",
+        "eor w12, w23, w25",
+        "ubfx w23, w12, #4, #1",
+        "orr x25, x23, x24",
+        "cmp x21, #0x99 (153)",
         "cset x23, hi",
-        "orr x21, x21, x23",
-        "cmp x20, #0x6 (6)",
-        "csel x23, x22, x21, lo",
-        "orr w23, w21, w23",
-        "sub x24, x20, #0x6 (6)",
-        "cmp x22, #0x0 (0)",
-        "csel x20, x24, x20, ne",
-        "sub x24, x20, #0x60 (96)",
-        "cmp x21, #0x0 (0)",
-        "csel x26, x24, x20, ne",
-        "bfxil w4, w26, #0, #8",
-        "cmn wzr, w26, lsl #24",
+        "orr x24, x22, x23",
+        "cmp x21, #0x6 (6)",
+        "csel x22, x25, x24, lo",
+        "orr w23, w24, w22",
+        "sub x22, x21, #0x6 (6)",
+        "cmp x25, #0x0 (0)",
+        "csel x12, x22, x21, ne",
+        "sub x21, x12, #0x60 (96)",
+        "cmp x24, #0x0 (0)",
+        "csel x22, x21, x12, ne",
+        "mov w21, w20",
+        "bfxil w21, w22, #0, #8",
+        "mov w4, w21",
+        "cmn wzr, w22, lsl #24",
         "mrs x20, nzcv",
-        "orr w20, w20, w23, lsl #29",
-        "eor w27, w26, w22, lsl #4",
-        "msr nzcv, x20"
+        "orr w21, w20, w23, lsl #29",
+        "mov w26, w22",
+        "eor w20, w22, w25, lsl #4",
+        "mov w27, w20",
+        "msr nzcv, x21"
       ]
     },
     "aaa": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 20,
       "Comment": "0x37",
       "ExpectedArm64ASM": [
-        "and x20, x4, #0xf",
-        "cmp x20, #0x9 (9)",
-        "cset x20, hi",
-        "eor w21, w27, w26",
-        "ubfx w21, w21, #4, #1",
-        "orr x20, x21, x20",
-        "lsl x21, x20, #29",
-        "eor w27, w26, w20, lsl #4",
+        "mov w20, w4",
+        "and x21, x20, #0xf",
+        "cmp x21, #0x9 (9)",
+        "cset x22, hi",
+        "mov w21, w27",
+        "mov w23, w26",
+        "eor w24, w21, w23",
+        "ubfx w21, w24, #4, #1",
+        "orr x24, x21, x22",
+        "lsl x21, x24, #29",
+        "eor w22, w23, w24, lsl #4",
+        "mov w27, w22",
         "msr nzcv, x21",
-        "add w20, w4, #0x106 (262)",
-        "csel w20, w20, w4, hs",
+        "add w21, w20, #0x106 (262)",
+        "csel w22, w21, w20, hs",
         "mov w21, #0xff0f",
-        "and w20, w20, w21",
-        "bfxil w4, w20, #0, #16"
+        "and w23, w22, w21",
+        "mov w21, w20",
+        "bfxil w21, w23, #0, #16",
+        "mov w4, w21"
       ]
     },
     "aas": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 20,
       "Comment": "0x3f",
       "ExpectedArm64ASM": [
-        "and x20, x4, #0xf",
-        "cmp x20, #0x9 (9)",
-        "cset x20, hi",
-        "eor w21, w27, w26",
-        "ubfx w21, w21, #4, #1",
-        "orr x20, x21, x20",
-        "lsl x21, x20, #29",
-        "eor w27, w26, w20, lsl #4",
+        "mov w20, w4",
+        "and x21, x20, #0xf",
+        "cmp x21, #0x9 (9)",
+        "cset x22, hi",
+        "mov w21, w27",
+        "mov w23, w26",
+        "eor w24, w21, w23",
+        "ubfx w21, w24, #4, #1",
+        "orr x24, x21, x22",
+        "lsl x21, x24, #29",
+        "eor w22, w23, w24, lsl #4",
+        "mov w27, w22",
         "msr nzcv, x21",
-        "sub w20, w4, #0x106 (262)",
-        "csel w20, w20, w4, hs",
+        "sub w21, w20, #0x106 (262)",
+        "csel w22, w21, w20, hs",
         "mov w21, #0xff0f",
-        "and w20, w20, w21",
-        "bfxil w4, w20, #0, #16"
+        "and w23, w22, w21",
+        "mov w21, w20",
+        "bfxil w21, w23, #0, #16",
+        "mov w4, w21"
       ]
     },
     "inc ax": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 16,
       "Comment": "0x40",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "uxth w27, w4",
-        "cset w21, hs",
-        "lsl w0, w27, #16",
+        "mov w21, w4",
+        "uxth w22, w21",
+        "cset w23, hs",
+        "mov w27, w22",
+        "lsl w0, w22, #16",
         "cmn w0, w20, lsl #16",
-        "add w26, w27, #0x1 (1)",
-        "mrs x20, nzcv",
-        "bfi w20, w21, #29, #1",
-        "bfxil w4, w26, #0, #16",
-        "msr nzcv, x20"
+        "add w20, w22, #0x1 (1)",
+        "mov w26, w20",
+        "mrs x22, nzcv",
+        "mov w24, w22",
+        "bfi w24, w23, #29, #1",
+        "mov w22, w21",
+        "bfxil w22, w20, #0, #16",
+        "mov w4, w22",
+        "msr nzcv, x24"
       ]
     },
     "inc eax": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 10,
       "Comment": "0x40",
       "ExpectedArm64ASM": [
-        "mov w27, w4",
-        "cset w20, hs",
-        "adds w26, w27, #0x1 (1)",
-        "mrs x21, nzcv",
-        "mov w0, w21",
-        "bfi w0, w20, #29, #1",
-        "mov w20, w0",
-        "mov w4, w26",
-        "msr nzcv, x20"
+        "mov w20, w4",
+        "cset w21, hs",
+        "mov w27, w20",
+        "adds w22, w20, #0x1 (1)",
+        "mov w26, w22",
+        "mrs x20, nzcv",
+        "mov w23, w20",
+        "bfi w23, w21, #29, #1",
+        "mov w4, w22",
+        "msr nzcv, x23"
       ]
     },
     "dec ax": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 16,
       "Comment": "0x48",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
-        "uxth w27, w4",
-        "cset w21, hs",
-        "lsl w0, w27, #16",
+        "mov w21, w4",
+        "uxth w22, w21",
+        "cset w23, hs",
+        "mov w27, w22",
+        "lsl w0, w22, #16",
         "cmp w0, w20, lsl #16",
-        "sub w26, w27, #0x1 (1)",
-        "mrs x20, nzcv",
-        "bfi w20, w21, #29, #1",
-        "bfxil w4, w26, #0, #16",
-        "msr nzcv, x20"
+        "sub w20, w22, #0x1 (1)",
+        "mov w26, w20",
+        "mrs x22, nzcv",
+        "mov w24, w22",
+        "bfi w24, w23, #29, #1",
+        "mov w22, w21",
+        "bfxil w22, w20, #0, #16",
+        "mov w4, w22",
+        "msr nzcv, x24"
       ]
     },
     "push ax": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x50",
       "ExpectedArm64ASM": [
-        "strh w4, [x8, #-2]!"
+        "mov w20, w4",
+        "mov w21, w8",
+        "mov w22, w21",
+        "strh w20, [x22, #-2]!",
+        "mov w8, w22"
       ]
     },
     "push eax": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x50",
       "ExpectedArm64ASM": [
-        "str w4, [x8, #-4]!"
+        "mov w20, w4",
+        "mov w21, w8",
+        "mov w22, w21",
+        "str w20, [x22, #-4]!",
+        "mov w8, w22"
       ]
     },
     "dec eax": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 10,
       "Comment": "0x48",
       "ExpectedArm64ASM": [
-        "mov w27, w4",
-        "cset w20, hs",
-        "subs w26, w27, #0x1 (1)",
-        "mrs x21, nzcv",
-        "mov w0, w21",
-        "bfi w0, w20, #29, #1",
-        "mov w20, w0",
-        "mov w4, w26",
-        "msr nzcv, x20"
+        "mov w20, w4",
+        "cset w21, hs",
+        "mov w27, w20",
+        "subs w22, w20, #0x1 (1)",
+        "mov w26, w22",
+        "mrs x20, nzcv",
+        "mov w23, w20",
+        "bfi w23, w21, #29, #1",
+        "mov w4, w22",
+        "msr nzcv, x23"
       ]
     },
     "pusha": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 25,
       "Comment": "0x60",
       "ExpectedArm64ASM": [
         "mov w20, w8",
-        "str w4, [x20, #-4]!",
-        "str w5, [x20, #-4]!",
-        "str w6, [x20, #-4]!",
-        "str w7, [x20, #-4]!",
-        "str w8, [x20, #-4]!",
-        "str w9, [x20, #-4]!",
-        "str w10, [x20, #-4]!",
-        "mov w8, w20",
-        "str w11, [x8, #-4]!"
+        "mov w21, w4",
+        "mov w22, w20",
+        "str w21, [x22, #-4]!",
+        "mov w21, w5",
+        "mov w23, w22",
+        "str w21, [x23, #-4]!",
+        "mov w21, w6",
+        "mov w22, w23",
+        "str w21, [x22, #-4]!",
+        "mov w21, w7",
+        "mov w23, w22",
+        "str w21, [x23, #-4]!",
+        "mov w21, w23",
+        "str w20, [x21, #-4]!",
+        "mov w20, w9",
+        "mov w22, w21",
+        "str w20, [x22, #-4]!",
+        "mov w20, w10",
+        "mov w21, w22",
+        "str w20, [x21, #-4]!",
+        "mov w20, w11",
+        "mov w22, w21",
+        "str w20, [x22, #-4]!",
+        "mov w8, w22"
       ]
     },
     "pushad": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 25,
       "Comment": "0x60",
       "ExpectedArm64ASM": [
         "mov w20, w8",
-        "str w4, [x20, #-4]!",
-        "str w5, [x20, #-4]!",
-        "str w6, [x20, #-4]!",
-        "str w7, [x20, #-4]!",
-        "str w8, [x20, #-4]!",
-        "str w9, [x20, #-4]!",
-        "str w10, [x20, #-4]!",
-        "mov w8, w20",
-        "str w11, [x8, #-4]!"
+        "mov w21, w4",
+        "mov w22, w20",
+        "str w21, [x22, #-4]!",
+        "mov w21, w5",
+        "mov w23, w22",
+        "str w21, [x23, #-4]!",
+        "mov w21, w6",
+        "mov w22, w23",
+        "str w21, [x22, #-4]!",
+        "mov w21, w7",
+        "mov w23, w22",
+        "str w21, [x23, #-4]!",
+        "mov w21, w23",
+        "str w20, [x21, #-4]!",
+        "mov w20, w9",
+        "mov w22, w21",
+        "str w20, [x22, #-4]!",
+        "mov w20, w10",
+        "mov w21, w22",
+        "str w20, [x21, #-4]!",
+        "mov w20, w11",
+        "mov w22, w21",
+        "str w20, [x22, #-4]!",
+        "mov w8, w22"
       ]
     },
     "popa": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 23,
       "Comment": "0x61",
       "ExpectedArm64ASM": [
-        "ldr w11, [x8]",
-        "add x20, x8, #0x4 (4)",
-        "ldr w10, [x20]",
-        "add x20, x20, #0x4 (4)",
-        "ldr w9, [x20]",
-        "add x20, x20, #0x8 (8)",
-        "ldr w7, [x20]",
-        "add x20, x20, #0x4 (4)",
-        "ldr w6, [x20]",
-        "add x20, x20, #0x4 (4)",
-        "ldr w5, [x20]",
-        "add x20, x20, #0x4 (4)",
-        "ldr w4, [x20]",
-        "add x8, x20, #0x4 (4)"
+        "mov w20, w8",
+        "ldr w21, [x20]",
+        "mov w11, w21",
+        "add x21, x20, #0x4 (4)",
+        "ldr w20, [x21]",
+        "mov w10, w20",
+        "add x20, x21, #0x4 (4)",
+        "ldr w21, [x20]",
+        "mov w9, w21",
+        "add x21, x20, #0x8 (8)",
+        "ldr w20, [x21]",
+        "mov w7, w20",
+        "add x20, x21, #0x4 (4)",
+        "ldr w21, [x20]",
+        "mov w6, w21",
+        "add x21, x20, #0x4 (4)",
+        "ldr w20, [x21]",
+        "mov w5, w20",
+        "add x20, x21, #0x4 (4)",
+        "ldr w21, [x20]",
+        "mov w4, w21",
+        "add x21, x20, #0x4 (4)",
+        "mov w8, w21"
       ]
     },
     "popad": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 23,
       "Comment": "0x61",
       "ExpectedArm64ASM": [
-        "ldr w11, [x8]",
-        "add x20, x8, #0x4 (4)",
-        "ldr w10, [x20]",
-        "add x20, x20, #0x4 (4)",
-        "ldr w9, [x20]",
-        "add x20, x20, #0x8 (8)",
-        "ldr w7, [x20]",
-        "add x20, x20, #0x4 (4)",
-        "ldr w6, [x20]",
-        "add x20, x20, #0x4 (4)",
-        "ldr w5, [x20]",
-        "add x20, x20, #0x4 (4)",
-        "ldr w4, [x20]",
-        "add x8, x20, #0x4 (4)"
+        "mov w20, w8",
+        "ldr w21, [x20]",
+        "mov w11, w21",
+        "add x21, x20, #0x4 (4)",
+        "ldr w20, [x21]",
+        "mov w10, w20",
+        "add x20, x21, #0x4 (4)",
+        "ldr w21, [x20]",
+        "mov w9, w21",
+        "add x21, x20, #0x8 (8)",
+        "ldr w20, [x21]",
+        "mov w7, w20",
+        "add x20, x21, #0x4 (4)",
+        "ldr w21, [x20]",
+        "mov w6, w21",
+        "add x21, x20, #0x4 (4)",
+        "ldr w20, [x21]",
+        "mov w5, w20",
+        "add x20, x21, #0x4 (4)",
+        "ldr w21, [x20]",
+        "mov w4, w21",
+        "add x21, x20, #0x4 (4)",
+        "mov w8, w21"
       ]
     },
     "aam": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 12,
       "Comment": "0xd4",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "mov w21, #0xa",
-        "udiv x22, x20, x21",
-        "udiv x2, x20, x21",
-        "msub x20, x2, x21, x20",
-        "add x26, x20, x22, lsl #8",
-        "bfxil w4, w26, #0, #16",
-        "cmn wzr, w26, lsl #24"
+        "mov w20, w4",
+        "uxtb w21, w20",
+        "mov w22, #0xa",
+        "udiv x23, x21, x22",
+        "udiv x2, x21, x22",
+        "msub x24, x2, x22, x21",
+        "add x21, x24, x23, lsl #8",
+        "mov w22, w20",
+        "bfxil w22, w21, #0, #16",
+        "mov w4, w22",
+        "cmn wzr, w21, lsl #24",
+        "mov w26, w21"
       ]
     },
     "aad": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": "0xd5",
       "ExpectedArm64ASM": [
-        "lsr w20, w4, #8",
-        "mov w21, #0xa",
-        "mul x20, x20, x21",
-        "add x20, x4, x20",
-        "and x26, x20, #0xff",
-        "bfxil w4, w26, #0, #16",
-        "cmn wzr, w26, lsl #24"
+        "mov w20, w4",
+        "lsr w21, w20, #8",
+        "mov w22, #0xa",
+        "mul x23, x21, x22",
+        "add x21, x20, x23",
+        "and x22, x21, #0xff",
+        "mov w21, w20",
+        "bfxil w21, w22, #0, #16",
+        "mov w4, w21",
+        "cmn wzr, w22, lsl #24",
+        "mov w26, w22"
       ]
     },
     "db 0xd4, 0x40": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 12,
       "Comment": [
         "aam with a different immediate byte base",
         "0xd4"
       ],
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "mov w21, #0x40",
-        "udiv x22, x20, x21",
-        "udiv x2, x20, x21",
-        "msub x20, x2, x21, x20",
-        "add x26, x20, x22, lsl #8",
-        "bfxil w4, w26, #0, #16",
-        "cmn wzr, w26, lsl #24"
+        "mov w20, w4",
+        "uxtb w21, w20",
+        "mov w22, #0x40",
+        "udiv x23, x21, x22",
+        "udiv x2, x21, x22",
+        "msub x24, x2, x22, x21",
+        "add x21, x24, x23, lsl #8",
+        "mov w22, w20",
+        "bfxil w22, w21, #0, #16",
+        "mov w4, w22",
+        "cmn wzr, w21, lsl #24",
+        "mov w26, w21"
       ]
     },
     "db 0xd5, 0x40": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "aad with a different immediate byte base",
         "0xd5"
       ],
       "ExpectedArm64ASM": [
-        "lsr w20, w4, #8",
-        "lsl x20, x20, #6",
-        "add x20, x4, x20",
-        "and x26, x20, #0xff",
-        "bfxil w4, w26, #0, #16",
-        "cmn wzr, w26, lsl #24"
+        "mov w20, w4",
+        "lsr w21, w20, #8",
+        "lsl x22, x21, #6",
+        "add x21, x20, x22",
+        "and x22, x21, #0xff",
+        "mov w21, w20",
+        "bfxil w21, w22, #0, #16",
+        "mov w4, w21",
+        "cmn wzr, w22, lsl #24",
+        "mov w26, w22"
       ]
     },
     "salc": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0xd6",
       "ExpectedArm64ASM": [
         "csetm w20, hs",
-        "bfxil w4, w20, #0, #8"
+        "mov w21, w4",
+        "mov w22, w21",
+        "bfxil w22, w20, #0, #8",
+        "mov w4, w22"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/RPRES/DDD.json b/unittests/InstructionCountCI/RPRES/DDD.json
index 8208bfd80d..d67b5c7437 100644
--- a/unittests/InstructionCountCI/RPRES/DDD.json
+++ b/unittests/InstructionCountCI/RPRES/DDD.json
@@ -18,8 +18,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
-        "frecpe v2.2s, v2.2s",
-        "str d2, [x28, #768]"
+        "frecpe v3.2s, v2.2s",
+        "str d3, [x28, #768]"
       ]
     },
     "pfrsqrtv mm0, mm1": {
@@ -29,8 +29,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
-        "frsqrte v2.2s, v2.2s",
-        "str d2, [x28, #768]"
+        "frsqrte v3.2s, v2.2s",
+        "str d3, [x28, #768]"
       ]
     },
     "pfrcp mm0, mm1": {
@@ -40,8 +40,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
-        "frecpe s2, s2",
-        "dup v2.2s, v2.s[0]",
+        "frecpe s3, s2",
+        "dup v2.2s, v3.s[0]",
         "str d2, [x28, #768]"
       ]
     },
@@ -52,8 +52,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
-        "frsqrte s2, s2",
-        "dup v2.2s, v2.s[0]",
+        "frsqrte s3, s2",
+        "dup v2.2s, v3.s[0]",
         "str d2, [x28, #768]"
       ]
     }
diff --git a/unittests/InstructionCountCI/RPRES/Secondary.json b/unittests/InstructionCountCI/RPRES/Secondary.json
index b5cd83ccde..71ee845371 100644
--- a/unittests/InstructionCountCI/RPRES/Secondary.json
+++ b/unittests/InstructionCountCI/RPRES/Secondary.json
@@ -12,21 +12,25 @@
   },
   "Instructions": {
     "rsqrtps xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "0x0f 0x52"
       ],
       "ExpectedArm64ASM": [
-        "frsqrte v16.4s, v17.4s"
+        "mov v2.16b, v17.16b",
+        "frsqrte v3.4s, v2.4s",
+        "mov v16.16b, v3.16b"
       ]
     },
     "rcpps xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "0x0f 0x53"
       ],
       "ExpectedArm64ASM": [
-        "frecpe v16.4s, v17.4s"
+        "mov v2.16b, v17.16b",
+        "frecpe v3.4s, v2.4s",
+        "mov v16.16b, v3.16b"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/RPRES/Secondary_REP_AFP.json b/unittests/InstructionCountCI/RPRES/Secondary_REP_AFP.json
index ecff22b523..417a1bb541 100644
--- a/unittests/InstructionCountCI/RPRES/Secondary_REP_AFP.json
+++ b/unittests/InstructionCountCI/RPRES/Secondary_REP_AFP.json
@@ -12,21 +12,29 @@
   },
   "Instructions": {
     "rsqrtss xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf3 0x0f 0x52"
       ],
       "ExpectedArm64ASM": [
-        "frsqrte s16, s17"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "frsqrte s4, s3",
+        "mov v16.16b, v4.16b"
       ]
     },
     "rcpss xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xf3 0x0f 0x53"
       ],
       "ExpectedArm64ASM": [
-        "frecpe s16, s17"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "frecpe s4, s3",
+        "mov v16.16b, v4.16b"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/RPRES/VEX_map1_AFP.json b/unittests/InstructionCountCI/RPRES/VEX_map1_AFP.json
index 40eeb98d42..ffb63080e6 100644
--- a/unittests/InstructionCountCI/RPRES/VEX_map1_AFP.json
+++ b/unittests/InstructionCountCI/RPRES/VEX_map1_AFP.json
@@ -11,60 +11,74 @@
   },
   "Instructions": {
     "vrsqrtps xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b00 0x52 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "frsqrte v16.4s, v17.4s"
+        "mov z2.d, p7/m, z17.d",
+        "frsqrte v3.4s, v2.4s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vrsqrtps ymm0, ymm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b00 0x52 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "frsqrte z16.s, z17.s"
+        "mov z2.d, p7/m, z17.d",
+        "frsqrte z3.s, z2.s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vrsqrtss xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "AFP can make this more optimal",
         "Map 1 0b10 0x52 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "frsqrte s16, s18"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "frsqrte s4, s3",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vrcpps xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b00 0x53 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "frecpe v16.4s, v17.4s"
+        "mov z2.d, p7/m, z17.d",
+        "frecpe v3.4s, v2.4s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vrcpps ymm0, ymm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b00 0x53 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "frecpe z16.s, z17.s"
+        "mov z2.d, p7/m, z17.d",
+        "frecpe z3.s, z2.s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vrcpss xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b10 0x53 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "frecpe s16, s18"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "frecpe s4, s3",
+        "mov z16.d, p7/m, z4.d"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/Secondary.json b/unittests/InstructionCountCI/Secondary.json
index 8978954d99..4773329c58 100644
--- a/unittests/InstructionCountCI/Secondary.json
+++ b/unittests/InstructionCountCI/Secondary.json
@@ -32,80 +32,111 @@
       "ExpectedArm64ASM": []
     },
     "movups xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "0x0f 0x10",
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b"
+        "mov v2.16b, v17.16b",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movups xmm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0x10",
       "ExpectedArm64ASM": [
-        "ldr q16, [x4]"
+        "mov x20, x4",
+        "ldr q2, [x20]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movups [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0x11",
       "ExpectedArm64ASM": [
-        "str q16, [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "str q2, [x20]"
       ]
     },
     "movlps xmm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x12",
       "ExpectedArm64ASM": [
-        "ld1 {v16.d}[0], [x4]"
+        "mov x20, x4",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v2.16b",
+        "ld1 {v3.d}[0], [x20]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "movlps [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0x13",
       "ExpectedArm64ASM": [
-        "str d16, [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "str d2, [x20]"
       ]
     },
     "movhlps xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x12",
       "ExpectedArm64ASM": [
-        "mov v16.d[0], v17.d[1]"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "mov v4.16b, v3.16b",
+        "mov v4.d[0], v2.d[1]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "unpcklps xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x14",
       "ExpectedArm64ASM": [
-        "zip1 v16.4s, v16.4s, v17.4s"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "zip1 v4.4s, v2.4s, v3.4s",
+        "mov v16.16b, v4.16b"
       ]
     },
     "unpckhps xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x15",
       "ExpectedArm64ASM": [
-        "zip2 v16.4s, v16.4s, v17.4s"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "zip2 v4.4s, v2.4s, v3.4s",
+        "mov v16.16b, v4.16b"
       ]
     },
     "movhps xmm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x16",
       "ExpectedArm64ASM": [
-        "ld1 {v16.d}[1], [x4]"
+        "mov x20, x4",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v2.16b",
+        "ld1 {v3.d}[1], [x20]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "movlhps xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x16",
       "ExpectedArm64ASM": [
-        "mov v16.d[1], v17.d[0]"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "mov v4.16b, v3.16b",
+        "mov v4.d[1], v2.d[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "movhps [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0x17",
       "ExpectedArm64ASM": [
-        "st1 {v16.d}[1], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "st1 {v2.d}[1], [x20]"
       ]
     },
     "nop": {
@@ -119,670 +150,912 @@
       "ExpectedArm64ASM": []
     },
     "movaps xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "0x0f 0x28",
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b"
+        "mov v2.16b, v17.16b",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movaps xmm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0x28",
       "ExpectedArm64ASM": [
-        "ldr q16, [x4]"
+        "mov x20, x4",
+        "ldr q2, [x20]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movaps [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0x29",
       "ExpectedArm64ASM": [
-        "str q16, [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "str q2, [x20]"
       ]
     },
     "cvtpi2ps xmm0, [rax]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0x0f 0x2a"
       ],
       "ExpectedArm64ASM": [
-        "ldr d2, [x4]",
-        "scvtf v0.2s, v2.2s",
-        "mov v16.d[0], v0.d[0]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "ldr d3, [x20]",
+        "mov v4.16b, v2.16b",
+        "scvtf v0.2s, v3.2s",
+        "mov v4.d[0], v0.d[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cvtpi2ps xmm0, mm0": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0x0f 0x2a"
       ],
       "ExpectedArm64ASM": [
-        "ldr d2, [x28, #768]",
-        "scvtf v0.2s, v2.2s",
-        "mov v16.d[0], v0.d[0]"
+        "mov v2.16b, v16.16b",
+        "ldr d3, [x28, #768]",
+        "mov v4.16b, v2.16b",
+        "scvtf v0.2s, v3.2s",
+        "mov v4.d[0], v0.d[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "movntps [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0x2b",
       "ExpectedArm64ASM": [
-        "str q16, [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "str q2, [x20]"
       ]
     },
     "cvttps2pi mm0, [rax]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x2c",
       "ExpectedArm64ASM": [
-        "ldr d2, [x4]",
-        "fcvtzs v2.2s, v2.2s",
-        "str d2, [x28, #768]"
+        "mov x20, x4",
+        "ldr d2, [x20]",
+        "fcvtzs v3.2s, v2.2s",
+        "str d3, [x28, #768]"
       ]
     },
     "cvttps2pi mm0, xmm0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0x2c",
       "ExpectedArm64ASM": [
-        "fcvtzs v2.2s, v16.2s",
-        "str d2, [x28, #768]"
+        "mov v2.16b, v16.16b",
+        "fcvtzs v3.2s, v2.2s",
+        "str d3, [x28, #768]"
       ]
     },
     "cvtps2pi mm0, [rax]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x2d",
       "ExpectedArm64ASM": [
-        "ldr d2, [x4]",
-        "frinti v2.2s, v2.2s",
-        "fcvtzs v2.2s, v2.2s",
-        "str d2, [x28, #768]"
+        "mov x20, x4",
+        "ldr d2, [x20]",
+        "frinti v3.2s, v2.2s",
+        "fcvtzs v3.2s, v3.2s",
+        "str d3, [x28, #768]"
       ]
     },
     "cvtps2pi mm0, xmm0": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x2d",
       "ExpectedArm64ASM": [
-        "frinti v2.2s, v16.2s",
-        "fcvtzs v2.2s, v2.2s",
-        "str d2, [x28, #768]"
+        "mov v2.16b, v16.16b",
+        "frinti v3.2s, v2.2s",
+        "fcvtzs v3.2s, v3.2s",
+        "str d3, [x28, #768]"
       ]
     },
     "ucomiss xmm0, xmm1": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 15,
       "Comment": "0x0f 0x2e",
       "ExpectedArm64ASM": [
-        "fcmp s16, s17",
-        "mov w27, #0x0",
-        "cset w20, eq",
-        "cset w21, lo",
-        "cset w22, vs",
-        "orr w21, w21, w22",
-        "lsl x21, x21, #29",
-        "orr w20, w20, w22",
-        "orr w20, w21, w20, lsl #30",
-        "eor w26, w22, #0x1",
-        "msr nzcv, x20"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "fcmp s2, s3",
+        "mov w20, #0x0",
+        "cset w21, eq",
+        "cset w22, lo",
+        "cset w23, vs",
+        "orr w24, w22, w23",
+        "lsl x22, x24, #29",
+        "orr w24, w21, w23",
+        "orr w21, w22, w24, lsl #30",
+        "eor w22, w23, #0x1",
+        "mov x26, x22",
+        "mov x27, x20",
+        "msr nzcv, x21"
       ]
     },
     "comiss xmm0, xmm1": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 15,
       "Comment": "0x0f 0x2f",
       "ExpectedArm64ASM": [
-        "fcmp s16, s17",
-        "mov w27, #0x0",
-        "cset w20, eq",
-        "cset w21, lo",
-        "cset w22, vs",
-        "orr w21, w21, w22",
-        "lsl x21, x21, #29",
-        "orr w20, w20, w22",
-        "orr w20, w21, w20, lsl #30",
-        "eor w26, w22, #0x1",
-        "msr nzcv, x20"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "fcmp s2, s3",
+        "mov w20, #0x0",
+        "cset w21, eq",
+        "cset w22, lo",
+        "cset w23, vs",
+        "orr w24, w22, w23",
+        "lsl x22, x24, #29",
+        "orr w24, w21, w23",
+        "orr w21, w22, w24, lsl #30",
+        "eor w22, w23, #0x1",
+        "mov x26, x22",
+        "mov x27, x20",
+        "msr nzcv, x21"
       ]
     },
     "rdtsc": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x31",
       "ExpectedArm64ASM": [
         "mrs x20, S3_3_c14_c0_2",
-        "lsl w4, w20, #7",
-        "lsr x6, x20, #25"
+        "lsl w21, w20, #7",
+        "lsr x22, x20, #25",
+        "mov x4, x21",
+        "mov x6, x22"
       ]
     },
     "cmovo ax, bx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0x40",
       "ExpectedArm64ASM": [
-        "csel w20, w7, w4, vs",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, vs",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21"
       ]
     },
     "cmovo eax, ebx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x40",
       "ExpectedArm64ASM": [
-        "csel w4, w7, w4, vs"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, vs",
+        "mov x4, x22"
       ]
     },
     "cmovo rax, rbx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x40",
       "ExpectedArm64ASM": [
-        "csel x4, x7, x4, vs"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel x22, x21, x20, vs",
+        "mov x4, x22"
       ]
     },
     "cmovno ax, bx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0x41",
       "ExpectedArm64ASM": [
-        "csel w20, w7, w4, vc",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, vc",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21"
       ]
     },
     "cmovno eax, ebx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x41",
       "ExpectedArm64ASM": [
-        "csel w4, w7, w4, vc"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, vc",
+        "mov x4, x22"
       ]
     },
     "cmovno rax, rbx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x41",
       "ExpectedArm64ASM": [
-        "csel x4, x7, x4, vc"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel x22, x21, x20, vc",
+        "mov x4, x22"
       ]
     },
     "cmovb ax, bx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0x42",
       "ExpectedArm64ASM": [
-        "csel w20, w7, w4, hs",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, hs",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21"
       ]
     },
     "cmovb eax, ebx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x42",
       "ExpectedArm64ASM": [
-        "csel w4, w7, w4, hs"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, hs",
+        "mov x4, x22"
       ]
     },
     "cmovb rax, rbx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x42",
       "ExpectedArm64ASM": [
-        "csel x4, x7, x4, hs"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel x22, x21, x20, hs",
+        "mov x4, x22"
       ]
     },
     "cmovnb ax, bx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0x43",
       "ExpectedArm64ASM": [
-        "csel w20, w7, w4, lo",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, lo",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21"
       ]
     },
     "cmovnb eax, ebx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x43",
       "ExpectedArm64ASM": [
-        "csel w4, w7, w4, lo"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, lo",
+        "mov x4, x22"
       ]
     },
     "cmovnb rax, rbx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x43",
       "ExpectedArm64ASM": [
-        "csel x4, x7, x4, lo"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel x22, x21, x20, lo",
+        "mov x4, x22"
       ]
     },
     "cmovz ax, bx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0x44",
       "ExpectedArm64ASM": [
-        "csel w20, w7, w4, eq",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, eq",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21"
       ]
     },
     "cmovz eax, ebx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x44",
       "ExpectedArm64ASM": [
-        "csel w4, w7, w4, eq"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, eq",
+        "mov x4, x22"
       ]
     },
     "cmovz rax, rbx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x44",
       "ExpectedArm64ASM": [
-        "csel x4, x7, x4, eq"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel x22, x21, x20, eq",
+        "mov x4, x22"
       ]
     },
     "cmovnz ax, bx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0x45",
       "ExpectedArm64ASM": [
-        "csel w20, w7, w4, ne",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, ne",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21"
       ]
     },
     "cmovnz eax, ebx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x45",
       "ExpectedArm64ASM": [
-        "csel w4, w7, w4, ne"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, ne",
+        "mov x4, x22"
       ]
     },
     "cmovnz rax, rbx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x45",
       "ExpectedArm64ASM": [
-        "csel x4, x7, x4, ne"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel x22, x21, x20, ne",
+        "mov x4, x22"
       ]
     },
     "cmovbe ax, bx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x0f 0x46",
       "ExpectedArm64ASM": [
-        "csel w20, w7, w4, hs",
-        "csel w20, w7, w20, eq",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, hs",
+        "csel w23, w21, w22, eq",
+        "mov x21, x20",
+        "bfxil x21, x23, #0, #16",
+        "mov x4, x21"
       ]
     },
     "cmovbe eax, ebx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x46",
       "ExpectedArm64ASM": [
-        "csel w20, w7, w4, hs",
-        "csel w4, w7, w20, eq"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, hs",
+        "csel w20, w21, w22, eq",
+        "mov x4, x20"
       ]
     },
     "cmovbe rax, rbx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x46",
       "ExpectedArm64ASM": [
-        "csel x20, x7, x4, hs",
-        "csel x4, x7, x20, eq"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel x22, x21, x20, hs",
+        "csel x20, x21, x22, eq",
+        "mov x4, x20"
       ]
     },
     "cmovnbe ax, bx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x0f 0x47",
       "ExpectedArm64ASM": [
-        "csel w20, w7, w4, lo",
-        "csel w20, w20, w4, ne",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, lo",
+        "csel w21, w22, w20, ne",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22"
       ]
     },
     "cmovnbe eax, ebx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x47",
       "ExpectedArm64ASM": [
-        "csel w20, w7, w4, lo",
-        "csel w4, w20, w4, ne"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, lo",
+        "csel w21, w22, w20, ne",
+        "mov x4, x21"
       ]
     },
     "cmovnbe rax, rbx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x47",
       "ExpectedArm64ASM": [
-        "csel x20, x7, x4, lo",
-        "csel x4, x20, x4, ne"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel x22, x21, x20, lo",
+        "csel x21, x22, x20, ne",
+        "mov x4, x21"
       ]
     },
     "cmovs ax, bx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0x48",
       "ExpectedArm64ASM": [
-        "csel w20, w7, w4, mi",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, mi",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21"
       ]
     },
     "cmovs eax, ebx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x48",
       "ExpectedArm64ASM": [
-        "csel w4, w7, w4, mi"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, mi",
+        "mov x4, x22"
       ]
     },
     "cmovs rax, rbx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x48",
       "ExpectedArm64ASM": [
-        "csel x4, x7, x4, mi"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel x22, x21, x20, mi",
+        "mov x4, x22"
       ]
     },
     "cmovns ax, bx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0x49",
       "ExpectedArm64ASM": [
-        "csel w20, w7, w4, pl",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, pl",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21"
       ]
     },
     "cmovns eax, ebx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x49",
       "ExpectedArm64ASM": [
-        "csel w4, w7, w4, pl"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, pl",
+        "mov x4, x22"
       ]
     },
     "cmovns rax, rbx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x49",
       "ExpectedArm64ASM": [
-        "csel x4, x7, x4, pl"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel x22, x21, x20, pl",
+        "mov x4, x22"
       ]
     },
     "cmovpe ax, bx": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x0f 0x4a",
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csel w20, w7, w4, ne",
-        "bfxil x4, x20, #0, #16",
-        "msr nzcv, x21"
+        "mov x20, x4",
+        "mov x21, x7",
+        "mov x22, x26",
+        "eor w23, w22, w22, lsr #4",
+        "eor w22, w23, w23, lsr #2",
+        "eon w23, w22, w22, lsr #1",
+        "mrs x22, nzcv",
+        "tst w23, #0x1",
+        "csel w23, w21, w20, ne",
+        "mov x21, x20",
+        "bfxil x21, x23, #0, #16",
+        "mov x4, x21",
+        "msr nzcv, x22"
       ]
     },
     "cmovpe eax, ebx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x0f 0x4a",
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csel w4, w7, w4, ne",
-        "msr nzcv, x21"
+        "mov x20, x4",
+        "mov x21, x7",
+        "mov x22, x26",
+        "eor w23, w22, w22, lsr #4",
+        "eor w22, w23, w23, lsr #2",
+        "eon w23, w22, w22, lsr #1",
+        "mrs x22, nzcv",
+        "tst w23, #0x1",
+        "csel w23, w21, w20, ne",
+        "mov x4, x23",
+        "msr nzcv, x22"
       ]
     },
     "cmovpe rax, rbx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x0f 0x4a",
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csel x4, x7, x4, ne",
-        "msr nzcv, x21"
+        "mov x20, x4",
+        "mov x21, x7",
+        "mov x22, x26",
+        "eor w23, w22, w22, lsr #4",
+        "eor w22, w23, w23, lsr #2",
+        "eon w23, w22, w22, lsr #1",
+        "mrs x22, nzcv",
+        "tst w23, #0x1",
+        "csel x23, x21, x20, ne",
+        "mov x4, x23",
+        "msr nzcv, x22"
       ]
     },
     "cmovnp ax, bx": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x0f 0x4b",
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csel w20, w7, w4, ne",
-        "bfxil x4, x20, #0, #16",
-        "msr nzcv, x21"
+        "mov x20, x4",
+        "mov x21, x7",
+        "mov x22, x26",
+        "eor w23, w22, w22, lsr #4",
+        "eor w22, w23, w23, lsr #2",
+        "eor w23, w22, w22, lsr #1",
+        "mrs x22, nzcv",
+        "tst w23, #0x1",
+        "csel w23, w21, w20, ne",
+        "mov x21, x20",
+        "bfxil x21, x23, #0, #16",
+        "mov x4, x21",
+        "msr nzcv, x22"
       ]
     },
     "cmovnp eax, ebx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x0f 0x4b",
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csel w4, w7, w4, ne",
-        "msr nzcv, x21"
+        "mov x20, x4",
+        "mov x21, x7",
+        "mov x22, x26",
+        "eor w23, w22, w22, lsr #4",
+        "eor w22, w23, w23, lsr #2",
+        "eor w23, w22, w22, lsr #1",
+        "mrs x22, nzcv",
+        "tst w23, #0x1",
+        "csel w23, w21, w20, ne",
+        "mov x4, x23",
+        "msr nzcv, x22"
       ]
     },
     "cmovnp rax, rbx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x0f 0x4b",
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csel x4, x7, x4, ne",
-        "msr nzcv, x21"
+        "mov x20, x4",
+        "mov x21, x7",
+        "mov x22, x26",
+        "eor w23, w22, w22, lsr #4",
+        "eor w22, w23, w23, lsr #2",
+        "eor w23, w22, w22, lsr #1",
+        "mrs x22, nzcv",
+        "tst w23, #0x1",
+        "csel x23, x21, x20, ne",
+        "mov x4, x23",
+        "msr nzcv, x22"
       ]
     },
     "cmovl ax, bx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0x4c",
       "ExpectedArm64ASM": [
-        "csel w20, w7, w4, lt",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, lt",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21"
       ]
     },
     "cmovl eax, ebx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x4c",
       "ExpectedArm64ASM": [
-        "csel w4, w7, w4, lt"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, lt",
+        "mov x4, x22"
       ]
     },
     "cmovl rax, rbx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x4c",
       "ExpectedArm64ASM": [
-        "csel x4, x7, x4, lt"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel x22, x21, x20, lt",
+        "mov x4, x22"
       ]
     },
     "cmovnl ax, bx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0x4d",
       "ExpectedArm64ASM": [
-        "csel w20, w7, w4, ge",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, ge",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21"
       ]
     },
     "cmovnl eax, ebx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x4d",
       "ExpectedArm64ASM": [
-        "csel w4, w7, w4, ge"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, ge",
+        "mov x4, x22"
       ]
     },
     "cmovnl rax, rbx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x4d",
       "ExpectedArm64ASM": [
-        "csel x4, x7, x4, ge"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel x22, x21, x20, ge",
+        "mov x4, x22"
       ]
     },
     "cmovle ax, bx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0x4e",
       "ExpectedArm64ASM": [
-        "csel w20, w7, w4, le",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, le",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21"
       ]
     },
     "cmovle eax, ebx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x4e",
       "ExpectedArm64ASM": [
-        "csel w4, w7, w4, le"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, le",
+        "mov x4, x22"
       ]
     },
     "cmovle rax, rbx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x4e",
       "ExpectedArm64ASM": [
-        "csel x4, x7, x4, le"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel x22, x21, x20, le",
+        "mov x4, x22"
       ]
     },
     "cmovnle ax, bx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0x4f",
       "ExpectedArm64ASM": [
-        "csel w20, w7, w4, gt",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, gt",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21"
       ]
     },
     "cmovnle eax, ebx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x4f",
       "ExpectedArm64ASM": [
-        "csel w4, w7, w4, gt"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel w22, w21, w20, gt",
+        "mov x4, x22"
       ]
     },
     "cmovnle rax, rbx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x4f",
       "ExpectedArm64ASM": [
-        "csel x4, x7, x4, gt"
+        "mov x20, x4",
+        "mov x21, x7",
+        "csel x22, x21, x20, gt",
+        "mov x4, x22"
       ]
     },
     "movmskps eax, xmm0": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x0f 0x50",
       "ExpectedArm64ASM": [
-        "ushr v2.4s, v16.4s, #31",
-        "ldr q3, [x28, #2144]",
-        "ushl v2.4s, v2.4s, v3.4s",
-        "addv s2, v2.4s",
-        "mov w4, v2.s[0]"
+        "mov v2.16b, v16.16b",
+        "ushr v3.4s, v2.4s, #31",
+        "ldr q2, [x28, #2144]",
+        "ushl v4.4s, v3.4s, v2.4s",
+        "addv s2, v4.4s",
+        "mov w20, v2.s[0]",
+        "mov x4, x20"
       ]
     },
     "movmskps rax, xmm0": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x0f 0x50",
       "ExpectedArm64ASM": [
-        "ushr v2.4s, v16.4s, #31",
-        "ldr q3, [x28, #2144]",
-        "ushl v2.4s, v2.4s, v3.4s",
-        "addv s2, v2.4s",
-        "mov w4, v2.s[0]"
+        "mov v2.16b, v16.16b",
+        "ushr v3.4s, v2.4s, #31",
+        "ldr q2, [x28, #2144]",
+        "ushl v4.4s, v3.4s, v2.4s",
+        "addv s2, v4.4s",
+        "mov w20, v2.s[0]",
+        "mov x4, x20"
       ]
     },
     "sqrtps xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0x51",
       "ExpectedArm64ASM": [
-        "fsqrt v16.4s, v17.4s"
+        "mov v2.16b, v17.16b",
+        "fsqrt v3.4s, v2.4s",
+        "mov v16.16b, v3.16b"
       ]
     },
     "rsqrtps xmm0, xmm1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0x0f 0x52"
       ],
       "ExpectedArm64ASM": [
+        "mov v2.16b, v17.16b",
         "fmov v0.4s, #0x70 (1.0000)",
-        "fsqrt v1.4s, v17.4s",
-        "fdiv v16.4s, v0.4s, v1.4s"
+        "fsqrt v1.4s, v2.4s",
+        "fdiv v3.4s, v0.4s, v1.4s",
+        "mov v16.16b, v3.16b"
       ]
     },
     "rcpps xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "0x0f 0x53"
       ],
       "ExpectedArm64ASM": [
+        "mov v2.16b, v17.16b",
         "fmov v0.4s, #0x70 (1.0000)",
-        "fdiv v16.4s, v0.4s, v17.4s"
+        "fdiv v3.4s, v0.4s, v2.4s",
+        "mov v16.16b, v3.16b"
       ]
     },
     "andps xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x54",
       "ExpectedArm64ASM": [
-        "and v16.16b, v16.16b, v17.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "and v4.16b, v3.16b, v2.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "andnps xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x55",
       "ExpectedArm64ASM": [
-        "bic v16.16b, v17.16b, v16.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "bic v4.16b, v2.16b, v3.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "orps xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x56",
       "ExpectedArm64ASM": [
-        "orr v16.16b, v16.16b, v17.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "orr v4.16b, v3.16b, v2.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "xorps xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x57",
       "ExpectedArm64ASM": [
-        "eor v16.16b, v16.16b, v17.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "eor v4.16b, v3.16b, v2.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "addps xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x58",
       "ExpectedArm64ASM": [
-        "fadd v16.4s, v16.4s, v17.4s"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "fadd v4.4s, v3.4s, v2.4s",
+        "mov v16.16b, v4.16b"
       ]
     },
     "mulps xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x59",
       "ExpectedArm64ASM": [
-        "fmul v16.4s, v16.4s, v17.4s"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "fmul v4.4s, v3.4s, v2.4s",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cvtps2pd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0x5a",
       "ExpectedArm64ASM": [
-        "fcvtl v16.2d, v17.2s"
+        "mov v2.16b, v17.16b",
+        "fcvtl v3.2d, v2.2s",
+        "mov v16.16b, v3.16b"
       ]
     },
     "cvtps2pd xmm0, [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x5a",
       "ExpectedArm64ASM": [
-        "ldr d2, [x4]",
-        "fcvtl v16.2d, v2.2s"
+        "mov x20, x4",
+        "ldr d2, [x20]",
+        "fcvtl v3.2d, v2.2s",
+        "mov v16.16b, v3.16b"
       ]
     },
     "cvtdq2ps xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0x5b",
       "ExpectedArm64ASM": [
-        "scvtf v16.4s, v17.4s"
+        "mov v2.16b, v17.16b",
+        "scvtf v3.4s, v2.4s",
+        "mov v16.16b, v3.16b"
       ]
     },
     "subps xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x5c",
       "ExpectedArm64ASM": [
-        "fsub v16.4s, v16.4s, v17.4s"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "fsub v4.4s, v3.4s, v2.4s",
+        "mov v16.16b, v4.16b"
       ]
     },
     "minps xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0x5d",
       "ExpectedArm64ASM": [
-        "fcmgt v0.4s, v17.4s, v16.4s",
-        "bif v16.16b, v17.16b, v0.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "fcmgt v0.4s, v2.4s, v3.4s",
+        "mov v4.16b, v3.16b",
+        "bif v4.16b, v2.16b, v0.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "divps xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x5e",
       "ExpectedArm64ASM": [
-        "fdiv v16.4s, v16.4s, v17.4s"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "fdiv v4.4s, v3.4s, v2.4s",
+        "mov v16.16b, v4.16b"
       ]
     },
     "maxps xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0x5f",
       "ExpectedArm64ASM": [
-        "fcmgt v0.4s, v17.4s, v16.4s",
-        "bit v16.16b, v17.16b, v0.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "fcmgt v0.4s, v2.4s, v3.4s",
+        "mov v4.16b, v3.16b",
+        "bit v4.16b, v2.16b, v0.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "punpcklbw mm0, mm1": {
@@ -791,18 +1064,19 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
         "ldr d3, [x28, #784]",
-        "zip1 v2.8b, v2.8b, v3.8b",
-        "str d2, [x28, #768]"
+        "zip1 v4.8b, v2.8b, v3.8b",
+        "str d4, [x28, #768]"
       ]
     },
     "punpcklbw mm0, [rax]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x60",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "ldr d3, [x4]",
-        "zip1 v2.8b, v2.8b, v3.8b",
-        "str d2, [x28, #768]"
+        "mov x20, x4",
+        "ldr d3, [x20]",
+        "zip1 v4.8b, v2.8b, v3.8b",
+        "str d4, [x28, #768]"
       ]
     },
     "punpcklwd mm0, mm1": {
@@ -811,18 +1085,19 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
         "ldr d3, [x28, #784]",
-        "zip1 v2.4h, v2.4h, v3.4h",
-        "str d2, [x28, #768]"
+        "zip1 v4.4h, v2.4h, v3.4h",
+        "str d4, [x28, #768]"
       ]
     },
     "punpcklwd mm0, [rax]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x61",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "ldr d3, [x4]",
-        "zip1 v2.4h, v2.4h, v3.4h",
-        "str d2, [x28, #768]"
+        "mov x20, x4",
+        "ldr d3, [x20]",
+        "zip1 v4.4h, v2.4h, v3.4h",
+        "str d4, [x28, #768]"
       ]
     },
     "punpckldq mm0, mm1": {
@@ -831,18 +1106,19 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
         "ldr d3, [x28, #784]",
-        "zip1 v2.2s, v2.2s, v3.2s",
-        "str d2, [x28, #768]"
+        "zip1 v4.2s, v2.2s, v3.2s",
+        "str d4, [x28, #768]"
       ]
     },
     "punpckldq mm0, [rax]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x62",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "ldr d3, [x4]",
-        "zip1 v2.2s, v2.2s, v3.2s",
-        "str d2, [x28, #768]"
+        "mov x20, x4",
+        "ldr d3, [x20]",
+        "zip1 v4.2s, v2.2s, v3.2s",
+        "str d4, [x28, #768]"
       ]
     },
     "packsswb mm0, mm1": {
@@ -851,20 +1127,21 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
         "ldr d3, [x28, #784]",
-        "zip1 v2.2d, v2.2d, v3.2d",
-        "sqxtn v2.8b, v2.8h",
-        "str d2, [x28, #768]"
+        "zip1 v4.2d, v2.2d, v3.2d",
+        "sqxtn v4.8b, v4.8h",
+        "str d4, [x28, #768]"
       ]
     },
     "packsswb mm0, [rax]": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0x63",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "ldr d3, [x4]",
-        "zip1 v2.2d, v2.2d, v3.2d",
-        "sqxtn v2.8b, v2.8h",
-        "str d2, [x28, #768]"
+        "mov x20, x4",
+        "ldr d3, [x20]",
+        "zip1 v4.2d, v2.2d, v3.2d",
+        "sqxtn v4.8b, v4.8h",
+        "str d4, [x28, #768]"
       ]
     },
     "packsswb mm0, mm0": {
@@ -872,9 +1149,9 @@
       "Comment": "0x0f 0x63",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "zip1 v2.2d, v2.2d, v2.2d",
-        "sqxtn v2.8b, v2.8h",
-        "str d2, [x28, #768]"
+        "zip1 v3.2d, v2.2d, v2.2d",
+        "sqxtn v3.8b, v3.8h",
+        "str d3, [x28, #768]"
       ]
     },
     "pcmpgtb mm0, mm1": {
@@ -883,8 +1160,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "cmgt v2.16b, v3.16b, v2.16b",
-        "str d2, [x28, #768]"
+        "cmgt v4.16b, v3.16b, v2.16b",
+        "str d4, [x28, #768]"
       ]
     },
     "pcmpgtw mm0, mm1": {
@@ -893,8 +1170,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "cmgt v2.8h, v3.8h, v2.8h",
-        "str d2, [x28, #768]"
+        "cmgt v4.8h, v3.8h, v2.8h",
+        "str d4, [x28, #768]"
       ]
     },
     "pcmpgtd mm0, mm1": {
@@ -903,8 +1180,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "cmgt v2.4s, v3.4s, v2.4s",
-        "str d2, [x28, #768]"
+        "cmgt v4.4s, v3.4s, v2.4s",
+        "str d4, [x28, #768]"
       ]
     },
     "punpckhbw mm0, mm1": {
@@ -913,18 +1190,19 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
         "ldr d3, [x28, #784]",
-        "zip2 v2.8b, v2.8b, v3.8b",
-        "str d2, [x28, #768]"
+        "zip2 v4.8b, v2.8b, v3.8b",
+        "str d4, [x28, #768]"
       ]
     },
     "punpckhbw mm0, [rax]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x68",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "ldr d3, [x4]",
-        "zip2 v2.8b, v2.8b, v3.8b",
-        "str d2, [x28, #768]"
+        "mov x20, x4",
+        "ldr d3, [x20]",
+        "zip2 v4.8b, v2.8b, v3.8b",
+        "str d4, [x28, #768]"
       ]
     },
     "punpckhwd mm0, mm1": {
@@ -933,18 +1211,19 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
         "ldr d3, [x28, #784]",
-        "zip2 v2.4h, v2.4h, v3.4h",
-        "str d2, [x28, #768]"
+        "zip2 v4.4h, v2.4h, v3.4h",
+        "str d4, [x28, #768]"
       ]
     },
     "punpckhwd mm0, [rax]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x69",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "ldr d3, [x4]",
-        "zip2 v2.4h, v2.4h, v3.4h",
-        "str d2, [x28, #768]"
+        "mov x20, x4",
+        "ldr d3, [x20]",
+        "zip2 v4.4h, v2.4h, v3.4h",
+        "str d4, [x28, #768]"
       ]
     },
     "punpckhdq mm0, mm1": {
@@ -953,18 +1232,19 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
         "ldr d3, [x28, #784]",
-        "zip2 v2.2s, v2.2s, v3.2s",
-        "str d2, [x28, #768]"
+        "zip2 v4.2s, v2.2s, v3.2s",
+        "str d4, [x28, #768]"
       ]
     },
     "punpckhdq mm0, [rax]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x6a",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "ldr d3, [x4]",
-        "zip2 v2.2s, v2.2s, v3.2s",
-        "str d2, [x28, #768]"
+        "mov x20, x4",
+        "ldr d3, [x20]",
+        "zip2 v4.2s, v2.2s, v3.2s",
+        "str d4, [x28, #768]"
       ]
     },
     "packssdw mm0, mm1": {
@@ -973,24 +1253,26 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
         "ldr d3, [x28, #784]",
-        "zip1 v2.2d, v2.2d, v3.2d",
-        "sqxtn v2.4h, v2.4s",
-        "str d2, [x28, #768]"
+        "zip1 v4.2d, v2.2d, v3.2d",
+        "sqxtn v4.4h, v4.4s",
+        "str d4, [x28, #768]"
       ]
     },
     "movd mm0, eax": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0x6e",
       "ExpectedArm64ASM": [
-        "fmov s2, w4",
+        "mov x20, x4",
+        "fmov s2, w20",
         "str d2, [x28, #768]"
       ]
     },
     "movd mm0, [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0x6e",
       "ExpectedArm64ASM": [
-        "ldr s2, [x4]",
+        "mov x20, x4",
+        "ldr s2, [x20]",
         "str d2, [x28, #768]"
       ]
     },
@@ -1011,10 +1293,11 @@
       ]
     },
     "movq mm0, [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0x6f",
       "ExpectedArm64ASM": [
-        "ldr d2, [x4]",
+        "mov x20, x4",
+        "ldr d2, [x20]",
         "str d2, [x28, #768]"
       ]
     },
@@ -1023,17 +1306,18 @@
       "Comment": "0x0f 0x70",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
-        "dup v2.4h, v2.h[0]",
-        "str d2, [x28, #768]"
+        "dup v3.4h, v2.h[0]",
+        "str d3, [x28, #768]"
       ]
     },
     "pshufw mm0, [rax], 0": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x70",
       "ExpectedArm64ASM": [
-        "ldr d2, [x4]",
-        "dup v2.4h, v2.h[0]",
-        "str d2, [x28, #768]"
+        "mov x20, x4",
+        "ldr d2, [x20]",
+        "dup v3.4h, v2.h[0]",
+        "str d3, [x28, #768]"
       ]
     },
     "pshufw mm0, mm1, 1": {
@@ -1043,19 +1327,20 @@
         "ldr d2, [x28, #784]",
         "ldr x0, [x28, #1744]",
         "ldr d3, [x0, #16]",
-        "tbl v2.8b, {v2.16b}, v3.8b",
-        "str d2, [x28, #768]"
+        "tbl v4.8b, {v2.16b}, v3.8b",
+        "str d4, [x28, #768]"
       ]
     },
     "pshufw mm0, [rax], 1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0x70",
       "ExpectedArm64ASM": [
-        "ldr d2, [x4]",
+        "mov x20, x4",
+        "ldr d2, [x20]",
         "ldr x0, [x28, #1744]",
         "ldr d3, [x0, #16]",
-        "tbl v2.8b, {v2.16b}, v3.8b",
-        "str d2, [x28, #768]"
+        "tbl v4.8b, {v2.16b}, v3.8b",
+        "str d4, [x28, #768]"
       ]
     },
     "pshufw mm0, mm1, 0xff": {
@@ -1063,17 +1348,18 @@
       "Comment": "0x0f 0x70",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
-        "dup v2.4h, v2.h[3]",
-        "str d2, [x28, #768]"
+        "dup v3.4h, v2.h[3]",
+        "str d3, [x28, #768]"
       ]
     },
     "pshufw mm0, [rax], 0xff": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0x70",
       "ExpectedArm64ASM": [
-        "ldr d2, [x4]",
-        "dup v2.4h, v2.h[3]",
-        "str d2, [x28, #768]"
+        "mov x20, x4",
+        "ldr d2, [x20]",
+        "dup v3.4h, v2.h[3]",
+        "str d3, [x28, #768]"
       ]
     },
     "pcmpeqb mm0, mm1": {
@@ -1082,8 +1368,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "cmeq v2.16b, v3.16b, v2.16b",
-        "str d2, [x28, #768]"
+        "cmeq v4.16b, v3.16b, v2.16b",
+        "str d4, [x28, #768]"
       ]
     },
     "pcmpeqw mm0, mm1": {
@@ -1092,8 +1378,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "cmeq v2.8h, v3.8h, v2.8h",
-        "str d2, [x28, #768]"
+        "cmeq v4.8h, v3.8h, v2.8h",
+        "str d4, [x28, #768]"
       ]
     },
     "pcmpeqd mm0, mm1": {
@@ -1102,8 +1388,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "cmeq v2.4s, v3.4s, v2.4s",
-        "str d2, [x28, #768]"
+        "cmeq v4.4s, v3.4s, v2.4s",
+        "str d4, [x28, #768]"
       ]
     },
     "emms": {
@@ -1115,19 +1401,21 @@
       ]
     },
     "movd eax, mm0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0x7e",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "mov w4, v2.s[0]"
+        "mov w20, v2.s[0]",
+        "mov x4, x20"
       ]
     },
     "movd [rax], mm0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0x7e",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "str s2, [x4]"
+        "mov x20, x4",
+        "str s2, [x20]"
       ]
     },
     "db 0x0f, 0x7f, 0xc1": {
@@ -1143,239 +1431,307 @@
       ]
     },
     "movq [rax], mm0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0x7f",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "str d2, [x4]"
+        "mov x20, x4",
+        "str d2, [x20]"
       ]
     },
     "seto al": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x90",
       "ExpectedArm64ASM": [
         "cset x20, vs",
-        "bfxil x4, x20, #0, #8"
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "setno al": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x91",
       "ExpectedArm64ASM": [
         "cset x20, vc",
-        "bfxil x4, x20, #0, #8"
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "setb al": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x92",
       "ExpectedArm64ASM": [
         "cset x20, hs",
-        "bfxil x4, x20, #0, #8"
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "setnb al": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x93",
       "ExpectedArm64ASM": [
         "cset x20, lo",
-        "bfxil x4, x20, #0, #8"
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "setz al": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x94",
       "ExpectedArm64ASM": [
         "cset x20, eq",
-        "bfxil x4, x20, #0, #8"
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "setnz al": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x95",
       "ExpectedArm64ASM": [
         "cset x20, ne",
-        "bfxil x4, x20, #0, #8"
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "setbe al": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x0f 0x96",
       "ExpectedArm64ASM": [
         "mov w20, #0x1",
         "cset x21, hs",
-        "csel x20, x20, x21, eq",
-        "bfxil x4, x20, #0, #8"
+        "csel x22, x20, x21, eq",
+        "mov x20, x4",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #8",
+        "mov x4, x21"
       ]
     },
     "setnbe al": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0x97",
       "ExpectedArm64ASM": [
         "cset x20, lo",
-        "csel x20, x20, xzr, ne",
-        "bfxil x4, x20, #0, #8"
+        "csel x21, x20, xzr, ne",
+        "mov x20, x4",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22"
       ]
     },
     "sets al": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x98",
       "ExpectedArm64ASM": [
         "cset x20, mi",
-        "bfxil x4, x20, #0, #8"
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "setns al": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x99",
       "ExpectedArm64ASM": [
         "cset x20, pl",
-        "bfxil x4, x20, #0, #8"
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "setpe al": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x0f 0x9a",
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "and x20, x20, #0x1",
-        "bfxil x4, x20, #0, #8"
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "and x20, x21, #0x1",
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "setnp al": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x0f 0x9b",
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "and x20, x20, #0x1",
-        "bfxil x4, x20, #0, #8"
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "and x20, x21, #0x1",
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "setl al": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x9c",
       "ExpectedArm64ASM": [
         "cset x20, lt",
-        "bfxil x4, x20, #0, #8"
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "setnl al": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x9d",
       "ExpectedArm64ASM": [
         "cset x20, ge",
-        "bfxil x4, x20, #0, #8"
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "setle al": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x9e",
       "ExpectedArm64ASM": [
         "cset x20, le",
-        "bfxil x4, x20, #0, #8"
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "setnle al": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0x9f",
       "ExpectedArm64ASM": [
         "cset x20, gt",
-        "bfxil x4, x20, #0, #8"
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #8",
+        "mov x4, x22"
       ]
     },
     "push fs": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0xa0",
       "ExpectedArm64ASM": [
-        "ldr x20, [x28, #176]",
-        "str x20, [x8, #-8]!"
+        "mov x20, x8",
+        "ldr x21, [x28, #176]",
+        "mov x22, x20",
+        "str x21, [x22, #-8]!",
+        "mov x8, x22"
       ]
     },
     "pop fs": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x0f 0xa1",
       "ExpectedArm64ASM": [
-        "ldr x20, [x8]",
-        "add x8, x8, #0x8 (8)",
-        "strh w20, [x28, #146]",
-        "ubfx w20, w20, #3, #13",
+        "mov x20, x8",
+        "ldr x21, [x20]",
+        "add x22, x20, #0x8 (8)",
+        "mov x8, x22",
+        "strh w21, [x28, #146]",
+        "ubfx w20, w21, #3, #13",
         "add x0, x28, x20, lsl #2",
-        "ldr w20, [x0, #896]",
-        "str w20, [x28, #176]"
+        "ldr w21, [x0, #896]",
+        "str w21, [x28, #176]"
       ]
     },
     "bt ax, bx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x0f 0xa3",
       "ExpectedArm64ASM": [
-        "and x20, x7, #0xf",
-        "lsr w20, w4, w20",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x7",
+        "mov x21, x4",
+        "and x22, x20, #0xf",
+        "lsr w20, w21, w22",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "bt [rax], bx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x0f 0xa3",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "sbfx x21, x7, #3, #13",
-        "ldrb w21, [x4, x21, sxtx]",
-        "lsr w20, w21, w20",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "sbfx x23, x20, #3, #13",
+        "ldrb w20, [x21, x23, sxtx]",
+        "lsr w21, w20, w22",
+        "ubfx x20, x21, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "bt eax, ebx": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0xa3",
       "ExpectedArm64ASM": [
-        "lsr w20, w4, w7",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "mov x21, x4",
+        "lsr w22, w21, w20",
+        "ubfx x20, x22, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "bt [rax], ebx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x0f 0xa3",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "sbfx x21, x7, #3, #29",
-        "ldrb w21, [x4, x21, sxtx]",
-        "lsr w20, w21, w20",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "sbfx x23, x20, #3, #29",
+        "ldrb w20, [x21, x23, sxtx]",
+        "lsr w21, w20, w22",
+        "ubfx x20, x21, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "bt rax, rbx": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0xa3",
       "ExpectedArm64ASM": [
-        "lsr x20, x4, x7",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "mov x21, x4",
+        "lsr x22, x21, x20",
+        "ubfx x20, x22, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "bt [rax], rbx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x0f 0xa3",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "asr x21, x7, #3",
-        "ldrb w21, [x4, x21, sxtx]",
-        "lsr w20, w21, w20",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "asr x23, x20, #3",
+        "ldrb w20, [x21, x23, sxtx]",
+        "lsr w21, w20, w22",
+        "ubfx x20, x21, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "shld ax, bx, 0": {
@@ -1384,143 +1740,177 @@
       "ExpectedArm64ASM": []
     },
     "shld ax, bx, 1": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 19,
       "Comment": "0x0f 0xac",
       "ExpectedArm64ASM": [
-        "uxth w20, w7",
-        "uxth w21, w4",
-        "lsl x22, x21, #1",
-        "lsr w20, w20, #15",
-        "orr x26, x22, x20",
-        "bfxil x4, x26, #0, #16",
-        "cmn wzr, w26, lsl #16",
-        "ubfx x20, x21, #15, #1",
-        "mrs x22, nzcv",
-        "orr w20, w22, w20, lsl #29",
-        "eor w21, w26, w21",
-        "ubfx x21, x21, #15, #1",
-        "orr w20, w20, w21, lsl #28",
+        "mov x20, x7",
+        "uxth w21, w20",
+        "mov x20, x4",
+        "uxth w22, w20",
+        "lsl x23, x22, #1",
+        "lsr w24, w21, #15",
+        "orr x21, x23, x24",
+        "mov x23, x20",
+        "bfxil x23, x21, #0, #16",
+        "mov x4, x23",
+        "cmn wzr, w21, lsl #16",
+        "ubfx x20, x22, #15, #1",
+        "mrs x23, nzcv",
+        "orr w24, w23, w20, lsl #29",
+        "mov x26, x21",
+        "eor w20, w21, w22",
+        "ubfx x21, x20, #15, #1",
+        "orr w20, w24, w21, lsl #28",
         "msr nzcv, x20"
       ]
     },
     "shld ax, bx, 15": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 16,
       "Comment": "0x0f 0xac",
       "ExpectedArm64ASM": [
-        "uxth w20, w7",
-        "uxth w21, w4",
-        "lsl x22, x21, #15",
-        "lsr w20, w20, #1",
-        "orr x26, x22, x20",
-        "bfxil x4, x26, #0, #16",
-        "cmn wzr, w26, lsl #16",
-        "ubfx x20, x21, #1, #1",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "uxth w21, w20",
+        "mov x20, x4",
+        "uxth w22, w20",
+        "lsl x23, x22, #15",
+        "lsr w24, w21, #1",
+        "orr x21, x23, x24",
+        "mov x23, x20",
+        "bfxil x23, x21, #0, #16",
+        "mov x4, x23",
+        "cmn wzr, w21, lsl #16",
+        "ubfx x20, x22, #1, #1",
+        "mrs x22, nzcv",
+        "orr w23, w22, w20, lsl #29",
+        "mov x26, x21",
+        "msr nzcv, x23"
       ]
     },
     "shld ax, bx, 16": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 15,
       "Comment": "0x0f 0xac",
       "ExpectedArm64ASM": [
-        "uxth w20, w7",
-        "uxth w21, w4",
-        "lsl x22, x21, #16",
-        "orr x26, x22, x20",
-        "bfxil x4, x26, #0, #16",
-        "cmn wzr, w26, lsl #16",
-        "ubfx x20, x21, #0, #1",
+        "mov x20, x7",
+        "uxth w21, w20",
+        "mov x20, x4",
+        "uxth w22, w20",
+        "lsl x23, x22, #16",
+        "orr x24, x23, x21",
+        "mov x21, x20",
+        "bfxil x21, x24, #0, #16",
+        "mov x4, x21",
+        "cmn wzr, w24, lsl #16",
+        "ubfx x20, x22, #0, #1",
         "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "msr nzcv, x20"
+        "orr w22, w21, w20, lsl #29",
+        "mov x26, x24",
+        "msr nzcv, x22"
       ]
     },
     "shld ax, bx, 31": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 16,
       "Comment": "0x0f 0xac",
       "ExpectedArm64ASM": [
-        "uxth w20, w7",
-        "uxth w21, w4",
-        "lsl x22, x21, #31",
-        "lsr w20, w20, #17",
-        "orr x26, x22, x20",
-        "bfxil x4, x26, #0, #16",
-        "cmn wzr, w26, lsl #16",
-        "ubfx x20, x21, #1, #1",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "uxth w21, w20",
+        "mov x20, x4",
+        "uxth w22, w20",
+        "lsl x23, x22, #31",
+        "lsr w24, w21, #17",
+        "orr x21, x23, x24",
+        "mov x23, x20",
+        "bfxil x23, x21, #0, #16",
+        "mov x4, x23",
+        "cmn wzr, w21, lsl #16",
+        "ubfx x20, x22, #1, #1",
+        "mrs x22, nzcv",
+        "orr w23, w22, w20, lsl #29",
+        "mov x26, x21",
+        "msr nzcv, x23"
       ]
     },
     "shld eax, ebx, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0xac",
       "ExpectedArm64ASM": [
-        "mov w4, w4"
+        "mov x20, x4",
+        "mov w21, w20",
+        "mov x4, x21"
       ]
     },
     "shld eax, ebx, 1": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 15,
       "Comment": "0x0f 0xac",
       "ExpectedArm64ASM": [
-        "mov w20, w7",
-        "mov w21, w4",
-        "extr w4, w21, w20, #31",
-        "tst w4, w4",
+        "mov x20, x7",
+        "mov w21, w20",
+        "mov x20, x4",
+        "mov w22, w20",
+        "extr w20, w22, w21, #31",
+        "mov x4, x20",
+        "tst w20, w20",
+        "ubfx x21, x22, #31, #1",
+        "mrs x23, nzcv",
+        "orr w24, w23, w21, lsl #29",
+        "mov x26, x20",
+        "eor w21, w20, w22",
         "ubfx x20, x21, #31, #1",
-        "mrs x22, nzcv",
-        "orr w20, w22, w20, lsl #29",
-        "mov x26, x4",
-        "eor w21, w4, w21",
-        "ubfx x21, x21, #31, #1",
-        "orr w20, w20, w21, lsl #28",
-        "msr nzcv, x20"
+        "orr w21, w24, w20, lsl #28",
+        "msr nzcv, x21"
       ]
     },
     "shld eax, ebx, 15": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 12,
       "Comment": "0x0f 0xac",
       "ExpectedArm64ASM": [
-        "mov w20, w7",
-        "mov w21, w4",
-        "extr w4, w21, w20, #17",
-        "tst w4, w4",
-        "ubfx x20, x21, #17, #1",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "mov x26, x4",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "mov w21, w20",
+        "mov x20, x4",
+        "mov w22, w20",
+        "extr w20, w22, w21, #17",
+        "mov x4, x20",
+        "tst w20, w20",
+        "ubfx x21, x22, #17, #1",
+        "mrs x22, nzcv",
+        "orr w23, w22, w21, lsl #29",
+        "mov x26, x20",
+        "msr nzcv, x23"
       ]
     },
     "shld eax, ebx, 16": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 12,
       "Comment": "0x0f 0xac",
       "ExpectedArm64ASM": [
-        "mov w20, w7",
-        "mov w21, w4",
-        "extr w4, w21, w20, #16",
-        "tst w4, w4",
-        "ubfx x20, x21, #16, #1",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "mov x26, x4",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "mov w21, w20",
+        "mov x20, x4",
+        "mov w22, w20",
+        "extr w20, w22, w21, #16",
+        "mov x4, x20",
+        "tst w20, w20",
+        "ubfx x21, x22, #16, #1",
+        "mrs x22, nzcv",
+        "orr w23, w22, w21, lsl #29",
+        "mov x26, x20",
+        "msr nzcv, x23"
       ]
     },
     "shld eax, ebx, 31": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 12,
       "Comment": "0x0f 0xac",
       "ExpectedArm64ASM": [
-        "mov w20, w7",
-        "mov w21, w4",
-        "extr w4, w21, w20, #1",
-        "tst w4, w4",
-        "ubfx x20, x21, #1, #1",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "mov x26, x4",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "mov w21, w20",
+        "mov x20, x4",
+        "mov w22, w20",
+        "extr w20, w22, w21, #1",
+        "mov x4, x20",
+        "tst w20, w20",
+        "ubfx x21, x22, #1, #1",
+        "mrs x22, nzcv",
+        "orr w23, w22, w21, lsl #29",
+        "mov x26, x20",
+        "msr nzcv, x23"
       ]
     },
     "shld rax, rbx, 0": {
@@ -1529,1707 +1919,2126 @@
       "ExpectedArm64ASM": []
     },
     "shld rax, rbx, 1": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x0f 0xac",
       "ExpectedArm64ASM": [
-        "mov x20, x4",
-        "extr x4, x20, x7, #63",
-        "tst x4, x4",
+        "mov x20, x7",
+        "mov x21, x4",
+        "extr x22, x21, x20, #63",
+        "mov x4, x22",
+        "tst x22, x22",
+        "lsr x20, x21, #63",
+        "mrs x23, nzcv",
+        "orr w24, w23, w20, lsl #29",
+        "mov x26, x22",
+        "eor x20, x22, x21",
         "lsr x21, x20, #63",
-        "mrs x22, nzcv",
-        "orr w21, w22, w21, lsl #29",
-        "mov x26, x4",
-        "eor x20, x4, x20",
-        "lsr x20, x20, #63",
-        "orr w20, w21, w20, lsl #28",
+        "orr w20, w24, w21, lsl #28",
         "msr nzcv, x20"
       ]
     },
     "shld rax, rbx, 15": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 10,
       "Comment": "0x0f 0xac",
       "ExpectedArm64ASM": [
-        "mov x20, x4",
-        "extr x4, x20, x7, #49",
-        "tst x4, x4",
-        "ubfx x20, x20, #49, #1",
+        "mov x20, x7",
+        "mov x21, x4",
+        "extr x22, x21, x20, #49",
+        "mov x4, x22",
+        "tst x22, x22",
+        "ubfx x20, x21, #49, #1",
         "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "mov x26, x4",
-        "msr nzcv, x20"
+        "orr w23, w21, w20, lsl #29",
+        "mov x26, x22",
+        "msr nzcv, x23"
       ]
     },
     "shld rax, rbx, 32": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 10,
       "Comment": "0x0f 0xac",
       "ExpectedArm64ASM": [
-        "mov x20, x4",
-        "extr x4, x20, x7, #32",
-        "tst x4, x4",
-        "ubfx x20, x20, #32, #1",
+        "mov x20, x7",
+        "mov x21, x4",
+        "extr x22, x21, x20, #32",
+        "mov x4, x22",
+        "tst x22, x22",
+        "ubfx x20, x21, #32, #1",
         "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "mov x26, x4",
-        "msr nzcv, x20"
+        "orr w23, w21, w20, lsl #29",
+        "mov x26, x22",
+        "msr nzcv, x23"
       ]
     },
     "shld rax, rbx, 63": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 10,
       "Comment": "0x0f 0xac",
       "ExpectedArm64ASM": [
-        "mov x20, x4",
-        "extr x4, x20, x7, #1",
-        "tst x4, x4",
-        "ubfx x20, x20, #1, #1",
+        "mov x20, x7",
+        "mov x21, x4",
+        "extr x22, x21, x20, #1",
+        "mov x4, x22",
+        "tst x22, x22",
+        "ubfx x20, x21, #1, #1",
         "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "mov x26, x4",
-        "msr nzcv, x20"
+        "orr w23, w21, w20, lsl #29",
+        "mov x26, x22",
+        "msr nzcv, x23"
       ]
     },
     "shld ax, bx, cl": {
-      "ExpectedInstructionCount": 25,
+      "ExpectedInstructionCount": 33,
       "Comment": "0x0f 0xad",
       "ExpectedArm64ASM": [
-        "uxth w20, w7",
-        "uxth w21, w4",
-        "and x22, x5, #0x1f",
+        "mov x20, x7",
+        "uxth w21, w20",
+        "mov x20, x4",
+        "uxth w22, w20",
+        "mov x23, x5",
+        "and x24, x23, #0x1f",
         "mov w23, #0x10",
-        "sub x23, x23, x22",
-        "lsl x24, x21, x22",
-        "lsr w20, w20, w23",
-        "orr x20, x24, x20",
+        "sub x25, x23, x24",
+        "lsl x23, x22, x24",
+        "lsr w30, w21, w25",
+        "orr x21, x23, x30",
         "mrs x23, nzcv",
-        "cmp x22, #0x0 (0)",
-        "csel x20, x21, x20, eq",
-        "bfxil x4, x20, #0, #16",
+        "cmp x24, #0x0 (0)",
+        "csel x25, x22, x21, eq",
+        "mov x21, x20",
+        "bfxil x21, x25, #0, #16",
+        "mov x4, x21",
+        "mov x20, x26",
         "msr nzcv, x23",
-        "cbz w22, #+0x30",
-        "cmn wzr, w20, lsl #16",
-        "mov x26, x20",
+        "mov x21, x20",
+        "cbz w24, #+0x30",
+        "cmn wzr, w25, lsl #16",
+        "mov x21, x25",
         "mov w0, #0x10",
-        "sub w0, w0, w22",
-        "lsr w0, w21, w0",
-        "eor w2, w21, w20",
+        "sub w0, w0, w24",
+        "lsr w0, w22, w0",
+        "eor w2, w22, w25",
         "mrs x1, nzcv",
         "bfi w1, w0, #29, #1",
         "lsr w2, w2, #15",
         "bfi w1, w2, #28, #1",
-        "msr nzcv, x1"
+        "msr nzcv, x1",
+        "mov x26, x21"
       ]
     },
     "shld eax, ebx, cl": {
-      "ExpectedInstructionCount": 22,
+      "ExpectedInstructionCount": 29,
       "Comment": "0x0f 0xad",
       "ExpectedArm64ASM": [
-        "mov w20, w7",
-        "mov w21, w4",
-        "and x22, x5, #0x1f",
-        "neg x23, x22",
-        "lsl x24, x21, x22",
-        "lsr w20, w20, w23",
-        "orr x20, x24, x20",
-        "mrs x23, nzcv",
-        "cmp x22, #0x0 (0)",
-        "csel x20, x21, x20, eq",
-        "mov w4, w20",
-        "msr nzcv, x23",
-        "cbz w22, #+0x28",
-        "ands w26, w20, w20",
-        "neg w0, w22",
-        "lsr w0, w21, w0",
-        "eor w2, w21, w20",
+        "mov x20, x7",
+        "mov w21, w20",
+        "mov x20, x4",
+        "mov w22, w20",
+        "mov x20, x5",
+        "and x23, x20, #0x1f",
+        "neg x20, x23",
+        "lsl x24, x22, x23",
+        "lsr w25, w21, w20",
+        "orr x20, x24, x25",
+        "mrs x21, nzcv",
+        "cmp x23, #0x0 (0)",
+        "csel x24, x22, x20, eq",
+        "mov w20, w24",
+        "mov x4, x20",
+        "mov x20, x26",
+        "msr nzcv, x21",
+        "mov x21, x20",
+        "cbz w23, #+0x28",
+        "ands w21, w24, w24",
+        "neg w0, w23",
+        "lsr w0, w22, w0",
+        "eor w2, w22, w24",
         "mrs x1, nzcv",
         "bfi w1, w0, #29, #1",
         "lsr w2, w2, #31",
         "bfi w1, w2, #28, #1",
-        "msr nzcv, x1"
+        "msr nzcv, x1",
+        "mov x26, x21"
       ]
     },
     "shld rax, rbx, cl": {
-      "ExpectedInstructionCount": 20,
+      "ExpectedInstructionCount": 26,
       "Comment": "0x0f 0xad",
       "ExpectedArm64ASM": [
-        "mov x20, x4",
-        "and x21, x5, #0x3f",
-        "neg x22, x21",
-        "lsl x23, x20, x21",
-        "lsr x22, x7, x22",
-        "orr x22, x23, x22",
-        "mrs x23, nzcv",
-        "cmp x21, #0x0 (0)",
-        "csel x4, x20, x22, eq",
-        "msr nzcv, x23",
-        "cbz x21, #+0x28",
-        "ands x26, x4, x4",
-        "neg x0, x21",
-        "lsr x0, x20, x0",
-        "eor x2, x20, x4",
+        "mov x20, x7",
+        "mov x21, x4",
+        "mov x22, x5",
+        "and x23, x22, #0x3f",
+        "neg x22, x23",
+        "lsl x24, x21, x23",
+        "lsr x25, x20, x22",
+        "orr x20, x24, x25",
+        "mrs x22, nzcv",
+        "cmp x23, #0x0 (0)",
+        "csel x24, x21, x20, eq",
+        "mov x4, x24",
+        "mov x20, x26",
+        "msr nzcv, x22",
+        "mov x22, x20",
+        "cbz x23, #+0x28",
+        "ands x22, x24, x24",
+        "neg x0, x23",
+        "lsr x0, x21, x0",
+        "eor x2, x21, x24",
         "mrs x1, nzcv",
         "bfi w1, w0, #29, #1",
         "lsr x2, x2, #63",
         "bfi w1, w2, #28, #1",
-        "msr nzcv, x1"
+        "msr nzcv, x1",
+        "mov x26, x22"
       ]
     },
     "push gs": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0xa8",
       "ExpectedArm64ASM": [
-        "ldr x20, [x28, #168]",
-        "str x20, [x8, #-8]!"
+        "mov x20, x8",
+        "ldr x21, [x28, #168]",
+        "mov x22, x20",
+        "str x21, [x22, #-8]!",
+        "mov x8, x22"
       ]
     },
     "pop gs": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x0f 0xa9",
       "ExpectedArm64ASM": [
-        "ldr x20, [x8]",
-        "add x8, x8, #0x8 (8)",
-        "strh w20, [x28, #144]",
-        "ubfx w20, w20, #3, #13",
+        "mov x20, x8",
+        "ldr x21, [x20]",
+        "add x22, x20, #0x8 (8)",
+        "mov x8, x22",
+        "strh w21, [x28, #144]",
+        "ubfx w20, w21, #3, #13",
         "add x0, x28, x20, lsl #2",
-        "ldr w20, [x0, #896]",
-        "str w20, [x28, #168]"
+        "ldr w21, [x0, #896]",
+        "str w21, [x28, #168]"
       ]
     },
     "bts ax, bx": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x0f 0xab",
       "ExpectedArm64ASM": [
-        "and x20, x7, #0xf",
-        "lsr w21, w4, w20",
-        "ubfx x21, x21, #0, #1",
-        "lsl x21, x21, #29",
-        "mov w22, #0x1",
-        "lsl w20, w22, w20",
-        "orr w20, w4, w20",
-        "bfxil x4, x20, #0, #16",
-        "msr nzcv, x21"
+        "mov x20, x7",
+        "mov x21, x4",
+        "and x22, x20, #0xf",
+        "lsr w20, w21, w22",
+        "ubfx x23, x20, #0, #1",
+        "lsl x20, x23, #29",
+        "mov w23, #0x1",
+        "lsl w24, w23, w22",
+        "orr w22, w21, w24",
+        "mov x23, x21",
+        "bfxil x23, x22, #0, #16",
+        "mov x4, x23",
+        "msr nzcv, x20"
       ]
     },
     "bts [rax], bx": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x0f 0xab",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "sbfx x21, x7, #3, #13",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldrb w23, [x4, x21, sxtx]",
-        "orr x22, x23, x22",
-        "strb w22, [x4, x21, sxtx]",
-        "lsr w20, w23, w20",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "sbfx x23, x20, #3, #13",
+        "mov w20, #0x1",
+        "lsl x24, x20, x22",
+        "ldrb w20, [x21, x23, sxtx]",
+        "orr x25, x20, x24",
+        "strb w25, [x21, x23, sxtx]",
+        "lsr w21, w20, w22",
+        "ubfx x20, x21, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "bts eax, ebx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "0x0f 0xab",
       "ExpectedArm64ASM": [
-        "lsr w20, w4, w7",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "mov w21, #0x1",
-        "lsl w21, w21, w7",
-        "orr w4, w4, w21",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "mov x21, x4",
+        "lsr w22, w21, w20",
+        "ubfx x23, x22, #0, #1",
+        "lsl x22, x23, #29",
+        "mov w23, #0x1",
+        "lsl w24, w23, w20",
+        "orr w20, w21, w24",
+        "mov x4, x20",
+        "msr nzcv, x22"
       ]
     },
     "bts [rax], ebx": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x0f 0xab",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "sbfx x21, x7, #3, #29",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldrb w23, [x4, x21, sxtx]",
-        "orr x22, x23, x22",
-        "strb w22, [x4, x21, sxtx]",
-        "lsr w20, w23, w20",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "sbfx x23, x20, #3, #29",
+        "mov w20, #0x1",
+        "lsl x24, x20, x22",
+        "ldrb w20, [x21, x23, sxtx]",
+        "orr x25, x20, x24",
+        "strb w25, [x21, x23, sxtx]",
+        "lsr w21, w20, w22",
+        "ubfx x20, x21, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "bts rax, rbx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "0x0f 0xab",
       "ExpectedArm64ASM": [
-        "lsr x20, x4, x7",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "mov w21, #0x1",
-        "lsl x21, x21, x7",
-        "orr x4, x4, x21",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "mov x21, x4",
+        "lsr x22, x21, x20",
+        "ubfx x23, x22, #0, #1",
+        "lsl x22, x23, #29",
+        "mov w23, #0x1",
+        "lsl x24, x23, x20",
+        "orr x20, x21, x24",
+        "mov x4, x20",
+        "msr nzcv, x22"
       ]
     },
     "bts [rax], rbx": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x0f 0xab",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "asr x21, x7, #3",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldrb w23, [x4, x21, sxtx]",
-        "orr x22, x23, x22",
-        "strb w22, [x4, x21, sxtx]",
-        "lsr w20, w23, w20",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "asr x23, x20, #3",
+        "mov w20, #0x1",
+        "lsl x24, x20, x22",
+        "ldrb w20, [x21, x23, sxtx]",
+        "orr x25, x20, x24",
+        "strb w25, [x21, x23, sxtx]",
+        "lsr w21, w20, w22",
+        "ubfx x20, x21, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "lock bts [rax], bx": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 12,
       "Comment": "0x0f 0xb3",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "sbfx x21, x7, #3, #13",
-        "add x21, x4, x21",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldsetalb w22, w21, [x21]",
-        "lsr w20, w21, w20",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "sbfx x23, x20, #3, #13",
+        "add x20, x21, x23",
+        "mov w21, #0x1",
+        "lsl x23, x21, x22",
+        "ldsetalb w23, w21, [x20]",
+        "lsr w20, w21, w22",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "lock bts [rax], ebx": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 12,
       "Comment": "0x0f 0xb3",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "sbfx x21, x7, #3, #29",
-        "add x21, x4, x21",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldsetalb w22, w21, [x21]",
-        "lsr w20, w21, w20",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "sbfx x23, x20, #3, #29",
+        "add x20, x21, x23",
+        "mov w21, #0x1",
+        "lsl x23, x21, x22",
+        "ldsetalb w23, w21, [x20]",
+        "lsr w20, w21, w22",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "lock bts [rax], rbx": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 12,
       "Comment": "0x0f 0xb3",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "asr x21, x7, #3",
-        "add x21, x4, x21",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldsetalb w22, w21, [x21]",
-        "lsr w20, w21, w20",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "asr x23, x20, #3",
+        "add x20, x21, x23",
+        "mov w21, #0x1",
+        "lsl x23, x21, x22",
+        "ldsetalb w23, w21, [x20]",
+        "lsr w20, w21, w22",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "imul ax, bx": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 12,
       "Comment": "0x0f 0xaf",
       "ExpectedArm64ASM": [
-        "sxth x20, w4",
-        "sxth x21, w7",
-        "mul x20, x20, x21",
-        "sbfx x21, x20, #16, #16",
-        "bfxil x4, x20, #0, #16",
-        "sbfx x20, x20, #15, #1",
-        "cmp x21, x20",
+        "mov x20, x4",
+        "mov x21, x7",
+        "sxth x22, w20",
+        "sxth x23, w21",
+        "mul x21, x22, x23",
+        "sbfx x22, x21, #16, #16",
+        "mov x23, x20",
+        "bfxil x23, x21, #0, #16",
+        "mov x4, x23",
+        "sbfx x20, x21, #15, #1",
+        "cmp x22, x20",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "imul eax, ebx": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x0f 0xaf",
       "ExpectedArm64ASM": [
-        "smull x20, w4, w7",
-        "asr x20, x20, #32",
-        "mul w4, w4, w7",
-        "sbfx x21, x4, #31, #1",
-        "cmp x20, x21",
+        "mov x20, x4",
+        "mov x21, x7",
+        "smull x22, w20, w21",
+        "asr x23, x22, #32",
+        "mul w22, w20, w21",
+        "mov x4, x22",
+        "sbfx x20, x22, #31, #1",
+        "cmp x23, x20",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "imul rax, rbx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x0f 0xaf",
       "ExpectedArm64ASM": [
-        "smulh x20, x4, x7",
-        "mul x4, x4, x7",
-        "asr x21, x4, #63",
-        "cmp x20, x21",
+        "mov x20, x4",
+        "mov x21, x7",
+        "smulh x22, x20, x21",
+        "mul x23, x20, x21",
+        "mov x4, x23",
+        "asr x20, x23, #63",
+        "cmp x22, x20",
         "ccmn xzr, #0, #nzCV, eq"
       ]
     },
     "cmpxchg al, bl": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 17,
       "Comment": "0x0f 0xb0",
       "ExpectedArm64ASM": [
-        "uxtb w20, w7",
-        "uxtb w21, w4",
-        "uxtb x22, w4",
-        "eor w27, w22, w21",
-        "lsl w0, w22, #24",
-        "cmp w0, w21, lsl #24",
-        "sub w26, w22, w21",
-        "mrs x21, nzcv",
-        "eor w21, w21, #0x20000000",
-        "msr nzcv, x21",
-        "bfxil x4, x20, #0, #8"
+        "mov x20, x7",
+        "uxtb w21, w20",
+        "mov x20, x4",
+        "uxtb w22, w20",
+        "uxtb x23, w20",
+        "eor w24, w23, w22",
+        "mov x27, x24",
+        "lsl w0, w23, #24",
+        "cmp w0, w22, lsl #24",
+        "sub w24, w23, w22",
+        "mov x26, x24",
+        "mrs x22, nzcv",
+        "eor w23, w22, #0x20000000",
+        "msr nzcv, x23",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #8",
+        "mov x4, x22"
       ]
     },
     "cmpxchg [rax], bl": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 19,
       "Comment": "0x0f 0xb0",
       "ExpectedArm64ASM": [
-        "uxtb w20, w7",
-        "uxtb w21, w4",
-        "mov w1, w21",
-        "casalb w1, w20, [x4]",
-        "mov w20, w1",
-        "bfxil x4, x20, #0, #8",
-        "eor w27, w21, w20",
-        "lsl w0, w21, #24",
-        "cmp w0, w20, lsl #24",
-        "sub w26, w21, w20",
+        "mov x20, x7",
+        "uxtb w21, w20",
+        "mov x20, x4",
+        "uxtb w22, w20",
+        "mov w1, w22",
+        "casalb w1, w21, [x20]",
+        "mov w23, w1",
+        "mov x21, x20",
+        "bfxil x21, x23, #0, #8",
+        "mov x4, x21",
+        "eor w20, w22, w23",
+        "mov x27, x20",
+        "lsl w0, w22, #24",
+        "cmp w0, w23, lsl #24",
+        "sub w20, w22, w23",
+        "mov x26, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "cmpxchg ax, bx": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 17,
       "Comment": "0x0f 0xb1",
       "ExpectedArm64ASM": [
-        "uxth w20, w7",
-        "uxth w21, w4",
-        "uxth x22, w4",
-        "eor w27, w22, w21",
-        "lsl w0, w22, #16",
-        "cmp w0, w21, lsl #16",
-        "sub w26, w22, w21",
-        "mrs x21, nzcv",
-        "eor w21, w21, #0x20000000",
-        "msr nzcv, x21",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x7",
+        "uxth w21, w20",
+        "mov x20, x4",
+        "uxth w22, w20",
+        "uxth x23, w20",
+        "eor w24, w23, w22",
+        "mov x27, x24",
+        "lsl w0, w23, #16",
+        "cmp w0, w22, lsl #16",
+        "sub w24, w23, w22",
+        "mov x26, x24",
+        "mrs x22, nzcv",
+        "eor w23, w22, #0x20000000",
+        "msr nzcv, x23",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22"
       ]
     },
     "cmpxchg [rax], bx": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 19,
       "Comment": "0x0f 0xb1",
       "ExpectedArm64ASM": [
-        "uxth w20, w7",
-        "uxth w21, w4",
-        "mov w1, w21",
-        "casalh w1, w20, [x4]",
-        "mov w20, w1",
-        "bfxil x4, x20, #0, #16",
-        "eor w27, w21, w20",
-        "lsl w0, w21, #16",
-        "cmp w0, w20, lsl #16",
-        "sub w26, w21, w20",
+        "mov x20, x7",
+        "uxth w21, w20",
+        "mov x20, x4",
+        "uxth w22, w20",
+        "mov w1, w22",
+        "casalh w1, w21, [x20]",
+        "mov w23, w1",
+        "mov x21, x20",
+        "bfxil x21, x23, #0, #16",
+        "mov x4, x21",
+        "eor w20, w22, w23",
+        "mov x27, x20",
+        "lsl w0, w22, #16",
+        "cmp w0, w23, lsl #16",
+        "sub w20, w22, w23",
+        "mov x26, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "cmpxchg eax, ebx": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x0f 0xb1",
       "ExpectedArm64ASM": [
-        "mov w20, w7",
-        "mov w21, w4",
-        "mov w22, w4",
-        "eor w27, w22, w21",
-        "subs w26, w22, w21",
-        "mrs x21, nzcv",
-        "eor w21, w21, #0x20000000",
-        "msr nzcv, x21",
-        "mov x4, x20"
+        "mov x20, x7",
+        "mov w21, w20",
+        "mov x20, x4",
+        "mov w22, w20",
+        "mov w23, w20",
+        "eor w20, w23, w22",
+        "mov x27, x20",
+        "subs w20, w23, w22",
+        "mov x26, x20",
+        "mrs x20, nzcv",
+        "eor w22, w20, #0x20000000",
+        "msr nzcv, x22",
+        "mov x4, x21"
       ]
     },
     "cmpxchg [rax], ebx": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 17,
       "Comment": "0x0f 0xb1",
       "ExpectedArm64ASM": [
-        "mov w20, w7",
-        "mov w21, w4",
-        "mov w1, w21",
-        "casal w1, w20, [x4]",
-        "mov w20, w1",
-        "cmp w20, w21",
-        "csel x4, x4, x20, eq",
-        "eor w27, w21, w20",
-        "subs w26, w21, w20",
+        "mov x20, x7",
+        "mov w21, w20",
+        "mov x20, x4",
+        "mov w22, w20",
+        "mov w1, w22",
+        "casal w1, w21, [x20]",
+        "mov w23, w1",
+        "cmp w23, w22",
+        "csel x21, x20, x23, eq",
+        "mov x4, x21",
+        "eor w20, w22, w23",
+        "mov x27, x20",
+        "subs w20, w22, w23",
+        "mov x26, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "cmpxchg rax, rbx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "0x0f 0xb1",
       "ExpectedArm64ASM": [
         "mov x20, x7",
-        "mov w27, #0x0",
-        "subs x26, x4, x4",
+        "mov x21, x4",
+        "mov w22, #0x0",
+        "mov x27, x22",
+        "subs x22, x21, x21",
+        "mov x26, x22",
         "mrs x21, nzcv",
-        "eor w21, w21, #0x20000000",
-        "msr nzcv, x21",
+        "eor w22, w21, #0x20000000",
+        "msr nzcv, x22",
         "mov x4, x20"
       ]
     },
     "cmpxchg [rax], rbx": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x0f 0xb1",
       "ExpectedArm64ASM": [
-        "mov x20, x4",
-        "mov x1, x20",
-        "casal x1, x7, [x20]",
-        "mov x4, x1",
-        "eor w27, w20, w4",
-        "subs x26, x20, x4",
+        "mov x20, x7",
+        "mov x21, x4",
+        "mov x1, x21",
+        "casal x1, x20, [x21]",
+        "mov x22, x1",
+        "mov x4, x22",
+        "eor w20, w21, w22",
+        "mov x27, x20",
+        "subs x20, x21, x22",
+        "mov x26, x20",
         "mrs x20, nzcv",
-        "eor w20, w20, #0x20000000",
-        "msr nzcv, x20"
+        "eor w21, w20, #0x20000000",
+        "msr nzcv, x21"
       ]
     },
     "btr ax, bx": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x0f 0xb3",
       "ExpectedArm64ASM": [
-        "and x20, x7, #0xf",
-        "lsr w21, w4, w20",
-        "ubfx x21, x21, #0, #1",
-        "lsl x21, x21, #29",
-        "mov w22, #0x1",
-        "lsl w20, w22, w20",
-        "bic w20, w4, w20",
-        "bfxil x4, x20, #0, #16",
-        "msr nzcv, x21"
+        "mov x20, x7",
+        "mov x21, x4",
+        "and x22, x20, #0xf",
+        "lsr w20, w21, w22",
+        "ubfx x23, x20, #0, #1",
+        "lsl x20, x23, #29",
+        "mov w23, #0x1",
+        "lsl w24, w23, w22",
+        "bic w22, w21, w24",
+        "mov x23, x21",
+        "bfxil x23, x22, #0, #16",
+        "mov x4, x23",
+        "msr nzcv, x20"
       ]
     },
     "btr [rax], bx": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x0f 0xb3",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "sbfx x21, x7, #3, #13",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldrb w23, [x4, x21, sxtx]",
-        "bic x22, x23, x22",
-        "strb w22, [x4, x21, sxtx]",
-        "lsr w20, w23, w20",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "sbfx x23, x20, #3, #13",
+        "mov w20, #0x1",
+        "lsl x24, x20, x22",
+        "ldrb w20, [x21, x23, sxtx]",
+        "bic x25, x20, x24",
+        "strb w25, [x21, x23, sxtx]",
+        "lsr w21, w20, w22",
+        "ubfx x20, x21, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "btr eax, ebx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "0x0f 0xb3",
       "ExpectedArm64ASM": [
-        "lsr w20, w4, w7",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "mov w21, #0x1",
-        "lsl w21, w21, w7",
-        "bic w4, w4, w21",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "mov x21, x4",
+        "lsr w22, w21, w20",
+        "ubfx x23, x22, #0, #1",
+        "lsl x22, x23, #29",
+        "mov w23, #0x1",
+        "lsl w24, w23, w20",
+        "bic w20, w21, w24",
+        "mov x4, x20",
+        "msr nzcv, x22"
       ]
     },
     "btr [rax], ebx": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x0f 0xb3",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "sbfx x21, x7, #3, #29",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldrb w23, [x4, x21, sxtx]",
-        "bic x22, x23, x22",
-        "strb w22, [x4, x21, sxtx]",
-        "lsr w20, w23, w20",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "sbfx x23, x20, #3, #29",
+        "mov w20, #0x1",
+        "lsl x24, x20, x22",
+        "ldrb w20, [x21, x23, sxtx]",
+        "bic x25, x20, x24",
+        "strb w25, [x21, x23, sxtx]",
+        "lsr w21, w20, w22",
+        "ubfx x20, x21, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "btr rax, rbx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "0x0f 0xb3",
       "ExpectedArm64ASM": [
-        "lsr x20, x4, x7",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "mov w21, #0x1",
-        "lsl x21, x21, x7",
-        "bic x4, x4, x21",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "mov x21, x4",
+        "lsr x22, x21, x20",
+        "ubfx x23, x22, #0, #1",
+        "lsl x22, x23, #29",
+        "mov w23, #0x1",
+        "lsl x24, x23, x20",
+        "bic x20, x21, x24",
+        "mov x4, x20",
+        "msr nzcv, x22"
       ]
     },
     "btr [rax], rbx": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x0f 0xb3",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "asr x21, x7, #3",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldrb w23, [x4, x21, sxtx]",
-        "bic x22, x23, x22",
-        "strb w22, [x4, x21, sxtx]",
-        "lsr w20, w23, w20",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "asr x23, x20, #3",
+        "mov w20, #0x1",
+        "lsl x24, x20, x22",
+        "ldrb w20, [x21, x23, sxtx]",
+        "bic x25, x20, x24",
+        "strb w25, [x21, x23, sxtx]",
+        "lsr w21, w20, w22",
+        "ubfx x20, x21, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "movzx ax, bl": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0xb6",
       "ExpectedArm64ASM": [
-        "uxtb w20, w7",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x7",
+        "uxtb w21, w20",
+        "mov x20, x4",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22"
       ]
     },
     "lock btr [rax], bx": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 12,
       "Comment": "0x0f 0xb3",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "sbfx x21, x7, #3, #13",
-        "add x21, x4, x21",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldclralb w22, w21, [x21]",
-        "lsr w20, w21, w20",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "sbfx x23, x20, #3, #13",
+        "add x20, x21, x23",
+        "mov w21, #0x1",
+        "lsl x23, x21, x22",
+        "ldclralb w23, w21, [x20]",
+        "lsr w20, w21, w22",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "lock btr [rax], ebx": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 12,
       "Comment": "0x0f 0xb3",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "sbfx x21, x7, #3, #29",
-        "add x21, x4, x21",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldclralb w22, w21, [x21]",
-        "lsr w20, w21, w20",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "sbfx x23, x20, #3, #29",
+        "add x20, x21, x23",
+        "mov w21, #0x1",
+        "lsl x23, x21, x22",
+        "ldclralb w23, w21, [x20]",
+        "lsr w20, w21, w22",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "lock btr [rax], rbx": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 12,
       "Comment": "0x0f 0xb3",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "asr x21, x7, #3",
-        "add x21, x4, x21",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldclralb w22, w21, [x21]",
-        "lsr w20, w21, w20",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "asr x23, x20, #3",
+        "add x20, x21, x23",
+        "mov w21, #0x1",
+        "lsl x23, x21, x22",
+        "ldclralb w23, w21, [x20]",
+        "lsr w20, w21, w22",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "movzx ax, byte [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0xb6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4]",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "ldrb w21, [x20]",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22"
       ]
     },
     "movzx eax, bl": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0xb6",
       "ExpectedArm64ASM": [
-        "uxtb w4, w7"
+        "mov x20, x7",
+        "uxtb w21, w20",
+        "mov x4, x21"
       ]
     },
     "movzx eax, byte [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0xb6",
       "ExpectedArm64ASM": [
-        "ldrb w4, [x4]"
+        "mov x20, x4",
+        "ldrb w21, [x20]",
+        "mov x4, x21"
       ]
     },
     "movzx rax, bl": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0xb6",
       "ExpectedArm64ASM": [
-        "uxtb w4, w7"
+        "mov x20, x7",
+        "uxtb w21, w20",
+        "mov x4, x21"
       ]
     },
     "movzx rax, byte [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0xb6",
       "ExpectedArm64ASM": [
-        "ldrb w4, [x4]"
+        "mov x20, x4",
+        "ldrb w21, [x20]",
+        "mov x4, x21"
       ]
     },
     "movzx eax, bx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0xb7",
       "ExpectedArm64ASM": [
-        "uxth w4, w7"
+        "mov x20, x7",
+        "uxth w21, w20",
+        "mov x4, x21"
       ]
     },
     "movzx eax, word [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0xb7",
       "ExpectedArm64ASM": [
-        "ldrh w4, [x4]"
+        "mov x20, x4",
+        "ldrh w21, [x20]",
+        "mov x4, x21"
       ]
     },
     "movzx rax, bx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0xb7",
       "ExpectedArm64ASM": [
-        "uxth w4, w7"
+        "mov x20, x7",
+        "uxth w21, w20",
+        "mov x4, x21"
       ]
     },
     "movzx rax, word [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0xb7",
       "ExpectedArm64ASM": [
-        "ldrh w4, [x4]"
+        "mov x20, x4",
+        "ldrh w21, [x20]",
+        "mov x4, x21"
       ]
     },
     "btc ax, bx": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x0f 0xbb",
       "ExpectedArm64ASM": [
-        "and x20, x7, #0xf",
-        "lsr w21, w4, w20",
-        "ubfx x21, x21, #0, #1",
-        "lsl x21, x21, #29",
-        "mov w22, #0x1",
-        "lsl w20, w22, w20",
-        "eor w20, w4, w20",
-        "bfxil x4, x20, #0, #16",
-        "msr nzcv, x21"
+        "mov x20, x7",
+        "mov x21, x4",
+        "and x22, x20, #0xf",
+        "lsr w20, w21, w22",
+        "ubfx x23, x20, #0, #1",
+        "lsl x20, x23, #29",
+        "mov w23, #0x1",
+        "lsl w24, w23, w22",
+        "eor w22, w21, w24",
+        "mov x23, x21",
+        "bfxil x23, x22, #0, #16",
+        "mov x4, x23",
+        "msr nzcv, x20"
       ]
     },
     "btc [rax], bx": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x0f 0xbb",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "sbfx x21, x7, #3, #13",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldrb w23, [x4, x21, sxtx]",
-        "eor x22, x23, x22",
-        "strb w22, [x4, x21, sxtx]",
-        "lsr w20, w23, w20",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "sbfx x23, x20, #3, #13",
+        "mov w20, #0x1",
+        "lsl x24, x20, x22",
+        "ldrb w20, [x21, x23, sxtx]",
+        "eor x25, x20, x24",
+        "strb w25, [x21, x23, sxtx]",
+        "lsr w21, w20, w22",
+        "ubfx x20, x21, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "btc eax, ebx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "0x0f 0xbb",
       "ExpectedArm64ASM": [
-        "lsr w20, w4, w7",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "mov w21, #0x1",
-        "lsl w21, w21, w7",
-        "eor w4, w4, w21",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "mov x21, x4",
+        "lsr w22, w21, w20",
+        "ubfx x23, x22, #0, #1",
+        "lsl x22, x23, #29",
+        "mov w23, #0x1",
+        "lsl w24, w23, w20",
+        "eor w20, w21, w24",
+        "mov x4, x20",
+        "msr nzcv, x22"
       ]
     },
     "btc [rax], ebx": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x0f 0xbb",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "sbfx x21, x7, #3, #29",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldrb w23, [x4, x21, sxtx]",
-        "eor x22, x23, x22",
-        "strb w22, [x4, x21, sxtx]",
-        "lsr w20, w23, w20",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "sbfx x23, x20, #3, #29",
+        "mov w20, #0x1",
+        "lsl x24, x20, x22",
+        "ldrb w20, [x21, x23, sxtx]",
+        "eor x25, x20, x24",
+        "strb w25, [x21, x23, sxtx]",
+        "lsr w21, w20, w22",
+        "ubfx x20, x21, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "btc rax, rbx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": "0x0f 0xbb",
       "ExpectedArm64ASM": [
-        "lsr x20, x4, x7",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "mov w21, #0x1",
-        "lsl x21, x21, x7",
-        "eor x4, x4, x21",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "mov x21, x4",
+        "lsr x22, x21, x20",
+        "ubfx x23, x22, #0, #1",
+        "lsl x22, x23, #29",
+        "mov w23, #0x1",
+        "lsl x24, x23, x20",
+        "eor x20, x21, x24",
+        "mov x4, x20",
+        "msr nzcv, x22"
       ]
     },
     "btc [rax], rbx": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x0f 0xbb",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "asr x21, x7, #3",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldrb w23, [x4, x21, sxtx]",
-        "eor x22, x23, x22",
-        "strb w22, [x4, x21, sxtx]",
-        "lsr w20, w23, w20",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "asr x23, x20, #3",
+        "mov w20, #0x1",
+        "lsl x24, x20, x22",
+        "ldrb w20, [x21, x23, sxtx]",
+        "eor x25, x20, x24",
+        "strb w25, [x21, x23, sxtx]",
+        "lsr w21, w20, w22",
+        "ubfx x20, x21, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "lock btc [rax], bx": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 12,
       "Comment": "0x0f 0xb3",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "sbfx x21, x7, #3, #13",
-        "add x21, x4, x21",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldeoralb w22, w21, [x21]",
-        "lsr w20, w21, w20",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "sbfx x23, x20, #3, #13",
+        "add x20, x21, x23",
+        "mov w21, #0x1",
+        "lsl x23, x21, x22",
+        "ldeoralb w23, w21, [x20]",
+        "lsr w20, w21, w22",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "lock btc [rax], ebx": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 12,
       "Comment": "0x0f 0xb3",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "sbfx x21, x7, #3, #29",
-        "add x21, x4, x21",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldeoralb w22, w21, [x21]",
-        "lsr w20, w21, w20",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "sbfx x23, x20, #3, #29",
+        "add x20, x21, x23",
+        "mov w21, #0x1",
+        "lsl x23, x21, x22",
+        "ldeoralb w23, w21, [x20]",
+        "lsr w20, w21, w22",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "lock btc [rax], rbx": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 12,
       "Comment": "0x0f 0xb3",
       "ExpectedArm64ASM": [
-        "ubfx x20, x7, #0, #3",
-        "asr x21, x7, #3",
-        "add x21, x4, x21",
-        "mov w22, #0x1",
-        "lsl x22, x22, x20",
-        "ldeoralb w22, w21, [x21]",
-        "lsr w20, w21, w20",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x7",
+        "mov x21, x4",
+        "ubfx x22, x20, #0, #3",
+        "asr x23, x20, #3",
+        "add x20, x21, x23",
+        "mov w21, #0x1",
+        "lsl x23, x21, x22",
+        "ldeoralb w23, w21, [x20]",
+        "lsr w20, w21, w22",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "bsf ax, bx": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 14,
       "Comment": "0x0f 0xbc",
       "ExpectedArm64ASM": [
-        "uxth w20, w4",
-        "uxth w21, w7",
-        "uxth w0, w21",
+        "mov x20, x4",
+        "uxth w21, w20",
+        "mov x22, x7",
+        "uxth w23, w22",
+        "uxth w0, w23",
         "cmp w0, #0x0 (0)",
         "rbit w0, w0",
         "clz w22, w0",
         "csinv w22, w22, wzr, ne",
-        "cmn wzr, w21, lsl #16",
-        "csel x20, x20, x22, eq",
-        "bfxil x4, x20, #0, #16"
+        "cmn wzr, w23, lsl #16",
+        "csel x23, x21, x22, eq",
+        "mov x21, x20",
+        "bfxil x21, x23, #0, #16",
+        "mov x4, x21"
       ]
     },
     "bsf eax, ebx": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 11,
       "Comment": "0x0f 0xbc",
       "ExpectedArm64ASM": [
-        "mov w20, w7",
-        "lsr w0, w20, #0",
+        "mov x20, x4",
+        "mov x21, x7",
+        "mov w22, w21",
+        "lsr w0, w22, #0",
         "cmp w0, #0x0 (0)",
         "rbit w0, w0",
         "clz w21, w0",
         "csinv w21, w21, wzr, ne",
-        "tst w20, w20",
-        "csel x4, x4, x21, eq"
+        "tst w22, w22",
+        "csel x22, x20, x21, eq",
+        "mov x4, x22"
       ]
     },
     "bsf rax, rbx": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x0f 0xbc",
       "ExpectedArm64ASM": [
-        "rbit x0, x7",
-        "cmp x7, #0x0 (0)",
-        "clz x20, x0",
-        "csinv x20, x20, xzr, ne",
-        "tst x7, x7",
-        "csel x4, x4, x20, eq"
+        "mov x20, x4",
+        "mov x21, x7",
+        "rbit x0, x21",
+        "cmp x21, #0x0 (0)",
+        "clz x22, x0",
+        "csinv x22, x22, xzr, ne",
+        "tst x21, x21",
+        "csel x21, x20, x22, eq",
+        "mov x4, x21"
       ]
     },
     "bsr ax, bx": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 14,
       "Comment": "0x0f 0xbd",
       "ExpectedArm64ASM": [
-        "uxth w20, w4",
-        "uxth w21, w7",
+        "mov x20, x4",
+        "uxth w21, w20",
+        "mov x22, x7",
+        "uxth w23, w22",
         "mov x0, #0xf",
-        "lsl w22, w21, #16",
+        "lsl w22, w23, #16",
         "orr w22, w22, #0x8000",
         "clz w22, w22",
         "sub x22, x0, x22",
-        "cmn wzr, w21, lsl #16",
-        "csel x20, x20, x22, eq",
-        "bfxil x4, x20, #0, #16"
+        "cmn wzr, w23, lsl #16",
+        "csel x23, x21, x22, eq",
+        "mov x21, x20",
+        "bfxil x21, x23, #0, #16",
+        "mov x4, x21"
       ]
     },
     "bsr eax, ebx": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x0f 0xbd",
       "ExpectedArm64ASM": [
-        "mov w20, w7",
+        "mov x20, x4",
+        "mov x21, x7",
+        "mov w22, w21",
         "mov x0, #0x1f",
-        "clz w21, w20",
+        "clz w21, w22",
         "sub x21, x0, x21",
-        "tst w20, w20",
-        "csel x4, x4, x21, eq"
+        "tst w22, w22",
+        "csel x22, x20, x21, eq",
+        "mov x4, x22"
       ]
     },
     "bsr rax, rbx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x0f 0xbd",
       "ExpectedArm64ASM": [
+        "mov x20, x4",
+        "mov x21, x7",
         "mov x0, #0x3f",
-        "clz x20, x7",
-        "sub x20, x0, x20",
-        "tst x7, x7",
-        "csel x4, x4, x20, eq"
+        "clz x22, x21",
+        "sub x22, x0, x22",
+        "tst x21, x21",
+        "csel x21, x20, x22, eq",
+        "mov x4, x21"
       ]
     },
     "movsx ax, bl": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x0f 0xbe",
       "ExpectedArm64ASM": [
-        "uxtb w20, w7",
-        "sxtb x20, w20",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x7",
+        "uxtb w21, w20",
+        "sxtb x20, w21",
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x4, x22"
       ]
     },
     "movsx ax, byte [rax]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0xbe",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4]",
-        "sxtb x20, w20",
-        "bfxil x4, x20, #0, #16"
+        "mov x20, x4",
+        "ldrb w21, [x20]",
+        "sxtb x22, w21",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21"
       ]
     },
     "movsx eax, bl": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0xbe",
       "ExpectedArm64ASM": [
-        "uxtb w20, w7",
-        "sxtb x20, w20",
-        "mov w4, w20"
+        "mov x20, x7",
+        "uxtb w21, w20",
+        "sxtb x20, w21",
+        "mov w21, w20",
+        "mov x4, x21"
       ]
     },
     "movsx eax, byte [rax]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0xbe",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4]",
-        "sxtb x20, w20",
-        "mov w4, w20"
+        "mov x20, x4",
+        "ldrb w21, [x20]",
+        "sxtb x20, w21",
+        "mov w21, w20",
+        "mov x4, x21"
       ]
     },
     "movsx rax, bl": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0xbe",
       "ExpectedArm64ASM": [
-        "uxtb w20, w7",
-        "sxtb x4, w20"
+        "mov x20, x7",
+        "uxtb w21, w20",
+        "sxtb x20, w21",
+        "mov x4, x20"
       ]
     },
     "movsx rax, byte [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0xbe",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4]",
-        "sxtb x4, w20"
+        "mov x20, x4",
+        "ldrb w21, [x20]",
+        "sxtb x20, w21",
+        "mov x4, x20"
       ]
     },
     "movsx eax, bx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0xbf",
       "ExpectedArm64ASM": [
-        "uxth w20, w7",
-        "sxth x20, w20",
-        "mov w4, w20"
+        "mov x20, x7",
+        "uxth w21, w20",
+        "sxth x20, w21",
+        "mov w21, w20",
+        "mov x4, x21"
       ]
     },
     "movsx eax, word [rax]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0xbf",
       "ExpectedArm64ASM": [
-        "ldrh w20, [x4]",
-        "sxth x20, w20",
-        "mov w4, w20"
+        "mov x20, x4",
+        "ldrh w21, [x20]",
+        "sxth x20, w21",
+        "mov w21, w20",
+        "mov x4, x21"
       ]
     },
     "movsx rax, bx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0xbf",
       "ExpectedArm64ASM": [
-        "uxth w20, w7",
-        "sxth x4, w20"
+        "mov x20, x7",
+        "uxth w21, w20",
+        "sxth x20, w21",
+        "mov x4, x20"
       ]
     },
     "movsx rax, word [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0xbf",
       "ExpectedArm64ASM": [
-        "ldrh w20, [x4]",
-        "sxth x4, w20"
+        "mov x20, x4",
+        "ldrh w21, [x20]",
+        "sxth x20, w21",
+        "mov x4, x20"
       ]
     },
     "xadd al, bl": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 16,
       "Comment": "0x0f 0xc0",
       "ExpectedArm64ASM": [
-        "uxtb w20, w4",
-        "uxtb w21, w7",
-        "eor w27, w20, w21",
-        "lsl w0, w20, #24",
-        "cmn w0, w21, lsl #24",
-        "add w26, w20, w21",
-        "bfxil x7, x20, #0, #8",
-        "bfxil x4, x26, #0, #8"
+        "mov x20, x4",
+        "uxtb w21, w20",
+        "mov x22, x7",
+        "uxtb w23, w22",
+        "eor w24, w21, w23",
+        "mov x27, x24",
+        "lsl w0, w21, #24",
+        "cmn w0, w23, lsl #24",
+        "add w24, w21, w23",
+        "mov x26, x24",
+        "mov x23, x22",
+        "bfxil x23, x21, #0, #8",
+        "mov x7, x23",
+        "mov x21, x20",
+        "bfxil x21, x24, #0, #8",
+        "mov x4, x21"
       ]
     },
     "xadd [rax], bl": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x0f 0xc0",
       "ExpectedArm64ASM": [
-        "uxtb w20, w7",
-        "ldaddalb w20, w21, [x4]",
-        "bfxil x7, x21, #0, #8",
-        "eor w27, w21, w20",
-        "lsl w0, w21, #24",
-        "cmn w0, w20, lsl #24",
-        "add w26, w21, w20"
+        "mov x20, x4",
+        "mov x21, x7",
+        "uxtb w22, w21",
+        "ldaddalb w22, w23, [x20]",
+        "mov x20, x21",
+        "bfxil x20, x23, #0, #8",
+        "mov x7, x20",
+        "eor w20, w23, w22",
+        "mov x27, x20",
+        "lsl w0, w23, #24",
+        "cmn w0, w22, lsl #24",
+        "add w20, w23, w22",
+        "mov x26, x20"
       ]
     },
     "xadd ax, bx": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 16,
       "Comment": "0x0f 0xc1",
       "ExpectedArm64ASM": [
-        "uxth w20, w4",
-        "uxth w21, w7",
-        "eor w27, w20, w21",
-        "lsl w0, w20, #16",
-        "cmn w0, w21, lsl #16",
-        "add w26, w20, w21",
-        "bfxil x7, x20, #0, #16",
-        "bfxil x4, x26, #0, #16"
+        "mov x20, x4",
+        "uxth w21, w20",
+        "mov x22, x7",
+        "uxth w23, w22",
+        "eor w24, w21, w23",
+        "mov x27, x24",
+        "lsl w0, w21, #16",
+        "cmn w0, w23, lsl #16",
+        "add w24, w21, w23",
+        "mov x26, x24",
+        "mov x23, x22",
+        "bfxil x23, x21, #0, #16",
+        "mov x7, x23",
+        "mov x21, x20",
+        "bfxil x21, x24, #0, #16",
+        "mov x4, x21"
       ]
     },
     "xadd [rax], bx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 13,
       "Comment": "0x0f 0xc1",
       "ExpectedArm64ASM": [
-        "uxth w20, w7",
-        "ldaddalh w20, w21, [x4]",
-        "bfxil x7, x21, #0, #16",
-        "eor w27, w21, w20",
-        "lsl w0, w21, #16",
-        "cmn w0, w20, lsl #16",
-        "add w26, w21, w20"
+        "mov x20, x4",
+        "mov x21, x7",
+        "uxth w22, w21",
+        "ldaddalh w22, w23, [x20]",
+        "mov x20, x21",
+        "bfxil x20, x23, #0, #16",
+        "mov x7, x20",
+        "eor w20, w23, w22",
+        "mov x27, x20",
+        "lsl w0, w23, #16",
+        "cmn w0, w22, lsl #16",
+        "add w20, w23, w22",
+        "mov x26, x20"
       ]
     },
     "xadd eax, ebx": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": "0x0f 0xc1",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "mov w21, w7",
-        "eor w27, w20, w21",
-        "adds w26, w20, w21",
-        "mov x7, x20",
-        "mov x4, x26"
+        "mov x20, x4",
+        "mov w21, w20",
+        "mov x20, x7",
+        "mov w22, w20",
+        "eor w20, w21, w22",
+        "mov x27, x20",
+        "adds w20, w21, w22",
+        "mov x26, x20",
+        "mov x7, x21",
+        "mov x4, x20"
       ]
     },
     "xadd [rax], ebx": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x0f 0xc1",
       "ExpectedArm64ASM": [
-        "mov w20, w7",
-        "ldaddal w20, w7, [x4]",
-        "eor w27, w7, w20",
-        "adds w26, w7, w20"
+        "mov x20, x4",
+        "mov x21, x7",
+        "mov w22, w21",
+        "ldaddal w22, w21, [x20]",
+        "mov x7, x21",
+        "eor w20, w21, w22",
+        "mov x27, x20",
+        "adds w20, w21, w22",
+        "mov x26, x20"
       ]
     },
     "xadd rax, rbx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x0f 0xc1",
       "ExpectedArm64ASM": [
         "mov x20, x4",
-        "eor w27, w20, w7",
-        "adds x26, x20, x7",
+        "mov x21, x7",
+        "eor w22, w20, w21",
+        "mov x27, x22",
+        "adds x22, x20, x21",
+        "mov x26, x22",
         "mov x7, x20",
-        "mov x4, x26"
+        "mov x4, x22"
       ]
     },
     "xadd [rax], rbx": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x0f 0xc1",
       "ExpectedArm64ASM": [
-        "mov x20, x7",
-        "ldaddal x20, x7, [x4]",
-        "eor w27, w7, w20",
-        "adds x26, x7, x20"
+        "mov x20, x4",
+        "mov x21, x7",
+        "ldaddal x21, x22, [x20]",
+        "mov x7, x22",
+        "eor w20, w22, w21",
+        "mov x27, x20",
+        "adds x20, x22, x21",
+        "mov x26, x20"
       ]
     },
     "cmpps xmm0, xmm1, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0xc2",
       "ExpectedArm64ASM": [
-        "fcmeq v16.4s, v16.4s, v17.4s"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "fcmeq v4.4s, v3.4s, v2.4s",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpps xmm0, xmm1, 1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0xc2",
       "ExpectedArm64ASM": [
-        "fcmgt v16.4s, v17.4s, v16.4s"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "fcmgt v4.4s, v2.4s, v3.4s",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpps xmm0, xmm1, 2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0xc2",
       "ExpectedArm64ASM": [
-        "fcmge v16.4s, v17.4s, v16.4s"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "fcmge v4.4s, v2.4s, v3.4s",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpps xmm0, xmm1, 3": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x0f 0xc2",
       "ExpectedArm64ASM": [
-        "fcmge v0.4s, v16.4s, v17.4s",
-        "fcmgt v1.4s, v17.4s, v16.4s",
-        "orr v16.16b, v0.16b, v1.16b",
-        "mvn v16.16b, v16.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "fcmge v0.4s, v3.4s, v2.4s",
+        "fcmgt v1.4s, v2.4s, v3.4s",
+        "orr v4.16b, v0.16b, v1.16b",
+        "mvn v4.16b, v4.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpps xmm0, xmm1, 4": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0xc2",
       "ExpectedArm64ASM": [
-        "fcmeq v16.4s, v16.4s, v17.4s",
-        "mvn v16.16b, v16.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "fcmeq v4.4s, v3.4s, v2.4s",
+        "mvn v4.16b, v4.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpps xmm0, xmm1, 5": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0xc2",
       "ExpectedArm64ASM": [
-        "fcmgt v2.4s, v17.4s, v16.4s",
-        "mvn v16.16b, v2.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "fcmgt v4.4s, v2.4s, v3.4s",
+        "mvn v2.16b, v4.16b",
+        "mov v16.16b, v2.16b"
       ]
     },
     "cmpps xmm0, xmm1, 6": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0xc2",
       "ExpectedArm64ASM": [
-        "fcmge v2.4s, v17.4s, v16.4s",
-        "mvn v16.16b, v2.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "fcmge v4.4s, v2.4s, v3.4s",
+        "mvn v2.16b, v4.16b",
+        "mov v16.16b, v2.16b"
       ]
     },
     "cmpps xmm0, xmm1, 7": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0xc2",
       "ExpectedArm64ASM": [
-        "fcmge v0.4s, v16.4s, v17.4s",
-        "fcmgt v1.4s, v17.4s, v16.4s",
-        "orr v16.16b, v0.16b, v1.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "fcmge v0.4s, v3.4s, v2.4s",
+        "fcmgt v1.4s, v2.4s, v3.4s",
+        "orr v4.16b, v0.16b, v1.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "movnti [rax], ebx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x0f 0xc3",
       "ExpectedArm64ASM": [
-        "mov w20, w7",
-        "str w20, [x4]"
+        "mov x20, x7",
+        "mov w21, w20",
+        "mov x20, x4",
+        "str w21, [x20]"
       ]
     },
     "movnti [rax], rbx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0xc3",
       "ExpectedArm64ASM": [
-        "str x7, [x4]"
+        "mov x20, x7",
+        "mov x21, x4",
+        "str x20, [x21]"
       ]
     },
     "pinsrw mm0, eax, 0": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0xc4",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "mov v2.h[0], w4",
-        "str d2, [x28, #768]"
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.h[0], w20",
+        "str d3, [x28, #768]"
       ]
     },
     "pinsrw mm0, eax, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0xc4",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "mov v2.h[1], w4",
-        "str d2, [x28, #768]"
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.h[1], w20",
+        "str d3, [x28, #768]"
       ]
     },
     "pinsrw mm0, eax, 2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0xc4",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "mov v2.h[2], w4",
-        "str d2, [x28, #768]"
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.h[2], w20",
+        "str d3, [x28, #768]"
       ]
     },
     "pinsrw mm0, eax, 3": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0xc4",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "mov v2.h[3], w4",
-        "str d2, [x28, #768]"
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.h[3], w20",
+        "str d3, [x28, #768]"
       ]
     },
     "pinsrw mm0, eax, 4": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0xc4",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "mov v2.h[0], w4",
-        "str d2, [x28, #768]"
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.h[0], w20",
+        "str d3, [x28, #768]"
       ]
     },
     "pinsrw mm0, [rax], 0": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0xc4",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "ld1 {v2.h}[0], [x4]",
-        "str d2, [x28, #768]"
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "ld1 {v3.h}[0], [x20]",
+        "str d3, [x28, #768]"
       ]
     },
     "pinsrw mm0, [rax], 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0xc4",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "ld1 {v2.h}[1], [x4]",
-        "str d2, [x28, #768]"
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "ld1 {v3.h}[1], [x20]",
+        "str d3, [x28, #768]"
       ]
     },
     "pinsrw mm0, [rax], 2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0xc4",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "ld1 {v2.h}[2], [x4]",
-        "str d2, [x28, #768]"
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "ld1 {v3.h}[2], [x20]",
+        "str d3, [x28, #768]"
       ]
     },
     "pinsrw mm0, [rax], 3": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0xc4",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "ld1 {v2.h}[3], [x4]",
-        "str d2, [x28, #768]"
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "ld1 {v3.h}[3], [x20]",
+        "str d3, [x28, #768]"
       ]
     },
     "pinsrw mm0, [rax], 4": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0xc4",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "ld1 {v2.h}[0], [x4]",
-        "str d2, [x28, #768]"
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "ld1 {v3.h}[0], [x20]",
+        "str d3, [x28, #768]"
       ]
     },
     "pextrw eax, mm0, 0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0xc5",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "umov w4, v2.h[0]"
+        "umov w20, v2.h[0]",
+        "mov x4, x20"
       ]
     },
     "pextrw eax, mm0, 1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0xc5",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "umov w4, v2.h[1]"
+        "umov w20, v2.h[1]",
+        "mov x4, x20"
       ]
     },
     "pextrw eax, mm0, 2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0xc5",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "umov w4, v2.h[2]"
+        "umov w20, v2.h[2]",
+        "mov x4, x20"
       ]
     },
     "pextrw eax, mm0, 3": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0xc5",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "umov w4, v2.h[3]"
+        "umov w20, v2.h[3]",
+        "mov x4, x20"
       ]
     },
     "pextrw eax, mm0, 4": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0xc5",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "umov w4, v2.h[0]"
+        "umov w20, v2.h[0]",
+        "mov x4, x20"
       ]
     },
     "shufps xmm0, xmm1, 01000100b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Dst[63:0]    = Src1[63:0]",
         "Dest[127:64] = Src2[63:0]",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "zip1 v16.2d, v16.2d, v17.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "zip1 v4.2d, v2.2d, v3.2d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "shufps xmm0, xmm1, 11101110b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Dst[63:0]    = Src1[127:64]",
         "Dest[127:64] = Src2[127:64]",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "zip2 v16.2d, v16.2d, v17.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "zip2 v4.2d, v2.2d, v3.2d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "shufps xmm0, xmm1, 11100100b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Dst[63:0]    = Src1[63:0]",
         "Dest[127:64] = Src2[127:64]",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.d[1], v17.d[1]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.d[1], v3.d[1]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "shufps xmm0, xmm1, 01001110b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Dst[63:0]    = Src1[63:0]",
         "Dest[127:64] = Src2[127:64]",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "ext v16.16b, v16.16b, v17.16b, #8"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "ext v4.16b, v2.16b, v3.16b, #8",
+        "mov v16.16b, v4.16b"
       ]
     },
     "shufps xmm0, xmm1, 00000000b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Duplicate selected element between each 64-bit segment",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v16.s[0]",
-        "dup v3.4s, v17.s[0]",
-        "zip1 v16.2d, v2.2d, v3.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v2.s[0]",
+        "dup v2.4s, v3.s[0]",
+        "zip1 v3.2d, v4.2d, v2.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "shufps xmm0, xmm1, 00000101b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Duplicate selected element between each 64-bit segment",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v16.s[1]",
-        "dup v3.4s, v17.s[0]",
-        "zip1 v16.2d, v2.2d, v3.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v2.s[1]",
+        "dup v2.4s, v3.s[0]",
+        "zip1 v3.2d, v4.2d, v2.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "shufps xmm0, xmm1, 00001010b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Duplicate selected element between each 64-bit segment",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v16.s[2]",
-        "dup v3.4s, v17.s[0]",
-        "zip1 v16.2d, v2.2d, v3.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v2.s[2]",
+        "dup v2.4s, v3.s[0]",
+        "zip1 v3.2d, v4.2d, v2.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "shufps xmm0, xmm1, 00001111b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Duplicate selected element between each 64-bit segment",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v16.s[3]",
-        "dup v3.4s, v17.s[0]",
-        "zip1 v16.2d, v2.2d, v3.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v2.s[3]",
+        "dup v2.4s, v3.s[0]",
+        "zip1 v3.2d, v4.2d, v2.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "shufps xmm0, xmm1, 01010000b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Duplicate selected element between each 64-bit segment",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v16.s[0]",
-        "dup v3.4s, v17.s[1]",
-        "zip1 v16.2d, v2.2d, v3.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v2.s[0]",
+        "dup v2.4s, v3.s[1]",
+        "zip1 v3.2d, v4.2d, v2.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "shufps xmm0, xmm1, 01010101b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Duplicate selected element between each 64-bit segment",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v16.s[1]",
-        "dup v3.4s, v17.s[1]",
-        "zip1 v16.2d, v2.2d, v3.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v2.s[1]",
+        "dup v2.4s, v3.s[1]",
+        "zip1 v3.2d, v4.2d, v2.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "shufps xmm0, xmm1, 01011010b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Duplicate selected element between each 64-bit segment",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v16.s[2]",
-        "dup v3.4s, v17.s[1]",
-        "zip1 v16.2d, v2.2d, v3.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v2.s[2]",
+        "dup v2.4s, v3.s[1]",
+        "zip1 v3.2d, v4.2d, v2.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "shufps xmm0, xmm1, 01011111b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Duplicate selected element between each 64-bit segment",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v16.s[3]",
-        "dup v3.4s, v17.s[1]",
-        "zip1 v16.2d, v2.2d, v3.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v2.s[3]",
+        "dup v2.4s, v3.s[1]",
+        "zip1 v3.2d, v4.2d, v2.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "shufps xmm0, xmm1, 10100000b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Duplicate selected element between each 64-bit segment",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v16.s[0]",
-        "dup v3.4s, v17.s[2]",
-        "zip1 v16.2d, v2.2d, v3.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v2.s[0]",
+        "dup v2.4s, v3.s[2]",
+        "zip1 v3.2d, v4.2d, v2.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "shufps xmm0, xmm1, 10100101b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Duplicate selected element between each 64-bit segment",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v16.s[1]",
-        "dup v3.4s, v17.s[2]",
-        "zip1 v16.2d, v2.2d, v3.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v2.s[1]",
+        "dup v2.4s, v3.s[2]",
+        "zip1 v3.2d, v4.2d, v2.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "shufps xmm0, xmm1, 10101010b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Duplicate selected element between each 64-bit segment",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v16.s[2]",
-        "dup v3.4s, v17.s[2]",
-        "zip1 v16.2d, v2.2d, v3.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v2.s[2]",
+        "dup v2.4s, v3.s[2]",
+        "zip1 v3.2d, v4.2d, v2.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "shufps xmm0, xmm1, 10101111b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Duplicate selected element between each 64-bit segment",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v16.s[3]",
-        "dup v3.4s, v17.s[2]",
-        "zip1 v16.2d, v2.2d, v3.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v2.s[3]",
+        "dup v2.4s, v3.s[2]",
+        "zip1 v3.2d, v4.2d, v2.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "shufps xmm0, xmm1, 11110000b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Duplicate selected element between each 64-bit segment",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v16.s[0]",
-        "dup v3.4s, v17.s[3]",
-        "zip1 v16.2d, v2.2d, v3.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v2.s[0]",
+        "dup v2.4s, v3.s[3]",
+        "zip1 v3.2d, v4.2d, v2.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "shufps xmm0, xmm1, 11110101b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Duplicate selected element between each 64-bit segment",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v16.s[1]",
-        "dup v3.4s, v17.s[3]",
-        "zip1 v16.2d, v2.2d, v3.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v2.s[1]",
+        "dup v2.4s, v3.s[3]",
+        "zip1 v3.2d, v4.2d, v2.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "shufps xmm0, xmm1, 11111010b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Duplicate selected element between each 64-bit segment",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v16.s[2]",
-        "dup v3.4s, v17.s[3]",
-        "zip1 v16.2d, v2.2d, v3.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v2.s[2]",
+        "dup v2.4s, v3.s[3]",
+        "zip1 v3.2d, v4.2d, v2.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "shufps xmm0, xmm1, 11100000b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Bottom elements duplicated, Top 64-bits inserted",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v16.s[0]",
-        "zip2 v16.2d, v2.2d, v17.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v2.s[0]",
+        "zip2 v2.2d, v4.2d, v3.2d",
+        "mov v16.16b, v2.16b"
       ]
     },
     "shufps xmm0, xmm1, 11100101b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Bottom elements duplicated, Top 64-bits inserted",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v16.s[1]",
-        "zip2 v16.2d, v2.2d, v17.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v2.s[1]",
+        "zip2 v2.2d, v4.2d, v3.2d",
+        "mov v16.16b, v2.16b"
       ]
     },
     "shufps xmm0, xmm1, 11101010b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Bottom elements duplicated, Top 64-bits inserted",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v16.s[2]",
-        "zip2 v16.2d, v2.2d, v17.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v2.s[2]",
+        "zip2 v2.2d, v4.2d, v3.2d",
+        "mov v16.16b, v2.16b"
       ]
     },
     "shufps xmm0, xmm1, 11101111b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Bottom elements duplicated, Top 64-bits inserted",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v16.s[3]",
-        "zip2 v16.2d, v2.2d, v17.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v2.s[3]",
+        "zip2 v2.2d, v4.2d, v3.2d",
+        "mov v16.16b, v2.16b"
       ]
     },
     "shufps xmm0, xmm1, 01000000b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Bottom elements duplicated, Top 64-bits inserted",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v16.s[0]",
-        "zip1 v16.2d, v2.2d, v17.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v2.s[0]",
+        "zip1 v2.2d, v4.2d, v3.2d",
+        "mov v16.16b, v2.16b"
       ]
     },
     "shufps xmm0, xmm1, 01000101b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Bottom elements duplicated, Top 64-bits inserted",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v16.s[1]",
-        "zip1 v16.2d, v2.2d, v17.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v2.s[1]",
+        "zip1 v2.2d, v4.2d, v3.2d",
+        "mov v16.16b, v2.16b"
       ]
     },
     "shufps xmm0, xmm1, 01001010b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Bottom elements duplicated, Top 64-bits inserted",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v16.s[2]",
-        "zip1 v16.2d, v2.2d, v17.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v2.s[2]",
+        "zip1 v2.2d, v4.2d, v3.2d",
+        "mov v16.16b, v2.16b"
       ]
     },
     "shufps xmm0, xmm1, 01001111b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Bottom elements duplicated, Top 64-bits inserted",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v16.s[3]",
-        "zip1 v16.2d, v2.2d, v17.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v2.s[3]",
+        "zip1 v2.2d, v4.2d, v3.2d",
+        "mov v16.16b, v2.16b"
       ]
     },
     "shufps xmm0, xmm1, 00000100b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Top elements duplicated, Bottom 64-bits inserted",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v17.s[0]",
-        "zip1 v16.2d, v16.2d, v2.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v3.s[0]",
+        "zip1 v3.2d, v2.2d, v4.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "shufps xmm0, xmm1, 01010100b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Top elements duplicated, Bottom 64-bits inserted",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v17.s[1]",
-        "zip1 v16.2d, v16.2d, v2.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v3.s[1]",
+        "zip1 v3.2d, v2.2d, v4.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "shufps xmm0, xmm1, 10100100b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Top elements duplicated, Bottom 64-bits inserted",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v17.s[2]",
-        "zip1 v16.2d, v16.2d, v2.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v3.s[2]",
+        "zip1 v3.2d, v2.2d, v4.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "shufps xmm0, xmm1, 11110100b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Top elements duplicated, Bottom 64-bits inserted",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v17.s[3]",
-        "zip1 v16.2d, v16.2d, v2.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v3.s[3]",
+        "zip1 v3.2d, v2.2d, v4.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "shufps xmm0, xmm1, 00001110b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Top elements duplicated, Top 64-bits inserted",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v17.s[0]",
-        "zip2 v16.2d, v16.2d, v2.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v3.s[0]",
+        "zip2 v3.2d, v2.2d, v4.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "shufps xmm0, xmm1, 01011110b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Top elements duplicated, Top 64-bits inserted",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v17.s[1]",
-        "zip2 v16.2d, v16.2d, v2.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v3.s[1]",
+        "zip2 v3.2d, v2.2d, v4.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "shufps xmm0, xmm1, 10101110b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Top elements duplicated, Top 64-bits inserted",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v17.s[2]",
-        "zip2 v16.2d, v16.2d, v2.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v3.s[2]",
+        "zip2 v3.2d, v2.2d, v4.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "shufps xmm0, xmm1, 11111110b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Top elements duplicated, Top 64-bits inserted",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v17.s[3]",
-        "zip2 v16.2d, v16.2d, v2.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v3.s[3]",
+        "zip2 v3.2d, v2.2d, v4.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "shufps xmm0, xmm1, 01000111b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "odd elements inverted, Low 64-bits inserted",
         "SRA quirks with RA fail to understand that v16 is dead",
@@ -3238,12 +4047,15 @@
       ],
       "ExpectedArm64ASM": [
         "mov v2.16b, v16.16b",
-        "mov v2.s[0], v16.s[3]",
-        "zip1 v16.2d, v2.2d, v17.2d"
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.s[0], v2.s[3]",
+        "zip1 v2.2d, v4.2d, v3.2d",
+        "mov v16.16b, v2.16b"
       ]
     },
     "shufps xmm0, xmm1, 11100111b": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "odd elements inverted, Top 64-bits inserted",
         "SRA quirks with RA fail to understand that v16 is dead",
@@ -3252,110 +4064,137 @@
       ],
       "ExpectedArm64ASM": [
         "mov v2.16b, v16.16b",
-        "mov v2.s[0], v16.s[3]",
-        "mov v16.16b, v2.16b",
-        "mov v16.d[1], v17.d[1]"
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.s[0], v2.s[3]",
+        "mov v2.16b, v4.16b",
+        "mov v2.d[1], v3.d[1]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "shufps xmm0, xmm1, 11100001b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Lower 32-bit elements inverted, Top 64-bits inserted",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "rev64 v2.4s, v16.4s",
-        "mov v16.16b, v2.16b",
-        "mov v16.d[1], v17.d[1]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "rev64 v4.4s, v2.4s",
+        "mov v2.16b, v4.16b",
+        "mov v2.d[1], v3.d[1]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "shufps xmm0, xmm1, 01000001b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Lower 32-bit elements inverted, Low 64-bits inserted",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "rev64 v2.4s, v16.4s",
-        "zip1 v16.2d, v2.2d, v17.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "rev64 v4.4s, v2.4s",
+        "zip1 v2.2d, v4.2d, v3.2d",
+        "mov v16.16b, v2.16b"
       ]
     },
     "shufps xmm0, xmm1, 11111111b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Duplicate selected element between each 64-bit segment",
         "0x0f 0xc6"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v16.s[3]",
-        "dup v3.4s, v17.s[3]",
-        "zip1 v16.2d, v2.2d, v3.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "dup v4.4s, v2.s[3]",
+        "dup v2.4s, v3.s[3]",
+        "zip1 v3.2d, v4.2d, v2.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "shufps xmm0, [rax], 0": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x0f 0xc6",
       "ExpectedArm64ASM": [
-        "ldr q2, [x4]",
-        "dup v3.4s, v16.s[0]",
-        "dup v2.4s, v2.s[0]",
-        "zip1 v16.2d, v3.2d, v2.2d"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "ldr q3, [x20]",
+        "dup v4.4s, v2.s[0]",
+        "dup v2.4s, v3.s[0]",
+        "zip1 v3.2d, v4.2d, v2.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "shufps xmm0, xmm1, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0xc6",
       "ExpectedArm64ASM": [
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
         "ldr x0, [x28, #1768]",
-        "ldr q2, [x0, #16]",
-        "tbl v16.16b, {v16.16b, v17.16b}, v2.16b"
+        "ldr q4, [x0, #16]",
+        "tbl v5.16b, {v2.16b, v3.16b}, v4.16b",
+        "mov v16.16b, v5.16b"
       ]
     },
     "shufps xmm1, xmm0, 1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x0f 0xc6",
       "ExpectedArm64ASM": [
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
         "ldr x0, [x28, #1768]",
-        "ldr q2, [x0, #16]",
-        "mov v0.16b, v17.16b",
-        "mov v1.16b, v16.16b",
-        "tbl v17.16b, {v0.16b, v1.16b}, v2.16b"
+        "ldr q4, [x0, #16]",
+        "tbl v5.16b, {v2.16b, v3.16b}, v4.16b",
+        "mov v17.16b, v5.16b"
       ]
     },
     "shufps xmm0, [rax], 1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x0f 0xc6",
       "ExpectedArm64ASM": [
-        "ldr q2, [x4]",
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "ldr q3, [x20]",
         "ldr x0, [x28, #1768]",
-        "ldr q3, [x0, #16]",
-        "mov v0.16b, v16.16b",
-        "mov v1.16b, v2.16b",
-        "tbl v16.16b, {v0.16b, v1.16b}, v3.16b"
+        "ldr q4, [x0, #16]",
+        "tbl v5.16b, {v2.16b, v3.16b}, v4.16b",
+        "mov v16.16b, v5.16b"
       ]
     },
     "shufps xmm0, [rax], 0xFF": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x0f 0xc6",
       "ExpectedArm64ASM": [
-        "ldr q2, [x4]",
-        "dup v3.4s, v16.s[3]",
-        "dup v2.4s, v2.s[3]",
-        "zip1 v16.2d, v3.2d, v2.2d"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "ldr q3, [x20]",
+        "dup v4.4s, v2.s[3]",
+        "dup v2.4s, v3.s[3]",
+        "zip1 v3.2d, v4.2d, v2.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "bswap eax": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0xc8",
       "ExpectedArm64ASM": [
-        "rev w4, w4"
+        "mov x20, x4",
+        "rev w21, w20",
+        "mov x4, x21"
       ]
     },
     "bswap rax": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0xc8",
       "ExpectedArm64ASM": [
-        "rev x4, x4"
+        "mov x20, x4",
+        "rev x21, x20",
+        "mov x4, x21"
       ]
     },
     "psrlw mm0, mm1": {
@@ -3368,8 +4207,8 @@
         "ushr d0, d0, #57",
         "dup v0.8h, v0.h[0]",
         "neg v0.8h, v0.8h",
-        "ushl v2.8h, v2.8h, v0.8h",
-        "str d2, [x28, #768]"
+        "ushl v4.8h, v2.8h, v0.8h",
+        "str d4, [x28, #768]"
       ]
     },
     "psrld mm0, mm1": {
@@ -3382,8 +4221,8 @@
         "ushr d0, d0, #57",
         "dup v0.4s, v0.s[0]",
         "neg v0.4s, v0.4s",
-        "ushl v2.4s, v2.4s, v0.4s",
-        "str d2, [x28, #768]"
+        "ushl v4.4s, v2.4s, v0.4s",
+        "str d4, [x28, #768]"
       ]
     },
     "psrlq mm0, mm1": {
@@ -3396,8 +4235,8 @@
         "ushr d0, d0, #57",
         "dup v0.2d, v0.d[0]",
         "neg v0.2d, v0.2d",
-        "ushl v2.2d, v2.2d, v0.2d",
-        "str d2, [x28, #768]"
+        "ushl v4.2d, v2.2d, v0.2d",
+        "str d4, [x28, #768]"
       ]
     },
     "paddq mm0, mm1": {
@@ -3406,8 +4245,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "add v2.2d, v3.2d, v2.2d",
-        "str d2, [x28, #768]"
+        "add v4.2d, v3.2d, v2.2d",
+        "str d4, [x28, #768]"
       ]
     },
     "pmullw mm0, mm1": {
@@ -3416,22 +4255,23 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "mul v2.8h, v3.8h, v2.8h",
-        "str d2, [x28, #768]"
+        "mul v4.8h, v3.8h, v2.8h",
+        "str d4, [x28, #768]"
       ]
     },
     "pmovmskb eax, mm0": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x0f 0xd7",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
         "ldr d3, [x28, #2272]",
-        "cmlt v2.16b, v2.16b, #0",
-        "and v2.16b, v2.16b, v3.16b",
-        "addp v2.16b, v2.16b, v2.16b",
-        "addp v2.8b, v2.8b, v2.8b",
-        "addp v2.8b, v2.8b, v2.8b",
-        "umov w4, v2.h[0]"
+        "cmlt v4.16b, v2.16b, #0",
+        "and v2.16b, v4.16b, v3.16b",
+        "addp v3.16b, v2.16b, v2.16b",
+        "addp v2.8b, v3.8b, v3.8b",
+        "addp v3.8b, v2.8b, v2.8b",
+        "umov w20, v3.h[0]",
+        "mov x4, x20"
       ]
     },
     "psubusb mm0, mm1": {
@@ -3440,8 +4280,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "uqsub v2.16b, v3.16b, v2.16b",
-        "str d2, [x28, #768]"
+        "uqsub v4.16b, v3.16b, v2.16b",
+        "str d4, [x28, #768]"
       ]
     },
     "psubusw mm0, mm1": {
@@ -3450,8 +4290,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "uqsub v2.8h, v3.8h, v2.8h",
-        "str d2, [x28, #768]"
+        "uqsub v4.8h, v3.8h, v2.8h",
+        "str d4, [x28, #768]"
       ]
     },
     "pminub mm0, mm1": {
@@ -3460,8 +4300,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "umin v2.16b, v3.16b, v2.16b",
-        "str d2, [x28, #768]"
+        "umin v4.16b, v3.16b, v2.16b",
+        "str d4, [x28, #768]"
       ]
     },
     "pand mm0, mm1": {
@@ -3470,8 +4310,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "and v2.16b, v3.16b, v2.16b",
-        "str d2, [x28, #768]"
+        "and v4.16b, v3.16b, v2.16b",
+        "str d4, [x28, #768]"
       ]
     },
     "paddusb mm0, mm1": {
@@ -3480,8 +4320,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "uqadd v2.16b, v3.16b, v2.16b",
-        "str d2, [x28, #768]"
+        "uqadd v4.16b, v3.16b, v2.16b",
+        "str d4, [x28, #768]"
       ]
     },
     "paddusw mm0, mm1": {
@@ -3490,8 +4330,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "uqadd v2.8h, v3.8h, v2.8h",
-        "str d2, [x28, #768]"
+        "uqadd v4.8h, v3.8h, v2.8h",
+        "str d4, [x28, #768]"
       ]
     },
     "pmaxub mm0, mm1": {
@@ -3500,8 +4340,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "umax v2.16b, v3.16b, v2.16b",
-        "str d2, [x28, #768]"
+        "umax v4.16b, v3.16b, v2.16b",
+        "str d4, [x28, #768]"
       ]
     },
     "pandn mm0, mm1": {
@@ -3510,8 +4350,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "bic v2.16b, v2.16b, v3.16b",
-        "str d2, [x28, #768]"
+        "bic v4.16b, v2.16b, v3.16b",
+        "str d4, [x28, #768]"
       ]
     },
     "pavgb mm0, mm1": {
@@ -3520,8 +4360,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "urhadd v2.16b, v3.16b, v2.16b",
-        "str d2, [x28, #768]"
+        "urhadd v4.16b, v3.16b, v2.16b",
+        "str d4, [x28, #768]"
       ]
     },
     "psraw mm0, mm1": {
@@ -3534,8 +4374,8 @@
         "ushr d0, d0, #57",
         "dup v0.8h, v0.h[0]",
         "neg v0.8h, v0.8h",
-        "sshl v2.8h, v2.8h, v0.8h",
-        "str d2, [x28, #768]"
+        "sshl v4.8h, v2.8h, v0.8h",
+        "str d4, [x28, #768]"
       ]
     },
     "psrad mm0, mm1": {
@@ -3548,8 +4388,8 @@
         "ushr d0, d0, #57",
         "dup v0.4s, v0.s[0]",
         "neg v0.4s, v0.4s",
-        "sshl v2.4s, v2.4s, v0.4s",
-        "str d2, [x28, #768]"
+        "sshl v4.4s, v2.4s, v0.4s",
+        "str d4, [x28, #768]"
       ]
     },
     "pavgw mm0, mm1": {
@@ -3558,8 +4398,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "urhadd v2.8h, v3.8h, v2.8h",
-        "str d2, [x28, #768]"
+        "urhadd v4.8h, v3.8h, v2.8h",
+        "str d4, [x28, #768]"
       ]
     },
     "pmulhuw mm0, mm1": {
@@ -3568,9 +4408,9 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
         "ldr d3, [x28, #784]",
-        "umull v2.4s, v2.4h, v3.4h",
-        "shrn v2.4h, v2.4s, #16",
-        "str d2, [x28, #768]"
+        "umull v4.4s, v2.4h, v3.4h",
+        "shrn v4.4h, v4.4s, #16",
+        "str d4, [x28, #768]"
       ]
     },
     "pmulhw mm0, mm1": {
@@ -3579,17 +4419,18 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
         "ldr d3, [x28, #784]",
-        "smull v2.4s, v2.4h, v3.4h",
-        "shrn v2.4h, v2.4s, #16",
-        "str d2, [x28, #768]"
+        "smull v4.4s, v2.4h, v3.4h",
+        "shrn v4.4h, v4.4s, #16",
+        "str d4, [x28, #768]"
       ]
     },
     "movntq [rax], mm0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x0f 0xe7",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "str d2, [x4]"
+        "mov x20, x4",
+        "str d2, [x20]"
       ]
     },
     "psubsb mm0, mm1": {
@@ -3598,8 +4439,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "sqsub v2.16b, v3.16b, v2.16b",
-        "str d2, [x28, #768]"
+        "sqsub v4.16b, v3.16b, v2.16b",
+        "str d4, [x28, #768]"
       ]
     },
     "psubsw mm0, mm1": {
@@ -3608,8 +4449,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "sqsub v2.8h, v3.8h, v2.8h",
-        "str d2, [x28, #768]"
+        "sqsub v4.8h, v3.8h, v2.8h",
+        "str d4, [x28, #768]"
       ]
     },
     "pminsw mm0, mm1": {
@@ -3618,8 +4459,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "smin v2.8h, v3.8h, v2.8h",
-        "str d2, [x28, #768]"
+        "smin v4.8h, v3.8h, v2.8h",
+        "str d4, [x28, #768]"
       ]
     },
     "por mm0, mm1": {
@@ -3628,8 +4469,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "orr v2.16b, v3.16b, v2.16b",
-        "str d2, [x28, #768]"
+        "orr v4.16b, v3.16b, v2.16b",
+        "str d4, [x28, #768]"
       ]
     },
     "paddsb mm0, mm1": {
@@ -3638,8 +4479,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "sqadd v2.16b, v3.16b, v2.16b",
-        "str d2, [x28, #768]"
+        "sqadd v4.16b, v3.16b, v2.16b",
+        "str d4, [x28, #768]"
       ]
     },
     "paddsw mm0, mm1": {
@@ -3648,8 +4489,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "sqadd v2.8h, v3.8h, v2.8h",
-        "str d2, [x28, #768]"
+        "sqadd v4.8h, v3.8h, v2.8h",
+        "str d4, [x28, #768]"
       ]
     },
     "pmaxsw mm0, mm1": {
@@ -3658,8 +4499,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "smax v2.8h, v3.8h, v2.8h",
-        "str d2, [x28, #768]"
+        "smax v4.8h, v3.8h, v2.8h",
+        "str d4, [x28, #768]"
       ]
     },
     "pxor mm0, mm1": {
@@ -3668,8 +4509,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "eor v2.16b, v3.16b, v2.16b",
-        "str d2, [x28, #768]"
+        "eor v4.16b, v3.16b, v2.16b",
+        "str d4, [x28, #768]"
       ]
     },
     "psllw mm0, mm1": {
@@ -3681,8 +4522,8 @@
         "uqshl d0, d3, #57",
         "ushr d0, d0, #57",
         "dup v0.8h, v0.h[0]",
-        "ushl v2.8h, v2.8h, v0.8h",
-        "str d2, [x28, #768]"
+        "ushl v4.8h, v2.8h, v0.8h",
+        "str d4, [x28, #768]"
       ]
     },
     "pslld mm0, mm1": {
@@ -3694,8 +4535,8 @@
         "uqshl d0, d3, #57",
         "ushr d0, d0, #57",
         "dup v0.4s, v0.s[0]",
-        "ushl v2.4s, v2.4s, v0.4s",
-        "str d2, [x28, #768]"
+        "ushl v4.4s, v2.4s, v0.4s",
+        "str d4, [x28, #768]"
       ]
     },
     "psllq mm0, mm1": {
@@ -3707,8 +4548,8 @@
         "uqshl d0, d3, #57",
         "ushr d0, d0, #57",
         "dup v0.2d, v0.d[0]",
-        "ushl v2.2d, v2.2d, v0.2d",
-        "str d2, [x28, #768]"
+        "ushl v4.2d, v2.2d, v0.2d",
+        "str d4, [x28, #768]"
       ]
     },
     "pmuludq mm0, mm1": {
@@ -3717,8 +4558,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
         "ldr d3, [x28, #784]",
-        "umull v2.2d, v2.2s, v3.2s",
-        "str d2, [x28, #768]"
+        "umull v4.2d, v2.2s, v3.2s",
+        "str d4, [x28, #768]"
       ]
     },
     "pmaddwd mm0, mm1": {
@@ -3727,8 +4568,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
         "ldr d3, [x28, #784]",
-        "smull v2.4s, v2.4h, v3.4h",
-        "addp v2.4s, v2.4s, v2.4s",
+        "smull v4.4s, v2.4h, v3.4h",
+        "addp v2.4s, v4.4s, v4.4s",
         "str d2, [x28, #768]"
       ]
     },
@@ -3738,21 +4579,23 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
         "ldr d3, [x28, #784]",
-        "uabdl v2.8h, v2.8b, v3.8b",
-        "addv h2, v2.8h",
+        "uabdl v4.8h, v2.8b, v3.8b",
+        "addv h2, v4.8h",
         "str d2, [x28, #768]"
       ]
     },
     "maskmovq mm0, mm1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x0f 0xf7",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
-        "cmlt v2.16b, v2.16b, #0",
-        "ldr d3, [x28, #768]",
-        "ldr d4, [x11]",
-        "bsl v2.8b, v3.8b, v4.8b",
-        "str d2, [x11]"
+        "cmlt v3.16b, v2.16b, #0",
+        "ldr d2, [x28, #768]",
+        "mov x20, x11",
+        "ldr d4, [x20]",
+        "mov v5.8b, v3.8b",
+        "bsl v5.8b, v2.8b, v4.8b",
+        "str d5, [x20]"
       ]
     },
     "psubb mm0, mm1": {
@@ -3761,8 +4604,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "sub v2.16b, v3.16b, v2.16b",
-        "str d2, [x28, #768]"
+        "sub v4.16b, v3.16b, v2.16b",
+        "str d4, [x28, #768]"
       ]
     },
     "psubw mm0, mm1": {
@@ -3771,8 +4614,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "sub v2.8h, v3.8h, v2.8h",
-        "str d2, [x28, #768]"
+        "sub v4.8h, v3.8h, v2.8h",
+        "str d4, [x28, #768]"
       ]
     },
     "psubd mm0, mm1": {
@@ -3781,8 +4624,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "sub v2.4s, v3.4s, v2.4s",
-        "str d2, [x28, #768]"
+        "sub v4.4s, v3.4s, v2.4s",
+        "str d4, [x28, #768]"
       ]
     },
     "psubq mm0, mm1": {
@@ -3791,8 +4634,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "sub v2.2d, v3.2d, v2.2d",
-        "str d2, [x28, #768]"
+        "sub v4.2d, v3.2d, v2.2d",
+        "str d4, [x28, #768]"
       ]
     },
     "paddb mm0, mm1": {
@@ -3801,8 +4644,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "add v2.16b, v3.16b, v2.16b",
-        "str d2, [x28, #768]"
+        "add v4.16b, v3.16b, v2.16b",
+        "str d4, [x28, #768]"
       ]
     },
     "paddw mm0, mm1": {
@@ -3811,8 +4654,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "add v2.8h, v3.8h, v2.8h",
-        "str d2, [x28, #768]"
+        "add v4.8h, v3.8h, v2.8h",
+        "str d4, [x28, #768]"
       ]
     },
     "paddd mm0, mm1": {
@@ -3821,8 +4664,8 @@
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #784]",
         "ldr d3, [x28, #768]",
-        "add v2.4s, v3.4s, v2.4s",
-        "str d2, [x28, #768]"
+        "add v4.4s, v3.4s, v2.4s",
+        "str d4, [x28, #768]"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/SecondaryGroup.json b/unittests/InstructionCountCI/SecondaryGroup.json
index c0a7059233..b913e06fcd 100644
--- a/unittests/InstructionCountCI/SecondaryGroup.json
+++ b/unittests/InstructionCountCI/SecondaryGroup.json
@@ -13,914 +13,1057 @@
   },
   "Instructions": {
     "sgdt [rax]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP7 0x0F 0x1 /0",
       "ExpectedArm64ASM": [
-        "mov w20, #0x0",
-        "strh w20, [x4]",
-        "mov x20, #0xfffffffffffe0000",
-        "stur x20, [x4, #2]"
+        "mov x20, x4",
+        "mov w21, #0x0",
+        "strh w21, [x20]",
+        "mov x21, #0xfffffffffffe0000",
+        "stur x21, [x20, #2]"
       ]
     },
     "bt ax, 0": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP8 0x0F 0xBA /4",
       "ExpectedArm64ASM": [
-        "ubfx x20, x4, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x4",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "bt eax, 0": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP8 0x0F 0xBA /4",
       "ExpectedArm64ASM": [
-        "ubfx x20, x4, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x4",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "bt rax, 0": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP8 0x0F 0xBA /4",
       "ExpectedArm64ASM": [
-        "ubfx x20, x4, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x4",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "bt ax, 15": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP8 0x0F 0xBA /4",
       "ExpectedArm64ASM": [
-        "ubfx x20, x4, #15, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x4",
+        "ubfx x21, x20, #15, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "bt eax, 31": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP8 0x0F 0xBA /4",
       "ExpectedArm64ASM": [
-        "ubfx x20, x4, #31, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x4",
+        "ubfx x21, x20, #31, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "bt rax, 63": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 4,
       "Comment": "GROUP8 0x0F 0xBA /4",
       "ExpectedArm64ASM": [
-        "lsr x20, x4, #63",
-        "lsl x20, x20, #29",
+        "mov x20, x4",
+        "lsr x21, x20, #63",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "bt word [rax], 0": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4]",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "ldrb w21, [x20]",
+        "ubfx x20, x21, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "bt dword [rax], 0": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4]",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "ldrb w21, [x20]",
+        "ubfx x20, x21, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "bt qword [rax], 0": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4]",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "ldrb w21, [x20]",
+        "ubfx x20, x21, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "bt word [rax], 15": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4, #1]",
-        "lsr w20, w20, #7",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x4",
+        "ldrb w21, [x20, #1]",
+        "lsr w20, w21, #7",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "bt dword [rax], 31": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4, #3]",
-        "lsr w20, w20, #7",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x4",
+        "ldrb w21, [x20, #3]",
+        "lsr w20, w21, #7",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "bt qword [rax], 63": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4, #7]",
-        "lsr w20, w20, #7",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x4",
+        "ldrb w21, [x20, #7]",
+        "lsr w20, w21, #7",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "bts ax, 0": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP8 0x0F 0xBA /5",
       "ExpectedArm64ASM": [
-        "ubfx x20, x4, #0, #1",
-        "lsl x20, x20, #29",
-        "orr w21, w4, #0x1",
-        "bfxil x4, x21, #0, #16",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "ubfx x21, x20, #0, #1",
+        "lsl x22, x21, #29",
+        "orr w21, w20, #0x1",
+        "mov x23, x20",
+        "bfxil x23, x21, #0, #16",
+        "mov x4, x23",
+        "msr nzcv, x22"
       ]
     },
     "bts eax, 0": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /5",
       "ExpectedArm64ASM": [
-        "ubfx x20, x4, #0, #1",
-        "lsl x20, x20, #29",
-        "orr w4, w4, #0x1",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "ubfx x21, x20, #0, #1",
+        "lsl x22, x21, #29",
+        "orr w21, w20, #0x1",
+        "mov x4, x21",
+        "msr nzcv, x22"
       ]
     },
     "bts rax, 0": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /5",
       "ExpectedArm64ASM": [
-        "ubfx x20, x4, #0, #1",
-        "lsl x20, x20, #29",
-        "orr x4, x4, #0x1",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "ubfx x21, x20, #0, #1",
+        "lsl x22, x21, #29",
+        "orr x21, x20, #0x1",
+        "mov x4, x21",
+        "msr nzcv, x22"
       ]
     },
     "bts ax, 15": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP8 0x0F 0xBA /5",
       "ExpectedArm64ASM": [
-        "ubfx x20, x4, #15, #1",
-        "lsl x20, x20, #29",
-        "orr w21, w4, #0x8000",
-        "bfxil x4, x21, #0, #16",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "ubfx x21, x20, #15, #1",
+        "lsl x22, x21, #29",
+        "orr w21, w20, #0x8000",
+        "mov x23, x20",
+        "bfxil x23, x21, #0, #16",
+        "mov x4, x23",
+        "msr nzcv, x22"
       ]
     },
     "bts eax, 31": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /5",
       "ExpectedArm64ASM": [
-        "ubfx x20, x4, #31, #1",
-        "lsl x20, x20, #29",
-        "orr w4, w4, #0x80000000",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "ubfx x21, x20, #31, #1",
+        "lsl x22, x21, #29",
+        "orr w21, w20, #0x80000000",
+        "mov x4, x21",
+        "msr nzcv, x22"
       ]
     },
     "bts rax, 63": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /5",
       "ExpectedArm64ASM": [
-        "lsr x20, x4, #63",
-        "lsl x20, x20, #29",
-        "orr x4, x4, #0x8000000000000000",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "lsr x21, x20, #63",
+        "lsl x22, x21, #29",
+        "orr x21, x20, #0x8000000000000000",
+        "mov x4, x21",
+        "msr nzcv, x22"
       ]
     },
     "bts word [rax], 0": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4]",
-        "orr x21, x20, #0x1",
-        "strb w21, [x4]",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "ldrb w21, [x20]",
+        "orr x22, x21, #0x1",
+        "strb w22, [x20]",
+        "ubfx x20, x21, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "bts dword [rax], 0": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4]",
-        "orr x21, x20, #0x1",
-        "strb w21, [x4]",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "ldrb w21, [x20]",
+        "orr x22, x21, #0x1",
+        "strb w22, [x20]",
+        "ubfx x20, x21, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "bts qword [rax], 0": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4]",
-        "orr x21, x20, #0x1",
-        "strb w21, [x4]",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "ldrb w21, [x20]",
+        "orr x22, x21, #0x1",
+        "strb w22, [x20]",
+        "ubfx x20, x21, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "bts word [rax], 15": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4, #1]",
-        "orr x21, x20, #0x80",
-        "strb w21, [x4, #1]",
-        "lsr w20, w20, #7",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x4",
+        "ldrb w21, [x20, #1]",
+        "orr x22, x21, #0x80",
+        "strb w22, [x20, #1]",
+        "lsr w20, w21, #7",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "bts dword [rax], 31": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4, #3]",
-        "orr x21, x20, #0x80",
-        "strb w21, [x4, #3]",
-        "lsr w20, w20, #7",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x4",
+        "ldrb w21, [x20, #3]",
+        "orr x22, x21, #0x80",
+        "strb w22, [x20, #3]",
+        "lsr w20, w21, #7",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "bts qword [rax], 63": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4, #7]",
-        "orr x21, x20, #0x80",
-        "strb w21, [x4, #7]",
-        "lsr w20, w20, #7",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x4",
+        "ldrb w21, [x20, #7]",
+        "orr x22, x21, #0x80",
+        "strb w22, [x20, #7]",
+        "lsr w20, w21, #7",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "lock bts word [rax], 0": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x0 (0)",
-        "mov w21, #0x1",
-        "ldsetalb w21, w20, [x20]",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "add x21, x20, #0x0 (0)",
+        "mov w20, #0x1",
+        "ldsetalb w20, w22, [x21]",
+        "ubfx x20, x22, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "lock bts dword [rax], 0": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x0 (0)",
-        "mov w21, #0x1",
-        "ldsetalb w21, w20, [x20]",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "add x21, x20, #0x0 (0)",
+        "mov w20, #0x1",
+        "ldsetalb w20, w22, [x21]",
+        "ubfx x20, x22, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "lock bts qword [rax], 0": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x0 (0)",
-        "mov w21, #0x1",
-        "ldsetalb w21, w20, [x20]",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "add x21, x20, #0x0 (0)",
+        "mov w20, #0x1",
+        "ldsetalb w20, w22, [x21]",
+        "ubfx x20, x22, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "lock bts word [rax], 15": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x1 (1)",
-        "mov w21, #0x80",
-        "ldsetalb w21, w20, [x20]",
-        "lsr w20, w20, #7",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x4",
+        "add x21, x20, #0x1 (1)",
+        "mov w20, #0x80",
+        "ldsetalb w20, w22, [x21]",
+        "lsr w20, w22, #7",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "lock bts dword [rax], 31": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x3 (3)",
-        "mov w21, #0x80",
-        "ldsetalb w21, w20, [x20]",
-        "lsr w20, w20, #7",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x4",
+        "add x21, x20, #0x3 (3)",
+        "mov w20, #0x80",
+        "ldsetalb w20, w22, [x21]",
+        "lsr w20, w22, #7",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "lock bts qword [rax], 63": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x7 (7)",
-        "mov w21, #0x80",
-        "ldsetalb w21, w20, [x20]",
-        "lsr w20, w20, #7",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x4",
+        "add x21, x20, #0x7 (7)",
+        "mov w20, #0x80",
+        "ldsetalb w20, w22, [x21]",
+        "lsr w20, w22, #7",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "btr ax, 0": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ubfx x20, x4, #0, #1",
-        "lsl x20, x20, #29",
-        "and w21, w4, #0xfffffffe",
-        "bfxil x4, x21, #0, #16",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "ubfx x21, x20, #0, #1",
+        "lsl x22, x21, #29",
+        "and w21, w20, #0xfffffffe",
+        "mov x23, x20",
+        "bfxil x23, x21, #0, #16",
+        "mov x4, x23",
+        "msr nzcv, x22"
       ]
     },
     "btr eax, 0": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ubfx x20, x4, #0, #1",
-        "lsl x20, x20, #29",
-        "and w4, w4, #0xfffffffe",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "ubfx x21, x20, #0, #1",
+        "lsl x22, x21, #29",
+        "and w21, w20, #0xfffffffe",
+        "mov x4, x21",
+        "msr nzcv, x22"
       ]
     },
     "btr rax, 0": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ubfx x20, x4, #0, #1",
-        "lsl x20, x20, #29",
-        "and x4, x4, #0xfffffffffffffffe",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "ubfx x21, x20, #0, #1",
+        "lsl x22, x21, #29",
+        "and x21, x20, #0xfffffffffffffffe",
+        "mov x4, x21",
+        "msr nzcv, x22"
       ]
     },
     "btr ax, 15": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ubfx x20, x4, #15, #1",
-        "lsl x20, x20, #29",
-        "and w21, w4, #0xffff7fff",
-        "bfxil x4, x21, #0, #16",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "ubfx x21, x20, #15, #1",
+        "lsl x22, x21, #29",
+        "and w21, w20, #0xffff7fff",
+        "mov x23, x20",
+        "bfxil x23, x21, #0, #16",
+        "mov x4, x23",
+        "msr nzcv, x22"
       ]
     },
     "btr eax, 31": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ubfx x20, x4, #31, #1",
-        "lsl x20, x20, #29",
-        "and w4, w4, #0x7fffffff",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "ubfx x21, x20, #31, #1",
+        "lsl x22, x21, #29",
+        "and w21, w20, #0x7fffffff",
+        "mov x4, x21",
+        "msr nzcv, x22"
       ]
     },
     "btr rax, 63": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "lsr x20, x4, #63",
-        "lsl x20, x20, #29",
-        "and x4, x4, #0x7fffffffffffffff",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "lsr x21, x20, #63",
+        "lsl x22, x21, #29",
+        "and x21, x20, #0x7fffffffffffffff",
+        "mov x4, x21",
+        "msr nzcv, x22"
       ]
     },
     "btr word [rax], 0": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4]",
-        "and x21, x20, #0xfffffffffffffffe",
-        "strb w21, [x4]",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "ldrb w21, [x20]",
+        "and x22, x21, #0xfffffffffffffffe",
+        "strb w22, [x20]",
+        "ubfx x20, x21, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "btr dword [rax], 0": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4]",
-        "and x21, x20, #0xfffffffffffffffe",
-        "strb w21, [x4]",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "ldrb w21, [x20]",
+        "and x22, x21, #0xfffffffffffffffe",
+        "strb w22, [x20]",
+        "ubfx x20, x21, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "btr qword [rax], 0": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4]",
-        "and x21, x20, #0xfffffffffffffffe",
-        "strb w21, [x4]",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "ldrb w21, [x20]",
+        "and x22, x21, #0xfffffffffffffffe",
+        "strb w22, [x20]",
+        "ubfx x20, x21, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "btr word [rax], 15": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4, #1]",
-        "and x21, x20, #0xffffffffffffff7f",
-        "strb w21, [x4, #1]",
-        "lsr w20, w20, #7",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x4",
+        "ldrb w21, [x20, #1]",
+        "and x22, x21, #0xffffffffffffff7f",
+        "strb w22, [x20, #1]",
+        "lsr w20, w21, #7",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "btr dword [rax], 31": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4, #3]",
-        "and x21, x20, #0xffffffffffffff7f",
-        "strb w21, [x4, #3]",
-        "lsr w20, w20, #7",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x4",
+        "ldrb w21, [x20, #3]",
+        "and x22, x21, #0xffffffffffffff7f",
+        "strb w22, [x20, #3]",
+        "lsr w20, w21, #7",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "btr qword [rax], 63": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4, #7]",
-        "and x21, x20, #0xffffffffffffff7f",
-        "strb w21, [x4, #7]",
-        "lsr w20, w20, #7",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x4",
+        "ldrb w21, [x20, #7]",
+        "and x22, x21, #0xffffffffffffff7f",
+        "strb w22, [x20, #7]",
+        "lsr w20, w21, #7",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "lock btr word [rax], 0": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x0 (0)",
-        "mov w21, #0x1",
-        "ldclralb w21, w20, [x20]",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "add x21, x20, #0x0 (0)",
+        "mov w20, #0x1",
+        "ldclralb w20, w22, [x21]",
+        "ubfx x20, x22, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "lock btr dword [rax], 0": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x0 (0)",
-        "mov w21, #0x1",
-        "ldclralb w21, w20, [x20]",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "add x21, x20, #0x0 (0)",
+        "mov w20, #0x1",
+        "ldclralb w20, w22, [x21]",
+        "ubfx x20, x22, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "lock btr qword [rax], 0": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x0 (0)",
-        "mov w21, #0x1",
-        "ldclralb w21, w20, [x20]",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "add x21, x20, #0x0 (0)",
+        "mov w20, #0x1",
+        "ldclralb w20, w22, [x21]",
+        "ubfx x20, x22, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "lock btr word [rax], 15": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x1 (1)",
-        "mov w21, #0x80",
-        "ldclralb w21, w20, [x20]",
-        "lsr w20, w20, #7",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x4",
+        "add x21, x20, #0x1 (1)",
+        "mov w20, #0x80",
+        "ldclralb w20, w22, [x21]",
+        "lsr w20, w22, #7",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "lock btr dword [rax], 31": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x3 (3)",
-        "mov w21, #0x80",
-        "ldclralb w21, w20, [x20]",
-        "lsr w20, w20, #7",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x4",
+        "add x21, x20, #0x3 (3)",
+        "mov w20, #0x80",
+        "ldclralb w20, w22, [x21]",
+        "lsr w20, w22, #7",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "lock btr qword [rax], 63": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x7 (7)",
-        "mov w21, #0x80",
-        "ldclralb w21, w20, [x20]",
-        "lsr w20, w20, #7",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x4",
+        "add x21, x20, #0x7 (7)",
+        "mov w20, #0x80",
+        "ldclralb w20, w22, [x21]",
+        "lsr w20, w22, #7",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "btc ax, 0": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP8 0x0F 0xBA /7",
       "ExpectedArm64ASM": [
-        "ubfx x20, x4, #0, #1",
-        "lsl x20, x20, #29",
-        "eor w21, w4, #0x1",
-        "bfxil x4, x21, #0, #16",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "ubfx x21, x20, #0, #1",
+        "lsl x22, x21, #29",
+        "eor w21, w20, #0x1",
+        "mov x23, x20",
+        "bfxil x23, x21, #0, #16",
+        "mov x4, x23",
+        "msr nzcv, x22"
       ]
     },
     "btc eax, 0": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /7",
       "ExpectedArm64ASM": [
-        "ubfx x20, x4, #0, #1",
-        "lsl x20, x20, #29",
-        "eor w4, w4, #0x1",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "ubfx x21, x20, #0, #1",
+        "lsl x22, x21, #29",
+        "eor w21, w20, #0x1",
+        "mov x4, x21",
+        "msr nzcv, x22"
       ]
     },
     "btc rax, 0": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /7",
       "ExpectedArm64ASM": [
-        "ubfx x20, x4, #0, #1",
-        "lsl x20, x20, #29",
-        "eor x4, x4, #0x1",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "ubfx x21, x20, #0, #1",
+        "lsl x22, x21, #29",
+        "eor x21, x20, #0x1",
+        "mov x4, x21",
+        "msr nzcv, x22"
       ]
     },
     "btc ax, 15": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP8 0x0F 0xBA /7",
       "ExpectedArm64ASM": [
-        "ubfx x20, x4, #15, #1",
-        "lsl x20, x20, #29",
-        "eor w21, w4, #0x8000",
-        "bfxil x4, x21, #0, #16",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "ubfx x21, x20, #15, #1",
+        "lsl x22, x21, #29",
+        "eor w21, w20, #0x8000",
+        "mov x23, x20",
+        "bfxil x23, x21, #0, #16",
+        "mov x4, x23",
+        "msr nzcv, x22"
       ]
     },
     "btc eax, 31": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /7",
       "ExpectedArm64ASM": [
-        "ubfx x20, x4, #31, #1",
-        "lsl x20, x20, #29",
-        "eor w4, w4, #0x80000000",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "ubfx x21, x20, #31, #1",
+        "lsl x22, x21, #29",
+        "eor w21, w20, #0x80000000",
+        "mov x4, x21",
+        "msr nzcv, x22"
       ]
     },
     "btc rax, 63": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": "GROUP8 0x0F 0xBA /7",
       "ExpectedArm64ASM": [
-        "lsr x20, x4, #63",
-        "lsl x20, x20, #29",
-        "eor x4, x4, #0x8000000000000000",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "lsr x21, x20, #63",
+        "lsl x22, x21, #29",
+        "eor x21, x20, #0x8000000000000000",
+        "mov x4, x21",
+        "msr nzcv, x22"
       ]
     },
     "btc word [rax], 0": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4]",
-        "eor x21, x20, #0x1",
-        "strb w21, [x4]",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "ldrb w21, [x20]",
+        "eor x22, x21, #0x1",
+        "strb w22, [x20]",
+        "ubfx x20, x21, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "btc dword [rax], 0": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4]",
-        "eor x21, x20, #0x1",
-        "strb w21, [x4]",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "ldrb w21, [x20]",
+        "eor x22, x21, #0x1",
+        "strb w22, [x20]",
+        "ubfx x20, x21, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "btc qword [rax], 0": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4]",
-        "eor x21, x20, #0x1",
-        "strb w21, [x4]",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "ldrb w21, [x20]",
+        "eor x22, x21, #0x1",
+        "strb w22, [x20]",
+        "ubfx x20, x21, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "btc word [rax], 15": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4, #1]",
-        "eor x21, x20, #0x80",
-        "strb w21, [x4, #1]",
-        "lsr w20, w20, #7",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x4",
+        "ldrb w21, [x20, #1]",
+        "eor x22, x21, #0x80",
+        "strb w22, [x20, #1]",
+        "lsr w20, w21, #7",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "btc dword [rax], 31": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4, #3]",
-        "eor x21, x20, #0x80",
-        "strb w21, [x4, #3]",
-        "lsr w20, w20, #7",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x4",
+        "ldrb w21, [x20, #3]",
+        "eor x22, x21, #0x80",
+        "strb w22, [x20, #3]",
+        "lsr w20, w21, #7",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "btc qword [rax], 63": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "ldrb w20, [x4, #7]",
-        "eor x21, x20, #0x80",
-        "strb w21, [x4, #7]",
-        "lsr w20, w20, #7",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x4",
+        "ldrb w21, [x20, #7]",
+        "eor x22, x21, #0x80",
+        "strb w22, [x20, #7]",
+        "lsr w20, w21, #7",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "lock btc word [rax], 0": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x0 (0)",
-        "mov w21, #0x1",
-        "ldeoralb w21, w20, [x20]",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "add x21, x20, #0x0 (0)",
+        "mov w20, #0x1",
+        "ldeoralb w20, w22, [x21]",
+        "ubfx x20, x22, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "lock btc dword [rax], 0": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x0 (0)",
-        "mov w21, #0x1",
-        "ldeoralb w21, w20, [x20]",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "add x21, x20, #0x0 (0)",
+        "mov w20, #0x1",
+        "ldeoralb w20, w22, [x21]",
+        "ubfx x20, x22, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "lock btc qword [rax], 0": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 7,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x0 (0)",
-        "mov w21, #0x1",
-        "ldeoralb w21, w20, [x20]",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
-        "msr nzcv, x20"
+        "mov x20, x4",
+        "add x21, x20, #0x0 (0)",
+        "mov w20, #0x1",
+        "ldeoralb w20, w22, [x21]",
+        "ubfx x20, x22, #0, #1",
+        "lsl x21, x20, #29",
+        "msr nzcv, x21"
       ]
     },
     "lock btc word [rax], 15": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x1 (1)",
-        "mov w21, #0x80",
-        "ldeoralb w21, w20, [x20]",
-        "lsr w20, w20, #7",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x4",
+        "add x21, x20, #0x1 (1)",
+        "mov w20, #0x80",
+        "ldeoralb w20, w22, [x21]",
+        "lsr w20, w22, #7",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "lock btc dword [rax], 31": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x3 (3)",
-        "mov w21, #0x80",
-        "ldeoralb w21, w20, [x20]",
-        "lsr w20, w20, #7",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x4",
+        "add x21, x20, #0x3 (3)",
+        "mov w20, #0x80",
+        "ldeoralb w20, w22, [x21]",
+        "lsr w20, w22, #7",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "lock btc qword [rax], 63": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 8,
       "Comment": "GROUP8 0x0F 0xBA /6",
       "ExpectedArm64ASM": [
-        "add x20, x4, #0x7 (7)",
-        "mov w21, #0x80",
-        "ldeoralb w21, w20, [x20]",
-        "lsr w20, w20, #7",
-        "ubfx x20, x20, #0, #1",
-        "lsl x20, x20, #29",
+        "mov x20, x4",
+        "add x21, x20, #0x7 (7)",
+        "mov w20, #0x80",
+        "ldeoralb w20, w22, [x21]",
+        "lsr w20, w22, #7",
+        "ubfx x21, x20, #0, #1",
+        "lsl x20, x21, #29",
         "msr nzcv, x20"
       ]
     },
     "cmpxchg8b [rbp]": {
-      "ExpectedInstructionCount": 24,
+      "ExpectedInstructionCount": 39,
       "Comment": "GROUP9 0x0F 0xC7 /1",
       "ExpectedArm64ASM": [
-        "add x20, x9, #0x0 (0)",
-        "mov w21, w4",
-        "mov w22, w6",
-        "mov w23, w22",
-        "mov w22, w21",
-        "mov w21, w7",
-        "mov w24, w5",
-        "mov w25, w24",
-        "mov w24, w21",
-        "mov w2, w22",
-        "mov w3, w23",
-        "caspal w2, w3, w24, w25, [x20]",
+        "sub sp, sp, #0x40 (64)",
+        "mov x20, x9",
+        "add x21, x20, #0x0 (0)",
+        "mov x20, x4",
+        "mov w22, w20",
+        "mov x23, x6",
+        "mov w24, w23",
+        "mov x30, x24",
+        "mov w24, w22",
+        "mov w25, w30",
+        "mov x22, x7",
+        "mov w30, w22",
+        "mov x22, x5",
+        "mov w18, w22",
+        "str x23, [sp]",
+        "mov w22, w30",
+        "mov w23, w18",
+        "str x20, [sp, #32]",
+        "mov x30, x21",
+        "mov w2, w24",
+        "mov w3, w25",
+        "caspal w2, w3, w22, w23, [x30]",
         "mov w20, w2",
         "mov w21, w3",
-        "mov w24, w20",
-        "mov w25, w21",
+        "mov w22, w20",
+        "mov w23, w21",
         "mrs x0, nzcv",
-        "cmp w20, w22",
-        "ccmp w21, w23, #nzcv, eq",
+        "cmp w20, w24",
+        "ccmp w21, w25, #nzcv, eq",
         "cset w1, eq",
         "bfi w0, w1, #30, #1",
         "msr nzcv, x0",
-        "csel x4, x24, x4, ne",
-        "csel x6, x25, x6, ne"
+        "ldr x20, [sp, #32]",
+        "csel x21, x22, x20, ne",
+        "mov x4, x21",
+        "ldr x20, [sp]",
+        "csel x21, x23, x20, ne",
+        "mov x6, x21",
+        "add sp, sp, #0x40 (64)"
       ]
     },
     "cmpxchg16b [rbp]": {
-      "ExpectedInstructionCount": 20,
+      "ExpectedInstructionCount": 35,
       "Comment": "GROUP9 0x0F 0xC7 /1",
       "ExpectedArm64ASM": [
-        "add x20, x9, #0x0 (0)",
-        "mov x22, x4",
-        "mov x23, x6",
-        "mov x24, x7",
-        "mov x25, x5",
-        "mov x2, x22",
-        "mov x3, x23",
-        "caspal x2, x3, x24, x25, [x20]",
+        "sub sp, sp, #0x40 (64)",
+        "mov x20, x9",
+        "add x21, x20, #0x0 (0)",
+        "mov x20, x4",
+        "mov x22, x6",
+        "mov x24, x20",
+        "mov x25, x22",
+        "mov x23, x7",
+        "mov x30, x5",
+        "str x22, [sp]",
+        "mov x18, x23",
+        "mov x22, x18",
+        "mov x23, x30",
+        "str x20, [sp, #32]",
+        "mov x30, x21",
+        "mov x2, x24",
+        "mov x3, x25",
+        "caspal x2, x3, x22, x23, [x30]",
         "mov x20, x2",
         "mov x21, x3",
-        "mov x24, x20",
-        "mov x25, x21",
+        "mov x22, x20",
+        "mov x23, x21",
         "mrs x0, nzcv",
-        "cmp w20, w22",
-        "ccmp w21, w23, #nzcv, eq",
+        "cmp w20, w24",
+        "ccmp w21, w25, #nzcv, eq",
         "cset w1, eq",
         "bfi w0, w1, #30, #1",
         "msr nzcv, x0",
-        "csel x4, x24, x4, ne",
-        "csel x6, x25, x6, ne"
+        "ldr x20, [sp, #32]",
+        "csel x21, x22, x20, ne",
+        "mov x4, x21",
+        "ldr x20, [sp]",
+        "csel x21, x23, x20, ne",
+        "mov x6, x21",
+        "add sp, sp, #0x40 (64)"
       ]
     },
     "rdrand ax": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 14,
       "Comment": "GROUP9 0x0F 0xC7 /6",
       "ExpectedArm64ASM": [
         "mrs x20, rndr",
         "cset x21, ne",
         "mov x22, x20",
-        "mov x20, x21",
-        "bfxil x4, x22, #0, #16",
-        "mov w26, #0x1",
-        "mov w27, #0x0",
-        "lsl x20, x20, #29",
+        "mov x23, x21",
+        "mov x20, x4",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21",
+        "mov w20, #0x1",
+        "mov x26, x20",
+        "mov w20, #0x0",
+        "mov x27, x20",
+        "lsl x20, x23, #29",
         "msr nzcv, x20"
       ]
     },
     "rdrand eax": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 12,
       "Comment": "GROUP9 0x0F 0xC7 /6",
       "ExpectedArm64ASM": [
         "mrs x20, rndr",
         "cset x21, ne",
         "mov x22, x20",
-        "mov x20, x21",
-        "mov w4, w22",
-        "mov w26, #0x1",
-        "mov w27, #0x0",
-        "lsl x20, x20, #29",
+        "mov x23, x21",
+        "mov w20, w22",
+        "mov x4, x20",
+        "mov w20, #0x1",
+        "mov x26, x20",
+        "mov w20, #0x0",
+        "mov x27, x20",
+        "lsl x20, x23, #29",
         "msr nzcv, x20"
       ]
     },
     "rdrand rax": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP9 0x0F 0xC7 /6",
       "ExpectedArm64ASM": [
         "mrs x20, rndr",
         "cset x21, ne",
-        "mov x4, x20",
-        "mov x20, x21",
-        "mov w26, #0x1",
-        "mov w27, #0x0",
-        "lsl x20, x20, #29",
+        "mov x22, x20",
+        "mov x23, x21",
+        "mov x4, x22",
+        "mov w20, #0x1",
+        "mov x26, x20",
+        "mov w20, #0x0",
+        "mov x27, x20",
+        "lsl x20, x23, #29",
         "msr nzcv, x20"
       ]
     },
     "rdseed ax": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 14,
       "Comment": "GROUP9 0x0F 0xC7 /7",
       "ExpectedArm64ASM": [
         "mrs x20, rndrrs",
         "cset x21, ne",
         "mov x22, x20",
-        "mov x20, x21",
-        "bfxil x4, x22, #0, #16",
-        "mov w26, #0x1",
-        "mov w27, #0x0",
-        "lsl x20, x20, #29",
+        "mov x23, x21",
+        "mov x20, x4",
+        "mov x21, x20",
+        "bfxil x21, x22, #0, #16",
+        "mov x4, x21",
+        "mov w20, #0x1",
+        "mov x26, x20",
+        "mov w20, #0x0",
+        "mov x27, x20",
+        "lsl x20, x23, #29",
         "msr nzcv, x20"
       ]
     },
     "rdseed eax": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 12,
       "Comment": "GROUP9 0x0F 0xC7 /7",
       "ExpectedArm64ASM": [
         "mrs x20, rndrrs",
         "cset x21, ne",
         "mov x22, x20",
-        "mov x20, x21",
-        "mov w4, w22",
-        "mov w26, #0x1",
-        "mov w27, #0x0",
-        "lsl x20, x20, #29",
+        "mov x23, x21",
+        "mov w20, w22",
+        "mov x4, x20",
+        "mov w20, #0x1",
+        "mov x26, x20",
+        "mov w20, #0x0",
+        "mov x27, x20",
+        "lsl x20, x23, #29",
         "msr nzcv, x20"
       ]
     },
     "rdseed rax": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 11,
       "Comment": "GROUP9 0x0F 0xC7 /7",
       "ExpectedArm64ASM": [
         "mrs x20, rndrrs",
         "cset x21, ne",
-        "mov x4, x20",
-        "mov x20, x21",
-        "mov w26, #0x1",
-        "mov w27, #0x0",
-        "lsl x20, x20, #29",
+        "mov x22, x20",
+        "mov x23, x21",
+        "mov x4, x22",
+        "mov w20, #0x1",
+        "mov x26, x20",
+        "mov w20, #0x0",
+        "mov x27, x20",
+        "lsl x20, x23, #29",
         "msr nzcv, x20"
       ]
     },
     "rdpid eax": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 19,
       "Comment": "GROUP9 0xF3 0x0F 0xC7 /7",
       "ExpectedArm64ASM": [
         "mrs x0, nzcv",
@@ -940,11 +1083,12 @@
         "ldr x8, [x28, #40]",
         "str xzr, [x28, #1056]",
         "orr x20, x0, x1, lsl #12",
-        "mov w4, w20"
+        "mov w21, w20",
+        "mov x4, x21"
       ]
     },
     "rdpid rax": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 19,
       "Comment": "GROUP9 0xF3 0x0F 0xC7 /7",
       "ExpectedArm64ASM": [
         "mrs x0, nzcv",
@@ -964,7 +1108,8 @@
         "ldr x8, [x28, #40]",
         "str xzr, [x28, #1056]",
         "orr x20, x0, x1, lsl #12",
-        "mov w4, w20"
+        "mov w21, w20",
+        "mov x4, x21"
       ]
     },
     "psrlw mm0, 0": {
@@ -979,8 +1124,8 @@
       "Comment": "GROUP12 0x0F 0xC7 /2",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "ushr v2.8h, v2.8h, #15",
-        "str d2, [x28, #768]"
+        "ushr v3.8h, v2.8h, #15",
+        "str d3, [x28, #768]"
       ]
     },
     "psrlw mm0, 16": {
@@ -989,8 +1134,8 @@
       "Comment": "GROUP12 0x0F 0xC7 /2",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "movi v2.2d, #0x0",
-        "str d2, [x28, #768]"
+        "movi v3.2d, #0x0",
+        "str d3, [x28, #768]"
       ]
     },
     "psrlw xmm0, 0": {
@@ -1000,19 +1145,23 @@
       "ExpectedArm64ASM": []
     },
     "psrlw xmm0, 15": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Type": "SSE",
       "Comment": "GROUP12 0x0F 0xC7 /2",
       "ExpectedArm64ASM": [
-        "ushr v16.8h, v16.8h, #15"
+        "mov v2.16b, v16.16b",
+        "ushr v3.8h, v2.8h, #15",
+        "mov v16.16b, v3.16b"
       ]
     },
     "psrlw xmm0, 16": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Type": "SSE",
       "Comment": "GROUP12 0x0F 0xC7 /2",
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "mov v2.16b, v16.16b",
+        "movi v3.2d, #0x0",
+        "mov v16.16b, v3.16b"
       ]
     },
     "psraw mm0, 0": {
@@ -1027,8 +1176,8 @@
       "Comment": "GROUP12 0x0F 0xC7 /3",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "sshr v2.8h, v2.8h, #15",
-        "str d2, [x28, #768]"
+        "sshr v3.8h, v2.8h, #15",
+        "str d3, [x28, #768]"
       ]
     },
     "psraw mm0, 16": {
@@ -1037,8 +1186,8 @@
       "Comment": "GROUP12 0x0F 0xC7 /3",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "sshr v2.8h, v2.8h, #15",
-        "str d2, [x28, #768]"
+        "sshr v3.8h, v2.8h, #15",
+        "str d3, [x28, #768]"
       ]
     },
     "psraw xmm0, 0": {
@@ -1048,19 +1197,23 @@
       "ExpectedArm64ASM": []
     },
     "psraw xmm0, 15": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Type": "SSE",
       "Comment": "GROUP12 0x0F 0xC7 /3",
       "ExpectedArm64ASM": [
-        "sshr v16.8h, v16.8h, #15"
+        "mov v2.16b, v16.16b",
+        "sshr v3.8h, v2.8h, #15",
+        "mov v16.16b, v3.16b"
       ]
     },
     "psraw xmm0, 16": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Type": "SSE",
       "Comment": "GROUP12 0x0F 0xC7 /3",
       "ExpectedArm64ASM": [
-        "sshr v16.8h, v16.8h, #15"
+        "mov v2.16b, v16.16b",
+        "sshr v3.8h, v2.8h, #15",
+        "mov v16.16b, v3.16b"
       ]
     },
     "psllw mm0, 0": {
@@ -1075,8 +1228,8 @@
       "Comment": "GROUP12 0x0F 0xC7 /6",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "shl v2.8h, v2.8h, #15",
-        "str d2, [x28, #768]"
+        "shl v3.8h, v2.8h, #15",
+        "str d3, [x28, #768]"
       ]
     },
     "psllw mm0, 16": {
@@ -1085,8 +1238,8 @@
       "Comment": "GROUP12 0x0F 0xC7 /6",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "movi v2.2d, #0x0",
-        "str d2, [x28, #768]"
+        "movi v3.2d, #0x0",
+        "str d3, [x28, #768]"
       ]
     },
     "psllw xmm0, 0": {
@@ -1096,19 +1249,23 @@
       "ExpectedArm64ASM": []
     },
     "psllw xmm0, 15": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Type": "SSE",
       "Comment": "GROUP12 0x0F 0xC7 /6",
       "ExpectedArm64ASM": [
-        "shl v16.8h, v16.8h, #15"
+        "mov v2.16b, v16.16b",
+        "shl v3.8h, v2.8h, #15",
+        "mov v16.16b, v3.16b"
       ]
     },
     "psllw xmm0, 16": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Type": "SSE",
       "Comment": "GROUP12 0x0F 0xC7 /6",
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "mov v2.16b, v16.16b",
+        "movi v3.2d, #0x0",
+        "mov v16.16b, v3.16b"
       ]
     },
     "psrld mm0, 0": {
@@ -1123,8 +1280,8 @@
       "Comment": "GROUP13 0x0F 0xC7 /2",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "ushr v2.4s, v2.4s, #31",
-        "str d2, [x28, #768]"
+        "ushr v3.4s, v2.4s, #31",
+        "str d3, [x28, #768]"
       ]
     },
     "psrld mm0, 32": {
@@ -1133,8 +1290,8 @@
       "Comment": "GROUP13 0x0F 0xC7 /2",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "movi v2.2d, #0x0",
-        "str d2, [x28, #768]"
+        "movi v3.2d, #0x0",
+        "str d3, [x28, #768]"
       ]
     },
     "psrld xmm0, 0": {
@@ -1144,19 +1301,23 @@
       "ExpectedArm64ASM": []
     },
     "psrld xmm0, 31": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Type": "SSE",
       "Comment": "GROUP13 0x0F 0xC7 /2",
       "ExpectedArm64ASM": [
-        "ushr v16.4s, v16.4s, #31"
+        "mov v2.16b, v16.16b",
+        "ushr v3.4s, v2.4s, #31",
+        "mov v16.16b, v3.16b"
       ]
     },
     "psrld xmm0, 32": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Type": "SSE",
       "Comment": "GROUP13 0x0F 0xC7 /2",
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "mov v2.16b, v16.16b",
+        "movi v3.2d, #0x0",
+        "mov v16.16b, v3.16b"
       ]
     },
     "psrad mm0, 0": {
@@ -1171,8 +1332,8 @@
       "Comment": "GROUP13 0x0F 0xC7 /3",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "sshr v2.4s, v2.4s, #31",
-        "str d2, [x28, #768]"
+        "sshr v3.4s, v2.4s, #31",
+        "str d3, [x28, #768]"
       ]
     },
     "psrad mm0, 32": {
@@ -1181,8 +1342,8 @@
       "Comment": "GROUP13 0x0F 0xC7 /3",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "sshr v2.4s, v2.4s, #31",
-        "str d2, [x28, #768]"
+        "sshr v3.4s, v2.4s, #31",
+        "str d3, [x28, #768]"
       ]
     },
     "psrad xmm0, 0": {
@@ -1192,19 +1353,23 @@
       "ExpectedArm64ASM": []
     },
     "psrad xmm0, 31": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Type": "SSE",
       "Comment": "GROUP13 0x0F 0xC7 /3",
       "ExpectedArm64ASM": [
-        "sshr v16.4s, v16.4s, #31"
+        "mov v2.16b, v16.16b",
+        "sshr v3.4s, v2.4s, #31",
+        "mov v16.16b, v3.16b"
       ]
     },
     "psrad xmm0, 32": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Type": "SSE",
       "Comment": "GROUP13 0x0F 0xC7 /3",
       "ExpectedArm64ASM": [
-        "sshr v16.4s, v16.4s, #31"
+        "mov v2.16b, v16.16b",
+        "sshr v3.4s, v2.4s, #31",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pslld mm0, 0": {
@@ -1219,8 +1384,8 @@
       "Comment": "GROUP13 0x0F 0xC7 /6",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "shl v2.4s, v2.4s, #31",
-        "str d2, [x28, #768]"
+        "shl v3.4s, v2.4s, #31",
+        "str d3, [x28, #768]"
       ]
     },
     "pslld mm0, 32": {
@@ -1229,8 +1394,8 @@
       "Comment": "GROUP13 0x0F 0xC7 /6",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "movi v2.2d, #0x0",
-        "str d2, [x28, #768]"
+        "movi v3.2d, #0x0",
+        "str d3, [x28, #768]"
       ]
     },
     "pslld xmm0, 0": {
@@ -1240,19 +1405,23 @@
       "ExpectedArm64ASM": []
     },
     "pslld xmm0, 31": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Type": "SSE",
       "Comment": "GROUP13 0x0F 0xC7 /6",
       "ExpectedArm64ASM": [
-        "shl v16.4s, v16.4s, #31"
+        "mov v2.16b, v16.16b",
+        "shl v3.4s, v2.4s, #31",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pslld xmm0, 32": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Type": "SSE",
       "Comment": "GROUP13 0x0F 0xC7 /6",
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "mov v2.16b, v16.16b",
+        "movi v3.2d, #0x0",
+        "mov v16.16b, v3.16b"
       ]
     },
     "psrlq mm0, 0": {
@@ -1267,8 +1436,8 @@
       "Comment": "GROUP14 0x0F 0xC7 /2",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "ushr v2.2d, v2.2d, #63",
-        "str d2, [x28, #768]"
+        "ushr v3.2d, v2.2d, #63",
+        "str d3, [x28, #768]"
       ]
     },
     "psrlq mm0, 64": {
@@ -1277,8 +1446,8 @@
       "Comment": "GROUP14 0x0F 0xC7 /2",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "movi v2.2d, #0x0",
-        "str d2, [x28, #768]"
+        "movi v3.2d, #0x0",
+        "str d3, [x28, #768]"
       ]
     },
     "psrlq xmm0, 0": {
@@ -1288,19 +1457,23 @@
       "ExpectedArm64ASM": []
     },
     "psrlq xmm0, 63": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Type": "SSE",
       "Comment": "GROUP14 0x0F 0xC7 /2",
       "ExpectedArm64ASM": [
-        "ushr v16.2d, v16.2d, #63"
+        "mov v2.16b, v16.16b",
+        "ushr v3.2d, v2.2d, #63",
+        "mov v16.16b, v3.16b"
       ]
     },
     "psrlq xmm0, 64": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Type": "SSE",
       "Comment": "GROUP14 0x0F 0xC7 /2",
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "mov v2.16b, v16.16b",
+        "movi v3.2d, #0x0",
+        "mov v16.16b, v3.16b"
       ]
     },
     "psrldq xmm0, 0": {
@@ -1310,20 +1483,23 @@
       "ExpectedArm64ASM": []
     },
     "psrldq xmm0, 15": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Type": "SSE",
       "Comment": "GROUP14 0x0F 0xC7 /3",
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "ext v16.16b, v16.16b, v2.16b, #15"
+        "mov v2.16b, v16.16b",
+        "movi v3.2d, #0x0",
+        "ext v4.16b, v2.16b, v3.16b, #15",
+        "mov v16.16b, v4.16b"
       ]
     },
     "psrldq xmm0, 16": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Type": "SSE",
       "Comment": "GROUP14 0x0F 0xC7 /3",
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "movi v2.2d, #0x0",
+        "mov v16.16b, v2.16b"
       ]
     },
     "psllq mm0, 0": {
@@ -1338,8 +1514,8 @@
       "Comment": "GROUP14 0x0F 0xC7 /6",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "shl v2.2d, v2.2d, #63",
-        "str d2, [x28, #768]"
+        "shl v3.2d, v2.2d, #63",
+        "str d3, [x28, #768]"
       ]
     },
     "psllq mm0, 64": {
@@ -1348,8 +1524,8 @@
       "Comment": "GROUP14 0x0F 0xC7 /6",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "movi v2.2d, #0x0",
-        "str d2, [x28, #768]"
+        "movi v3.2d, #0x0",
+        "str d3, [x28, #768]"
       ]
     },
     "psllq xmm0, 0": {
@@ -1359,152 +1535,194 @@
       "ExpectedArm64ASM": []
     },
     "psllq xmm0, 63": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Type": "SSE",
       "Comment": "GROUP14 0x0F 0xC7 /6",
       "ExpectedArm64ASM": [
-        "shl v16.2d, v16.2d, #63"
+        "mov v2.16b, v16.16b",
+        "shl v3.2d, v2.2d, #63",
+        "mov v16.16b, v3.16b"
       ]
     },
     "psllq xmm0, 64": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Type": "SSE",
       "Comment": "GROUP14 0x0F 0xC7 /6",
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "mov v2.16b, v16.16b",
+        "movi v3.2d, #0x0",
+        "mov v16.16b, v3.16b"
       ]
     },
     "fxsave [rax]": {
-      "ExpectedInstructionCount": 58,
+      "ExpectedInstructionCount": 77,
       "Comment": "GROUP15 0x0F 0xAE /0",
       "ExpectedArm64ASM": [
-        "ldrh w20, [x28, #1024]",
-        "strh w20, [x4]",
-        "mov w20, #0x0",
-        "ldrb w21, [x28, #747]",
-        "bfi x20, x21, #11, #3",
+        "mov x20, x4",
+        "ldrh w21, [x28, #1024]",
+        "strh w21, [x20]",
+        "mov w21, #0x0",
+        "ldrb w22, [x28, #747]",
+        "mov x23, x21",
+        "bfi x23, x22, #11, #3",
         "ldrb w21, [x28, #744]",
         "ldrb w22, [x28, #745]",
-        "ldrb w23, [x28, #746]",
-        "ldrb w24, [x28, #750]",
-        "orr x20, x20, x21, lsl #8",
-        "orr x20, x20, x22, lsl #9",
-        "orr x20, x20, x23, lsl #10",
-        "orr x20, x20, x24, lsl #14",
-        "strh w20, [x4, #2]",
-        "ldrb w20, [x28, #1026]",
-        "strb w20, [x4, #4]",
+        "ldrb w24, [x28, #746]",
+        "ldrb w25, [x28, #750]",
+        "orr x30, x23, x21, lsl #8",
+        "orr x21, x30, x22, lsl #9",
+        "orr x22, x21, x24, lsl #10",
+        "orr x21, x22, x25, lsl #14",
+        "strh w21, [x20, #2]",
+        "ldrb w21, [x28, #1026]",
+        "strb w21, [x20, #4]",
         "ldr q2, [x28, #768]",
-        "str q2, [x4, #32]",
+        "str q2, [x20, #32]",
         "ldr q2, [x28, #784]",
-        "str q2, [x4, #48]",
+        "str q2, [x20, #48]",
         "ldr q2, [x28, #800]",
-        "str q2, [x4, #64]",
+        "str q2, [x20, #64]",
         "ldr q2, [x28, #816]",
-        "str q2, [x4, #80]",
+        "str q2, [x20, #80]",
         "ldr q2, [x28, #832]",
-        "str q2, [x4, #96]",
+        "str q2, [x20, #96]",
         "ldr q2, [x28, #848]",
-        "str q2, [x4, #112]",
+        "str q2, [x20, #112]",
         "ldr q2, [x28, #864]",
-        "str q2, [x4, #128]",
+        "str q2, [x20, #128]",
         "ldr q2, [x28, #880]",
-        "str q2, [x4, #144]",
-        "str q16, [x4, #160]",
-        "str q17, [x4, #176]",
-        "str q18, [x4, #192]",
-        "str q19, [x4, #208]",
-        "str q20, [x4, #224]",
-        "str q21, [x4, #240]",
-        "str q22, [x4, #256]",
-        "str q23, [x4, #272]",
-        "str q24, [x4, #288]",
-        "str q25, [x4, #304]",
-        "str q26, [x4, #320]",
-        "str q27, [x4, #336]",
-        "str q28, [x4, #352]",
-        "str q29, [x4, #368]",
-        "str q30, [x4, #384]",
-        "str q31, [x4, #400]",
-        "mov w20, #0x1f80",
-        "mrs x21, fpcr",
-        "ubfx x21, x21, #22, #3",
-        "rbit w0, w21",
-        "bfi x21, x0, #30, #2",
-        "bfi w20, w21, #13, #3",
-        "add x21, x4, #0x18 (24)",
-        "str w20, [x4, #24]",
+        "str q2, [x20, #144]",
+        "mov v2.16b, v16.16b",
+        "str q2, [x20, #160]",
+        "mov v2.16b, v17.16b",
+        "str q2, [x20, #176]",
+        "mov v2.16b, v18.16b",
+        "str q2, [x20, #192]",
+        "mov v2.16b, v19.16b",
+        "str q2, [x20, #208]",
+        "mov v2.16b, v20.16b",
+        "str q2, [x20, #224]",
+        "mov v2.16b, v21.16b",
+        "str q2, [x20, #240]",
+        "mov v2.16b, v22.16b",
+        "str q2, [x20, #256]",
+        "mov v2.16b, v23.16b",
+        "str q2, [x20, #272]",
+        "mov v2.16b, v24.16b",
+        "str q2, [x20, #288]",
+        "mov v2.16b, v25.16b",
+        "str q2, [x20, #304]",
+        "mov v2.16b, v26.16b",
+        "str q2, [x20, #320]",
+        "mov v2.16b, v27.16b",
+        "str q2, [x20, #336]",
+        "mov v2.16b, v28.16b",
+        "str q2, [x20, #352]",
+        "mov v2.16b, v29.16b",
+        "str q2, [x20, #368]",
+        "mov v2.16b, v30.16b",
+        "str q2, [x20, #384]",
+        "mov v2.16b, v31.16b",
+        "str q2, [x20, #400]",
+        "mov w21, #0x1f80",
+        "mrs x22, fpcr",
+        "ubfx x22, x22, #22, #3",
+        "rbit w0, w22",
+        "bfi x22, x0, #30, #2",
+        "mov w23, w21",
+        "bfi w23, w22, #13, #3",
+        "add x21, x20, #0x18 (24)",
+        "str w23, [x20, #24]",
         "mov w20, #0xffff",
         "str w20, [x21, #4]"
       ]
     },
     "rdfsbase eax": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "GROUP15 0x0F 0xAE /0",
       "ExpectedArm64ASM": [
-        "ldr w4, [x28, #176]"
+        "ldr w20, [x28, #176]",
+        "mov x4, x20"
       ]
     },
     "rdfsbase rax": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "GROUP15 0x0F 0xAE /0",
       "ExpectedArm64ASM": [
-        "ldr x4, [x28, #176]"
+        "ldr x20, [x28, #176]",
+        "mov x4, x20"
       ]
     },
     "fxrstor [rax]": {
-      "ExpectedInstructionCount": 56,
+      "ExpectedInstructionCount": 73,
       "Comment": "GROUP15 0x0F 0xAE /1",
       "ExpectedArm64ASM": [
-        "ldrh w20, [x4]",
-        "strh w20, [x28, #1024]",
-        "ldrh w20, [x4, #2]",
-        "ubfx w21, w20, #11, #3",
-        "strb w21, [x28, #747]",
-        "ubfx w21, w20, #8, #1",
-        "ubfx w22, w20, #9, #1",
-        "ubfx w23, w20, #10, #1",
-        "ubfx w20, w20, #14, #1",
-        "strb w21, [x28, #744]",
-        "strb w22, [x28, #745]",
-        "strb w23, [x28, #746]",
-        "strb w20, [x28, #750]",
-        "ldrb w20, [x4, #4]",
-        "strb w20, [x28, #1026]",
-        "ldr q2, [x4, #32]",
+        "mov x20, x4",
+        "ldrh w21, [x20]",
+        "strh w21, [x28, #1024]",
+        "ldrh w21, [x20, #2]",
+        "ubfx w22, w21, #11, #3",
+        "strb w22, [x28, #747]",
+        "ubfx w22, w21, #8, #1",
+        "ubfx w23, w21, #9, #1",
+        "ubfx w24, w21, #10, #1",
+        "ubfx w25, w21, #14, #1",
+        "strb w22, [x28, #744]",
+        "strb w23, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w25, [x28, #750]",
+        "ldrb w21, [x20, #4]",
+        "strb w21, [x28, #1026]",
+        "ldr q2, [x20, #32]",
         "str q2, [x28, #768]",
-        "ldr q2, [x4, #48]",
+        "ldr q2, [x20, #48]",
         "str q2, [x28, #784]",
-        "ldr q2, [x4, #64]",
+        "ldr q2, [x20, #64]",
         "str q2, [x28, #800]",
-        "ldr q2, [x4, #80]",
+        "ldr q2, [x20, #80]",
         "str q2, [x28, #816]",
-        "ldr q2, [x4, #96]",
+        "ldr q2, [x20, #96]",
         "str q2, [x28, #832]",
-        "ldr q2, [x4, #112]",
+        "ldr q2, [x20, #112]",
         "str q2, [x28, #848]",
-        "ldr q2, [x4, #128]",
+        "ldr q2, [x20, #128]",
         "str q2, [x28, #864]",
-        "ldr q2, [x4, #144]",
+        "ldr q2, [x20, #144]",
         "str q2, [x28, #880]",
-        "ldr q16, [x4, #160]",
-        "ldr q17, [x4, #176]",
-        "ldr q18, [x4, #192]",
-        "ldr q19, [x4, #208]",
-        "ldr q20, [x4, #224]",
-        "ldr q21, [x4, #240]",
-        "ldr q22, [x4, #256]",
-        "ldr q23, [x4, #272]",
-        "ldr q24, [x4, #288]",
-        "ldr q25, [x4, #304]",
-        "ldr q26, [x4, #320]",
-        "ldr q27, [x4, #336]",
-        "ldr q28, [x4, #352]",
-        "ldr q29, [x4, #368]",
-        "ldr q30, [x4, #384]",
-        "ldr q31, [x4, #400]",
-        "ldr w20, [x4, #24]",
-        "ubfx w20, w20, #13, #3",
+        "ldr q2, [x20, #160]",
+        "mov v16.16b, v2.16b",
+        "ldr q2, [x20, #176]",
+        "mov v17.16b, v2.16b",
+        "ldr q2, [x20, #192]",
+        "mov v18.16b, v2.16b",
+        "ldr q2, [x20, #208]",
+        "mov v19.16b, v2.16b",
+        "ldr q2, [x20, #224]",
+        "mov v20.16b, v2.16b",
+        "ldr q2, [x20, #240]",
+        "mov v21.16b, v2.16b",
+        "ldr q2, [x20, #256]",
+        "mov v22.16b, v2.16b",
+        "ldr q2, [x20, #272]",
+        "mov v23.16b, v2.16b",
+        "ldr q2, [x20, #288]",
+        "mov v24.16b, v2.16b",
+        "ldr q2, [x20, #304]",
+        "mov v25.16b, v2.16b",
+        "ldr q2, [x20, #320]",
+        "mov v26.16b, v2.16b",
+        "ldr q2, [x20, #336]",
+        "mov v27.16b, v2.16b",
+        "ldr q2, [x20, #352]",
+        "mov v28.16b, v2.16b",
+        "ldr q2, [x20, #368]",
+        "mov v29.16b, v2.16b",
+        "ldr q2, [x20, #384]",
+        "mov v30.16b, v2.16b",
+        "ldr q2, [x20, #400]",
+        "mov v31.16b, v2.16b",
+        "ldr w21, [x20, #24]",
+        "ubfx w20, w21, #13, #3",
         "rbit w1, w20",
         "lsr w1, w1, #30",
         "mrs x0, fpcr",
@@ -1515,25 +1733,28 @@
       ]
     },
     "rdgsbase eax": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "GROUP15 0x0F 0xAE /1",
       "ExpectedArm64ASM": [
-        "ldr w4, [x28, #168]"
+        "ldr w20, [x28, #168]",
+        "mov x4, x20"
       ]
     },
     "rdgsbase rax": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "GROUP15 0x0F 0xAE /1",
       "ExpectedArm64ASM": [
-        "ldr x4, [x28, #168]"
+        "ldr x20, [x28, #168]",
+        "mov x4, x20"
       ]
     },
     "ldmxcsr [rax]": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 10,
       "Comment": "GROUP15 0x0F 0xAE /2",
       "ExpectedArm64ASM": [
-        "ldr w20, [x4]",
-        "ubfx w20, w20, #13, #3",
+        "mov x20, x4",
+        "ldr w21, [x20]",
+        "ubfx w20, w21, #13, #3",
         "rbit w1, w20",
         "lsr w1, w1, #30",
         "mrs x0, fpcr",
@@ -1544,22 +1765,24 @@
       ]
     },
     "wrfsbase eax": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP15 0x0F 0xAE /2",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "str x20, [x28, #176]"
+        "mov x20, x4",
+        "mov w21, w20",
+        "str x21, [x28, #176]"
       ]
     },
     "wrfsbase rax": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "GROUP15 0x0F 0xAE /2",
       "ExpectedArm64ASM": [
-        "str x4, [x28, #176]"
+        "mov x20, x4",
+        "str x20, [x28, #176]"
       ]
     },
     "stmxcsr [rax]": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": "GROUP15 0x0F 0xAE /3",
       "ExpectedArm64ASM": [
         "mov w20, #0x1f80",
@@ -1567,98 +1790,127 @@
         "ubfx x21, x21, #22, #3",
         "rbit w0, w21",
         "bfi x21, x0, #30, #2",
-        "bfi w20, w21, #13, #3",
-        "str w20, [x4]"
+        "mov w22, w20",
+        "bfi w22, w21, #13, #3",
+        "mov x20, x4",
+        "str w22, [x20]"
       ]
     },
     "wrgsbase eax": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP15 0x0F 0xAE /3",
       "ExpectedArm64ASM": [
-        "mov w20, w4",
-        "str x20, [x28, #168]"
+        "mov x20, x4",
+        "mov w21, w20",
+        "str x21, [x28, #168]"
       ]
     },
     "wrgsbase rax": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "GROUP15 0x0F 0xAE /3",
       "ExpectedArm64ASM": [
-        "str x4, [x28, #168]"
+        "mov x20, x4",
+        "str x20, [x28, #168]"
       ]
     },
     "xsave [rax]": {
-      "ExpectedInstructionCount": 69,
+      "ExpectedInstructionCount": 94,
       "Comment": "GROUP15 0x0F 0xAE /4",
       "ExpectedArm64ASM": [
-        "ubfx x20, x4, #0, #1",
-        "cbnz x20, #+0x8",
-        "b #+0x84",
-        "ldrh w20, [x28, #1024]",
-        "strh w20, [x4]",
-        "mov w20, #0x0",
-        "ldrb w21, [x28, #747]",
-        "bfi x20, x21, #11, #3",
+        "mov x20, x4",
+        "ubfx x21, x20, #0, #1",
+        "cbnz x21, #+0x8",
+        "b #+0x8c",
+        "mov x20, x4",
+        "ldrh w21, [x28, #1024]",
+        "strh w21, [x20]",
+        "mov w21, #0x0",
+        "ldrb w22, [x28, #747]",
+        "mov x23, x21",
+        "bfi x23, x22, #11, #3",
         "ldrb w21, [x28, #744]",
         "ldrb w22, [x28, #745]",
-        "ldrb w23, [x28, #746]",
-        "ldrb w24, [x28, #750]",
-        "orr x20, x20, x21, lsl #8",
-        "orr x20, x20, x22, lsl #9",
-        "orr x20, x20, x23, lsl #10",
-        "orr x20, x20, x24, lsl #14",
-        "strh w20, [x4, #2]",
-        "ldrb w20, [x28, #1026]",
-        "strb w20, [x4, #4]",
+        "ldrb w24, [x28, #746]",
+        "ldrb w25, [x28, #750]",
+        "orr x30, x23, x21, lsl #8",
+        "orr x21, x30, x22, lsl #9",
+        "orr x22, x21, x24, lsl #10",
+        "orr x21, x22, x25, lsl #14",
+        "strh w21, [x20, #2]",
+        "ldrb w21, [x28, #1026]",
+        "strb w21, [x20, #4]",
         "ldr q2, [x28, #768]",
-        "str q2, [x4, #32]",
+        "str q2, [x20, #32]",
         "ldr q2, [x28, #784]",
-        "str q2, [x4, #48]",
+        "str q2, [x20, #48]",
         "ldr q2, [x28, #800]",
-        "str q2, [x4, #64]",
+        "str q2, [x20, #64]",
         "ldr q2, [x28, #816]",
-        "str q2, [x4, #80]",
+        "str q2, [x20, #80]",
         "ldr q2, [x28, #832]",
-        "str q2, [x4, #96]",
+        "str q2, [x20, #96]",
         "ldr q2, [x28, #848]",
-        "str q2, [x4, #112]",
+        "str q2, [x20, #112]",
         "ldr q2, [x28, #864]",
-        "str q2, [x4, #128]",
+        "str q2, [x20, #128]",
         "ldr q2, [x28, #880]",
-        "str q2, [x4, #144]",
-        "ubfx x20, x4, #1, #1",
-        "cbnz x20, #+0x8",
-        "b #+0x44",
-        "str q16, [x4, #160]",
-        "str q17, [x4, #176]",
-        "str q18, [x4, #192]",
-        "str q19, [x4, #208]",
-        "str q20, [x4, #224]",
-        "str q21, [x4, #240]",
-        "str q22, [x4, #256]",
-        "str q23, [x4, #272]",
-        "str q24, [x4, #288]",
-        "str q25, [x4, #304]",
-        "str q26, [x4, #320]",
-        "str q27, [x4, #336]",
-        "str q28, [x4, #352]",
-        "str q29, [x4, #368]",
-        "str q30, [x4, #384]",
-        "str q31, [x4, #400]",
-        "ubfx x20, x4, #1, #2",
-        "cbnz x20, #+0x8",
-        "b #+0x2c",
-        "mov w20, #0x1f80",
-        "mrs x21, fpcr",
-        "ubfx x21, x21, #22, #3",
-        "rbit w0, w21",
-        "bfi x21, x0, #30, #2",
-        "bfi w20, w21, #13, #3",
-        "add x21, x4, #0x18 (24)",
-        "str w20, [x4, #24]",
+        "str q2, [x20, #144]",
+        "mov x20, x4",
+        "ubfx x21, x20, #1, #1",
+        "cbnz x21, #+0x8",
+        "b #+0x88",
+        "mov x20, x4",
+        "mov v2.16b, v16.16b",
+        "str q2, [x20, #160]",
+        "mov v2.16b, v17.16b",
+        "str q2, [x20, #176]",
+        "mov v2.16b, v18.16b",
+        "str q2, [x20, #192]",
+        "mov v2.16b, v19.16b",
+        "str q2, [x20, #208]",
+        "mov v2.16b, v20.16b",
+        "str q2, [x20, #224]",
+        "mov v2.16b, v21.16b",
+        "str q2, [x20, #240]",
+        "mov v2.16b, v22.16b",
+        "str q2, [x20, #256]",
+        "mov v2.16b, v23.16b",
+        "str q2, [x20, #272]",
+        "mov v2.16b, v24.16b",
+        "str q2, [x20, #288]",
+        "mov v2.16b, v25.16b",
+        "str q2, [x20, #304]",
+        "mov v2.16b, v26.16b",
+        "str q2, [x20, #320]",
+        "mov v2.16b, v27.16b",
+        "str q2, [x20, #336]",
+        "mov v2.16b, v28.16b",
+        "str q2, [x20, #352]",
+        "mov v2.16b, v29.16b",
+        "str q2, [x20, #368]",
+        "mov v2.16b, v30.16b",
+        "str q2, [x20, #384]",
+        "mov v2.16b, v31.16b",
+        "str q2, [x20, #400]",
+        "mov x20, x4",
+        "ubfx x21, x20, #1, #2",
+        "cbnz x21, #+0x8",
+        "b #+0x34",
+        "mov x20, x4",
+        "mov w21, #0x1f80",
+        "mrs x22, fpcr",
+        "ubfx x22, x22, #22, #3",
+        "rbit w0, w22",
+        "bfi x22, x0, #30, #2",
+        "mov w23, w21",
+        "bfi w23, w22, #13, #3",
+        "add x21, x20, #0x18 (24)",
+        "str w23, [x20, #24]",
         "mov w20, #0xffff",
         "str w20, [x21, #4]",
-        "ubfx x20, x4, #0, #3",
-        "str x20, [x4, #512]"
+        "mov x20, x4",
+        "ubfx x21, x20, #0, #3",
+        "str x21, [x20, #512]"
       ]
     },
     "lfence": {
@@ -1669,43 +1921,45 @@
       ]
     },
     "xrstor [rax]": {
-      "ExpectedInstructionCount": 105,
+      "ExpectedInstructionCount": 128,
       "Comment": "GROUP15 0x0F 0xAE /5",
       "ExpectedArm64ASM": [
-        "ldr x20, [x4, #512]",
-        "ubfx x20, x20, #0, #1",
+        "mov x20, x4",
+        "ldr x21, [x20, #512]",
+        "ubfx x20, x21, #0, #1",
         "cbnz x20, #+0x8",
-        "b #+0x84",
-        "ldrh w20, [x4]",
-        "strh w20, [x28, #1024]",
-        "ldrh w20, [x4, #2]",
-        "ubfx w21, w20, #11, #3",
-        "strb w21, [x28, #747]",
-        "ubfx w21, w20, #8, #1",
-        "ubfx w22, w20, #9, #1",
-        "ubfx w23, w20, #10, #1",
-        "ubfx w20, w20, #14, #1",
-        "strb w21, [x28, #744]",
-        "strb w22, [x28, #745]",
-        "strb w23, [x28, #746]",
-        "strb w20, [x28, #750]",
-        "ldrb w20, [x4, #4]",
-        "strb w20, [x28, #1026]",
-        "ldr q2, [x4, #32]",
+        "b #+0x88",
+        "mov x20, x4",
+        "ldrh w21, [x20]",
+        "strh w21, [x28, #1024]",
+        "ldrh w21, [x20, #2]",
+        "ubfx w22, w21, #11, #3",
+        "strb w22, [x28, #747]",
+        "ubfx w22, w21, #8, #1",
+        "ubfx w23, w21, #9, #1",
+        "ubfx w24, w21, #10, #1",
+        "ubfx w25, w21, #14, #1",
+        "strb w22, [x28, #744]",
+        "strb w23, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w25, [x28, #750]",
+        "ldrb w21, [x20, #4]",
+        "strb w21, [x28, #1026]",
+        "ldr q2, [x20, #32]",
         "str q2, [x28, #768]",
-        "ldr q2, [x4, #48]",
+        "ldr q2, [x20, #48]",
         "str q2, [x28, #784]",
-        "ldr q2, [x4, #64]",
+        "ldr q2, [x20, #64]",
         "str q2, [x28, #800]",
-        "ldr q2, [x4, #80]",
+        "ldr q2, [x20, #80]",
         "str q2, [x28, #816]",
-        "ldr q2, [x4, #96]",
+        "ldr q2, [x20, #96]",
         "str q2, [x28, #832]",
-        "ldr q2, [x4, #112]",
+        "ldr q2, [x20, #112]",
         "str q2, [x28, #848]",
-        "ldr q2, [x4, #128]",
+        "ldr q2, [x20, #128]",
         "str q2, [x28, #864]",
-        "ldr q2, [x4, #144]",
+        "ldr q2, [x20, #144]",
         "str q2, [x28, #880]",
         "b #+0x4c",
         "mov w20, #0x0",
@@ -1726,49 +1980,70 @@
         "str q2, [x28, #848]",
         "str q2, [x28, #864]",
         "str q2, [x28, #880]",
-        "ldr x20, [x4, #512]",
-        "ubfx x20, x20, #1, #1",
+        "mov x20, x4",
+        "ldr x21, [x20, #512]",
+        "ubfx x20, x21, #1, #1",
         "cbnz x20, #+0x8",
+        "b #+0x8c",
+        "mov x20, x4",
+        "ldr q2, [x20, #160]",
+        "mov v16.16b, v2.16b",
+        "ldr q2, [x20, #176]",
+        "mov v17.16b, v2.16b",
+        "ldr q2, [x20, #192]",
+        "mov v18.16b, v2.16b",
+        "ldr q2, [x20, #208]",
+        "mov v19.16b, v2.16b",
+        "ldr q2, [x20, #224]",
+        "mov v20.16b, v2.16b",
+        "ldr q2, [x20, #240]",
+        "mov v21.16b, v2.16b",
+        "ldr q2, [x20, #256]",
+        "mov v22.16b, v2.16b",
+        "ldr q2, [x20, #272]",
+        "mov v23.16b, v2.16b",
+        "ldr q2, [x20, #288]",
+        "mov v24.16b, v2.16b",
+        "ldr q2, [x20, #304]",
+        "mov v25.16b, v2.16b",
+        "ldr q2, [x20, #320]",
+        "mov v26.16b, v2.16b",
+        "ldr q2, [x20, #336]",
+        "mov v27.16b, v2.16b",
+        "ldr q2, [x20, #352]",
+        "mov v28.16b, v2.16b",
+        "ldr q2, [x20, #368]",
+        "mov v29.16b, v2.16b",
+        "ldr q2, [x20, #384]",
+        "mov v30.16b, v2.16b",
+        "ldr q2, [x20, #400]",
+        "mov v31.16b, v2.16b",
         "b #+0x48",
-        "ldr q16, [x4, #160]",
-        "ldr q17, [x4, #176]",
-        "ldr q18, [x4, #192]",
-        "ldr q19, [x4, #208]",
-        "ldr q20, [x4, #224]",
-        "ldr q21, [x4, #240]",
-        "ldr q22, [x4, #256]",
-        "ldr q23, [x4, #272]",
-        "ldr q24, [x4, #288]",
-        "ldr q25, [x4, #304]",
-        "ldr q26, [x4, #320]",
-        "ldr q27, [x4, #336]",
-        "ldr q28, [x4, #352]",
-        "ldr q29, [x4, #368]",
-        "ldr q30, [x4, #384]",
-        "ldr q31, [x4, #400]",
-        "b #+0x44",
-        "movi v16.2d, #0x0",
-        "mov v17.16b, v16.16b",
-        "mov v18.16b, v16.16b",
-        "mov v19.16b, v16.16b",
-        "mov v20.16b, v16.16b",
-        "mov v21.16b, v16.16b",
-        "mov v22.16b, v16.16b",
-        "mov v23.16b, v16.16b",
-        "mov v24.16b, v16.16b",
-        "mov v25.16b, v16.16b",
-        "mov v26.16b, v16.16b",
-        "mov v27.16b, v16.16b",
-        "mov v28.16b, v16.16b",
-        "mov v29.16b, v16.16b",
-        "mov v30.16b, v16.16b",
-        "mov v31.16b, v16.16b",
-        "ldr x20, [x4, #512]",
-        "ubfx x20, x20, #1, #2",
+        "movi v2.2d, #0x0",
+        "mov v16.16b, v2.16b",
+        "mov v17.16b, v2.16b",
+        "mov v18.16b, v2.16b",
+        "mov v19.16b, v2.16b",
+        "mov v20.16b, v2.16b",
+        "mov v21.16b, v2.16b",
+        "mov v22.16b, v2.16b",
+        "mov v23.16b, v2.16b",
+        "mov v24.16b, v2.16b",
+        "mov v25.16b, v2.16b",
+        "mov v26.16b, v2.16b",
+        "mov v27.16b, v2.16b",
+        "mov v28.16b, v2.16b",
+        "mov v29.16b, v2.16b",
+        "mov v30.16b, v2.16b",
+        "mov v31.16b, v2.16b",
+        "mov x20, x4",
+        "ldr x21, [x20, #512]",
+        "ubfx x20, x21, #1, #2",
         "cbnz x20, #+0x8",
-        "b #+0x2c",
-        "ldr w20, [x4, #24]",
-        "ubfx w20, w20, #13, #3",
+        "b #+0x30",
+        "mov x20, x4",
+        "ldr w21, [x20, #24]",
+        "ubfx w20, w21, #13, #3",
         "rbit w1, w20",
         "lsr w1, w1, #30",
         "mrs x0, fpcr",
@@ -1787,10 +2062,11 @@
       ]
     },
     "clwb [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "GROUP15 0x0F 0xAE /6",
       "ExpectedArm64ASM": [
-        "dc cvac, x4"
+        "mov x20, x4",
+        "dc cvac, x20"
       ]
     },
     "sfence": {
@@ -1801,54 +2077,60 @@
       ]
     },
     "clflush [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": "GROUP15 0x0F 0xAE /7",
       "ExpectedArm64ASM": [
-        "dc civac, x4",
+        "mov x20, x4",
+        "dc civac, x20",
         "dsb ish"
       ]
     },
     "clflushopt [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "GROUP15 0x0F 0xAE /7",
       "ExpectedArm64ASM": [
-        "dc civac, x4"
+        "mov x20, x4",
+        "dc civac, x20"
       ]
     },
     "prefetchnta [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "GROUP16 0x0F 0x18 /0"
       ],
       "ExpectedArm64ASM": [
-        "prfm pldl1strm, [x4]"
+        "mov x20, x4",
+        "prfm pldl1strm, [x20]"
       ]
     },
     "prefetcht0 [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "GROUP16 0x0F 0x18 /1"
       ],
       "ExpectedArm64ASM": [
-        "prfm pldl1keep, [x4]"
+        "mov x20, x4",
+        "prfm pldl1keep, [x20]"
       ]
     },
     "prefetcht1 [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "GROUP16 0x0F 0x18 /2"
       ],
       "ExpectedArm64ASM": [
-        "prfm pldl2keep, [x4]"
+        "mov x20, x4",
+        "prfm pldl2keep, [x20]"
       ]
     },
     "prefetcht2 [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "GROUP16 0x0F 0x18 /3"
       ],
       "ExpectedArm64ASM": [
-        "prfm pldl3keep, [x4]"
+        "mov x20, x4",
+        "prfm pldl3keep, [x20]"
       ]
     },
     "db 0x0f, 0x18, 0x20;": {
@@ -1861,31 +2143,34 @@
       "ExpectedArm64ASM": []
     },
     "db 0x0f, 0x0d, 0x00": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "GROUPP 0x0F 0x0D /0",
         "prefetch_exclusive [rax]"
       ],
       "ExpectedArm64ASM": [
-        "prfm pldl1keep, [x4]"
+        "mov x20, x4",
+        "prfm pldl1keep, [x20]"
       ]
     },
     "prefetchw [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "GROUPP 0x0F 0x0D /1"
       ],
       "ExpectedArm64ASM": [
-        "prfm pstl1keep, [x4]"
+        "mov x20, x4",
+        "prfm pstl1keep, [x20]"
       ]
     },
     "prefetchwt1 [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "GROUPP 0x0F 0x0D /2"
       ],
       "ExpectedArm64ASM": [
-        "prfm pstl1keep, [x4]"
+        "mov x20, x4",
+        "prfm pstl1keep, [x20]"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/SecondaryModRM.json b/unittests/InstructionCountCI/SecondaryModRM.json
index 8d12aa2d2a..eb0007ef23 100644
--- a/unittests/InstructionCountCI/SecondaryModRM.json
+++ b/unittests/InstructionCountCI/SecondaryModRM.json
@@ -14,9 +14,10 @@
   },
   "Instructions": {
     "xgetbv": {
-      "ExpectedInstructionCount": 54,
+      "ExpectedInstructionCount": 57,
       "Comment": "0xF 0x01 /2 RM-0",
       "ExpectedArm64ASM": [
+        "mov x20, x5",
         "sub sp, sp, #0xf0 (240)",
         "mov x3, sp",
         "st1 {v2.2d, v3.2d}, [x3], #32",
@@ -41,7 +42,7 @@
         "st1 {v20.2d, v21.2d, v22.2d, v23.2d}, [x3], #64",
         "st1 {v24.2d, v25.2d, v26.2d, v27.2d}, [x3], #64",
         "st1 {v28.2d, v29.2d, v30.2d, v31.2d}, [x3], #64",
-        "mov w1, w5",
+        "mov w1, w20",
         "ldr x0, [x28, #1112]",
         "ldr x2, [x28, #1128]",
         "blr x2",
@@ -67,20 +68,22 @@
         "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [sp], #64",
         "ld1 {v12.2d, v13.2d, v14.2d, v15.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "mov w20, w0",
-        "lsr x21, x0, #32",
-        "mov w4, w20",
-        "mov w6, w21"
+        "mov w22, w0",
+        "lsr x23, x0, #32",
+        "mov w20, w22",
+        "mov w21, w23",
+        "mov x4, x20",
+        "mov x6, x21"
       ]
     },
     "rdtscp": {
-      "ExpectedInstructionCount": 21,
+      "ExpectedInstructionCount": 24,
       "Comment": "0xF 0x01 /7 RM-1",
       "ExpectedArm64ASM": [
         "dmb ld",
         "mrs x20, S3_3_c14_c0_2",
-        "lsl w4, w20, #7",
-        "lsr x6, x20, #25",
+        "lsl w21, w20, #7",
+        "lsr x22, x20, #25",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "str x8, [x28, #40]",
@@ -97,14 +100,18 @@
         "msr nzcv, x8",
         "ldr x8, [x28, #40]",
         "str xzr, [x28, #1056]",
-        "orr x5, x0, x1, lsl #12"
+        "orr x20, x0, x1, lsl #12",
+        "mov x4, x21",
+        "mov x5, x20",
+        "mov x6, x22"
       ]
     },
     "clzero rax": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "0xF 0x01 /7 RM-4",
       "ExpectedArm64ASM": [
-        "dc zva, x4"
+        "mov x20, x4",
+        "dc zva, x20"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/Secondary_32Bit.json b/unittests/InstructionCountCI/Secondary_32Bit.json
index 28c6981829..d517028202 100644
--- a/unittests/InstructionCountCI/Secondary_32Bit.json
+++ b/unittests/InstructionCountCI/Secondary_32Bit.json
@@ -9,45 +9,55 @@
   },
   "Instructions": {
     "push fs": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0xa0",
       "ExpectedArm64ASM": [
-        "ldrh w20, [x28, #146]",
-        "str w20, [x8, #-4]!"
+        "mov w20, w8",
+        "ldrh w21, [x28, #146]",
+        "mov w22, w20",
+        "str w21, [x22, #-4]!",
+        "mov w8, w22"
       ]
     },
     "pop fs": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x0f 0xa1",
       "ExpectedArm64ASM": [
-        "ldr w20, [x8]",
-        "add x8, x8, #0x4 (4)",
-        "strh w20, [x28, #146]",
-        "ubfx w20, w20, #3, #13",
+        "mov w20, w8",
+        "ldr w21, [x20]",
+        "add x22, x20, #0x4 (4)",
+        "mov w8, w22",
+        "strh w21, [x28, #146]",
+        "ubfx w20, w21, #3, #13",
         "add x0, x28, x20, lsl #2",
-        "ldr w20, [x0, #896]",
-        "str w20, [x28, #176]"
+        "ldr w21, [x0, #896]",
+        "str w21, [x28, #176]"
       ]
     },
     "push gs": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0xa8",
       "ExpectedArm64ASM": [
-        "ldrh w20, [x28, #144]",
-        "str w20, [x8, #-4]!"
+        "mov w20, w8",
+        "ldrh w21, [x28, #144]",
+        "mov w22, w20",
+        "str w21, [x22, #-4]!",
+        "mov w8, w22"
       ]
     },
     "pop gs": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x0f 0xa9",
       "ExpectedArm64ASM": [
-        "ldr w20, [x8]",
-        "add x8, x8, #0x4 (4)",
-        "strh w20, [x28, #144]",
-        "ubfx w20, w20, #3, #13",
+        "mov w20, w8",
+        "ldr w21, [x20]",
+        "add x22, x20, #0x4 (4)",
+        "mov w8, w22",
+        "strh w21, [x28, #144]",
+        "ubfx w20, w21, #3, #13",
         "add x0, x28, x20, lsl #2",
-        "ldr w20, [x0, #896]",
-        "str w20, [x28, #168]"
+        "ldr w21, [x0, #896]",
+        "str w21, [x28, #168]"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/Secondary_OpSize.json b/unittests/InstructionCountCI/Secondary_OpSize.json
index 2d3b36f05d..c8bdd2bab7 100644
--- a/unittests/InstructionCountCI/Secondary_OpSize.json
+++ b/unittests/InstructionCountCI/Secondary_OpSize.json
@@ -17,66 +17,89 @@
       "ExpectedArm64ASM": []
     },
     "movupd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "0x66 0x0f 0x10",
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b"
+        "mov v2.16b, v17.16b",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movupd xmm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0x10",
       "ExpectedArm64ASM": [
-        "ldr q16, [x4]"
+        "mov x20, x4",
+        "ldr q2, [x20]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movupd [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0x11",
       "ExpectedArm64ASM": [
-        "str q16, [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "str q2, [x20]"
       ]
     },
     "movlpd xmm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x66 0x0f 0x12",
       "ExpectedArm64ASM": [
-        "ld1 {v16.d}[0], [x4]"
+        "mov x20, x4",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v2.16b",
+        "ld1 {v3.d}[0], [x20]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "movlpd [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0x13",
       "ExpectedArm64ASM": [
-        "str d16, [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "str d2, [x20]"
       ]
     },
     "unpcklpd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0x14",
       "ExpectedArm64ASM": [
-        "zip1 v16.2d, v16.2d, v17.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "zip1 v4.2d, v2.2d, v3.2d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "unpckhpd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0x15",
       "ExpectedArm64ASM": [
-        "zip2 v16.2d, v16.2d, v17.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "zip2 v4.2d, v2.2d, v3.2d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "movhpd xmm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x66 0x0f 0x16",
       "ExpectedArm64ASM": [
-        "ld1 {v16.d}[1], [x4]"
+        "mov x20, x4",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v2.16b",
+        "ld1 {v3.d}[1], [x20]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "movhpd [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0x17",
       "ExpectedArm64ASM": [
-        "st1 {v16.d}[1], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "st1 {v2.d}[1], [x20]"
       ]
     },
     "movapd xmm0, xmm0": {
@@ -85,373 +108,493 @@
       "ExpectedArm64ASM": []
     },
     "movapd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "0x66 0x0f 0x28",
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b"
+        "mov v2.16b, v17.16b",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movapd xmm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0x28",
       "ExpectedArm64ASM": [
-        "ldr q16, [x4]"
+        "mov x20, x4",
+        "ldr q2, [x20]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movapd [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0x29",
       "ExpectedArm64ASM": [
-        "str q16, [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "str q2, [x20]"
       ]
     },
     "cvtpi2pd xmm0, mm0": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0x2a",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
-        "sxtl v2.2d, v2.2s",
-        "scvtf v16.2d, v2.2d"
+        "sxtl v3.2d, v2.2s",
+        "scvtf v2.2d, v3.2d",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movntpd [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0x2b",
       "ExpectedArm64ASM": [
-        "str q16, [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "str q2, [x20]"
       ]
     },
     "cvttpd2pi mm0, xmm0": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0x2c",
       "ExpectedArm64ASM": [
-        "fcvtn v2.2s, v16.2d",
-        "fcvtzs v2.2s, v2.2s",
+        "mov v2.16b, v16.16b",
+        "fcvtn v3.2s, v2.2d",
+        "fcvtzs v2.2s, v3.2s",
         "str d2, [x28, #768]"
       ]
     },
     "cvtpd2pi mm0, xmm0": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x66 0x0f 0x2d",
       "ExpectedArm64ASM": [
-        "fcvtn v2.2s, v16.2d",
-        "frinti v2.2s, v2.2s",
+        "mov v2.16b, v16.16b",
+        "fcvtn v3.2s, v2.2d",
+        "frinti v2.2s, v3.2s",
         "fcvtzs v2.2s, v2.2s",
         "str d2, [x28, #768]"
       ]
     },
     "ucomisd xmm0, xmm1": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 15,
       "Comment": "0x66 0x0f 0x2e",
       "ExpectedArm64ASM": [
-        "fcmp d16, d17",
-        "mov w27, #0x0",
-        "cset w20, eq",
-        "cset w21, lo",
-        "cset w22, vs",
-        "orr w21, w21, w22",
-        "lsl x21, x21, #29",
-        "orr w20, w20, w22",
-        "orr w20, w21, w20, lsl #30",
-        "eor w26, w22, #0x1",
-        "msr nzcv, x20"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "fcmp d2, d3",
+        "mov w20, #0x0",
+        "cset w21, eq",
+        "cset w22, lo",
+        "cset w23, vs",
+        "orr w24, w22, w23",
+        "lsl x22, x24, #29",
+        "orr w24, w21, w23",
+        "orr w21, w22, w24, lsl #30",
+        "eor w22, w23, #0x1",
+        "mov x26, x22",
+        "mov x27, x20",
+        "msr nzcv, x21"
       ]
     },
     "comisd xmm0, xmm1": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 15,
       "Comment": "0x66 0x0f 0x2f",
       "ExpectedArm64ASM": [
-        "fcmp d16, d17",
-        "mov w27, #0x0",
-        "cset w20, eq",
-        "cset w21, lo",
-        "cset w22, vs",
-        "orr w21, w21, w22",
-        "lsl x21, x21, #29",
-        "orr w20, w20, w22",
-        "orr w20, w21, w20, lsl #30",
-        "eor w26, w22, #0x1",
-        "msr nzcv, x20"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "fcmp d2, d3",
+        "mov w20, #0x0",
+        "cset w21, eq",
+        "cset w22, lo",
+        "cset w23, vs",
+        "orr w24, w22, w23",
+        "lsl x22, x24, #29",
+        "orr w24, w21, w23",
+        "orr w21, w22, w24, lsl #30",
+        "eor w22, w23, #0x1",
+        "mov x26, x22",
+        "mov x27, x20",
+        "msr nzcv, x21"
       ]
     },
     "movmskpd eax, xmm0": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x66 0x0f 0x50",
       "ExpectedArm64ASM": [
-        "uzp2 v2.4s, v16.4s, v16.4s",
-        "mov x20, v2.d[0]",
-        "bfi x20, x20, #31, #32",
-        "lsr x4, x20, #62"
+        "mov v2.16b, v16.16b",
+        "uzp2 v3.4s, v2.4s, v2.4s",
+        "mov x20, v3.d[0]",
+        "mov x21, x20",
+        "bfi x21, x20, #31, #32",
+        "lsr x20, x21, #62",
+        "mov x4, x20"
       ]
     },
     "sqrtpd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0x51",
       "ExpectedArm64ASM": [
-        "fsqrt v16.2d, v17.2d"
+        "mov v2.16b, v17.16b",
+        "fsqrt v3.2d, v2.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "addpd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0x58",
       "ExpectedArm64ASM": [
-        "fadd v16.2d, v16.2d, v17.2d"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "fadd v4.2d, v3.2d, v2.2d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "mulpd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0x59",
       "ExpectedArm64ASM": [
-        "fmul v16.2d, v16.2d, v17.2d"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "fmul v4.2d, v3.2d, v2.2d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cvtpd2ps xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0x5a",
       "ExpectedArm64ASM": [
-        "fcvtn v16.2s, v17.2d"
+        "mov v2.16b, v17.16b",
+        "fcvtn v3.2s, v2.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "cvtpd2ps xmm0, [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0xf2 0x0f 0x5a",
       "ExpectedArm64ASM": [
-        "ldr q2, [x4]",
-        "fcvtn v16.2s, v2.2d"
+        "mov x20, x4",
+        "ldr q2, [x20]",
+        "fcvtn v3.2s, v2.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "cvtps2dq xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0x5b",
       "ExpectedArm64ASM": [
-        "frinti v16.4s, v17.4s",
-        "fcvtzs v16.4s, v16.4s"
+        "mov v2.16b, v17.16b",
+        "frinti v3.4s, v2.4s",
+        "fcvtzs v3.4s, v3.4s",
+        "mov v16.16b, v3.16b"
       ]
     },
     "cvtps2dq xmm0, [rax]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0xf2 0x0f 0x5b",
       "ExpectedArm64ASM": [
-        "ldr q2, [x4]",
-        "frinti v16.4s, v2.4s",
-        "fcvtzs v16.4s, v16.4s"
+        "mov x20, x4",
+        "ldr q2, [x20]",
+        "frinti v3.4s, v2.4s",
+        "fcvtzs v3.4s, v3.4s",
+        "mov v16.16b, v3.16b"
       ]
     },
     "subpd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0x5c",
       "ExpectedArm64ASM": [
-        "fsub v16.2d, v16.2d, v17.2d"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "fsub v4.2d, v3.2d, v2.2d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "minpd xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x66 0x0f 0x5d",
       "ExpectedArm64ASM": [
-        "fcmgt v0.2d, v17.2d, v16.2d",
-        "bif v16.16b, v17.16b, v0.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "fcmgt v0.2d, v2.2d, v3.2d",
+        "mov v4.16b, v3.16b",
+        "bif v4.16b, v2.16b, v0.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "divpd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0x5e",
       "ExpectedArm64ASM": [
-        "fdiv v16.2d, v16.2d, v17.2d"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "fdiv v4.2d, v3.2d, v2.2d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "maxpd xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x66 0x0f 0x5f",
       "ExpectedArm64ASM": [
-        "fcmgt v0.2d, v17.2d, v16.2d",
-        "bit v16.16b, v17.16b, v0.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "fcmgt v0.2d, v2.2d, v3.2d",
+        "mov v4.16b, v3.16b",
+        "bit v4.16b, v2.16b, v0.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "punpcklbw xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0x60",
       "ExpectedArm64ASM": [
-        "zip1 v16.16b, v16.16b, v17.16b"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "zip1 v4.16b, v2.16b, v3.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "punpcklbw xmm0, [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x66 0x0f 0x60",
       "ExpectedArm64ASM": [
-        "ldr q2, [x4]",
-        "zip1 v16.16b, v16.16b, v2.16b"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "ldr q3, [x20]",
+        "zip1 v4.16b, v2.16b, v3.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "punpcklwd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0x61",
       "ExpectedArm64ASM": [
-        "zip1 v16.8h, v16.8h, v17.8h"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "zip1 v4.8h, v2.8h, v3.8h",
+        "mov v16.16b, v4.16b"
       ]
     },
     "punpcklwd xmm0, [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x66 0x0f 0x61",
       "ExpectedArm64ASM": [
-        "ldr q2, [x4]",
-        "zip1 v16.8h, v16.8h, v2.8h"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "ldr q3, [x20]",
+        "zip1 v4.8h, v2.8h, v3.8h",
+        "mov v16.16b, v4.16b"
       ]
     },
     "punpckldq xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0x62",
       "ExpectedArm64ASM": [
-        "zip1 v16.4s, v16.4s, v17.4s"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "zip1 v4.4s, v2.4s, v3.4s",
+        "mov v16.16b, v4.16b"
       ]
     },
     "punpckldq xmm0, [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x66 0x0f 0x62",
       "ExpectedArm64ASM": [
-        "ldr q2, [x4]",
-        "zip1 v16.4s, v16.4s, v2.4s"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "ldr q3, [x20]",
+        "zip1 v4.4s, v2.4s, v3.4s",
+        "mov v16.16b, v4.16b"
       ]
     },
     "packsswb xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x66 0x0f 0x63",
       "ExpectedArm64ASM": [
-        "sqxtn v16.8b, v16.8h",
-        "sqxtn2 v16.16b, v17.8h"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "sqxtn v4.8b, v2.8h",
+        "sqxtn2 v4.16b, v3.8h",
+        "mov v16.16b, v4.16b"
       ]
     },
     "packsswb xmm0, [rax]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x66 0x0f 0x63",
       "ExpectedArm64ASM": [
-        "ldr q2, [x4]",
-        "sqxtn v16.8b, v16.8h",
-        "sqxtn2 v16.16b, v2.8h"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "ldr q3, [x20]",
+        "sqxtn v4.8b, v2.8h",
+        "sqxtn2 v4.16b, v3.8h",
+        "mov v16.16b, v4.16b"
       ]
     },
     "packsswb xmm0, xmm0": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0x63",
       "ExpectedArm64ASM": [
-        "mov v0.16b, v16.16b",
-        "sqxtn v16.8b, v16.8h",
-        "sqxtn2 v16.16b, v0.8h"
+        "mov v2.16b, v16.16b",
+        "sqxtn v3.8b, v2.8h",
+        "sqxtn2 v3.16b, v2.8h",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pcmpgtb xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0x64",
       "ExpectedArm64ASM": [
-        "cmgt v16.16b, v16.16b, v17.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "cmgt v4.16b, v3.16b, v2.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pcmpgtw xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0x65",
       "ExpectedArm64ASM": [
-        "cmgt v16.8h, v16.8h, v17.8h"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "cmgt v4.8h, v3.8h, v2.8h",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pcmpgtd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0x66",
       "ExpectedArm64ASM": [
-        "cmgt v16.4s, v16.4s, v17.4s"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "cmgt v4.4s, v3.4s, v2.4s",
+        "mov v16.16b, v4.16b"
       ]
     },
     "punpckhbw xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0x68",
       "ExpectedArm64ASM": [
-        "zip2 v16.16b, v16.16b, v17.16b"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "zip2 v4.16b, v2.16b, v3.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "punpckhbw xmm0, [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x66 0x0f 0x68",
       "ExpectedArm64ASM": [
-        "ldr q2, [x4]",
-        "zip2 v16.16b, v16.16b, v2.16b"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "ldr q3, [x20]",
+        "zip2 v4.16b, v2.16b, v3.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "punpckhwd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0x69",
       "ExpectedArm64ASM": [
-        "zip2 v16.8h, v16.8h, v17.8h"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "zip2 v4.8h, v2.8h, v3.8h",
+        "mov v16.16b, v4.16b"
       ]
     },
     "punpckhwd xmm0, [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x66 0x0f 0x69",
       "ExpectedArm64ASM": [
-        "ldr q2, [x4]",
-        "zip2 v16.8h, v16.8h, v2.8h"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "ldr q3, [x20]",
+        "zip2 v4.8h, v2.8h, v3.8h",
+        "mov v16.16b, v4.16b"
       ]
     },
     "punpckhdq xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0x6a",
       "ExpectedArm64ASM": [
-        "zip2 v16.4s, v16.4s, v17.4s"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "zip2 v4.4s, v2.4s, v3.4s",
+        "mov v16.16b, v4.16b"
       ]
     },
     "punpckhdq xmm0, [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x66 0x0f 0x6a",
       "ExpectedArm64ASM": [
-        "ldr q2, [x4]",
-        "zip2 v16.4s, v16.4s, v2.4s"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "ldr q3, [x20]",
+        "zip2 v4.4s, v2.4s, v3.4s",
+        "mov v16.16b, v4.16b"
       ]
     },
     "packssdw xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x66 0x0f 0x6b",
       "ExpectedArm64ASM": [
-        "sqxtn v16.4h, v16.4s",
-        "sqxtn2 v16.8h, v17.4s"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "sqxtn v4.4h, v2.4s",
+        "sqxtn2 v4.8h, v3.4s",
+        "mov v16.16b, v4.16b"
       ]
     },
     "punpcklqdq xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0x6c",
       "ExpectedArm64ASM": [
-        "zip1 v16.2d, v16.2d, v17.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "zip1 v4.2d, v2.2d, v3.2d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "punpckhqdq xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0x6d",
       "ExpectedArm64ASM": [
-        "zip2 v16.2d, v16.2d, v17.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "zip2 v4.2d, v2.2d, v3.2d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "movd xmm0, dword [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0x6e",
       "ExpectedArm64ASM": [
-        "ldr s16, [x4]"
+        "mov x20, x4",
+        "ldr s2, [x20]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movd xmm0, eax": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0x6e",
       "ExpectedArm64ASM": [
-        "fmov s16, w4"
+        "mov x20, x4",
+        "fmov s2, w20",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movq xmm0, qword [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0x6e",
       "ExpectedArm64ASM": [
-        "ldr d16, [x4]"
+        "mov x20, x4",
+        "ldr d2, [x20]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movq xmm0, rax": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0x6e",
       "ExpectedArm64ASM": [
-        "fmov d16, x4"
+        "mov x20, x4",
+        "fmov d2, x20",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movdqa xmm0, xmm0": {
@@ -460,127 +603,154 @@
       "ExpectedArm64ASM": []
     },
     "movdqa xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "0x66 0x0f 0x6f",
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b"
+        "mov v2.16b, v17.16b",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movdqa xmm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0x6f",
       "ExpectedArm64ASM": [
-        "ldr q16, [x4]"
+        "mov x20, x4",
+        "ldr q2, [x20]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "pshufd xmm0, xmm1, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Broadcast element 0",
         "0x66 0x0f 0x70"
       ],
       "ExpectedArm64ASM": [
-        "dup v16.4s, v17.s[0]"
+        "mov v2.16b, v17.16b",
+        "dup v3.4s, v2.s[0]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pshufd xmm0, xmm1, 11100100b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Identity copy",
         "0x66 0x0f 0x70"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b"
+        "mov v2.16b, v17.16b",
+        "mov v16.16b, v2.16b"
       ]
     },
     "pshufd xmm0, xmm1, 01010000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Zip with self",
         "0x66 0x0f 0x70"
       ],
       "ExpectedArm64ASM": [
-        "zip1 v16.4s, v17.4s, v17.4s"
+        "mov v2.16b, v17.16b",
+        "zip1 v3.4s, v2.4s, v2.4s",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pshufd xmm0, [rax], 0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Broadcast element 0 from memory",
         "0x66 0x0f 0x70"
       ],
       "ExpectedArm64ASM": [
-        "ldr q2, [x4]",
-        "dup v16.4s, v2.s[0]"
+        "mov x20, x4",
+        "ldr q2, [x20]",
+        "dup v3.4s, v2.s[0]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pshufd xmm0, xmm1, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Broadcast element 0",
         "Element 0 becomes element 1",
         "0x66 0x0f 0x70"
       ],
       "ExpectedArm64ASM": [
+        "mov v2.16b, v17.16b",
         "ldr x0, [x28, #1760]",
-        "ldr q2, [x0, #16]",
-        "tbl v16.16b, {v17.16b}, v2.16b"
+        "ldr q3, [x0, #16]",
+        "tbl v4.16b, {v2.16b}, v3.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pshufd xmm0, [rax], 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Broadcast element 0 from Memory",
         "Element 0 becomes element 1",
         "0x66 0x0f 0x70"
       ],
       "ExpectedArm64ASM": [
-        "ldr q2, [x4]",
+        "mov x20, x4",
+        "ldr q2, [x20]",
         "ldr x0, [x28, #1760]",
         "ldr q3, [x0, #16]",
-        "tbl v16.16b, {v2.16b}, v3.16b"
+        "tbl v4.16b, {v2.16b}, v3.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pshufd xmm0, xmm1, 0xff": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Broadcast element 3",
         "0x66 0x0f 0x70"
       ],
       "ExpectedArm64ASM": [
-        "dup v16.4s, v17.s[3]"
+        "mov v2.16b, v17.16b",
+        "dup v3.4s, v2.s[3]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pshufd xmm0, [rax], 0xff": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Broadcast element 3 from memory",
         "0x66 0x0f 0x70"
       ],
       "ExpectedArm64ASM": [
-        "ldr q2, [x4]",
-        "dup v16.4s, v2.s[3]"
+        "mov x20, x4",
+        "ldr q2, [x20]",
+        "dup v3.4s, v2.s[3]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pcmpeqb xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0x74",
       "ExpectedArm64ASM": [
-        "cmeq v16.16b, v16.16b, v17.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "cmeq v4.16b, v3.16b, v2.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pcmpeqw xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0x75",
       "ExpectedArm64ASM": [
-        "cmeq v16.8h, v16.8h, v17.8h"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "cmeq v4.8h, v3.8h, v2.8h",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pcmpeqd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0x76",
       "ExpectedArm64ASM": [
-        "cmeq v16.4s, v16.4s, v17.4s"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "cmeq v4.4s, v3.4s, v2.4s",
+        "mov v16.16b, v4.16b"
       ]
     },
     "extrq xmm0, 64, 0": {
@@ -616,766 +786,1061 @@
       ]
     },
     "haddpd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0x7c",
       "ExpectedArm64ASM": [
-        "faddp v16.2d, v16.2d, v17.2d"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "faddp v4.2d, v3.2d, v2.2d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "hsubpd xmm0, xmm1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x66 0x0f 0x7c",
       "ExpectedArm64ASM": [
-        "uzp1 v2.2d, v16.2d, v17.2d",
-        "uzp2 v3.2d, v16.2d, v17.2d",
-        "fsub v16.2d, v2.2d, v3.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "uzp1 v4.2d, v2.2d, v3.2d",
+        "uzp2 v5.2d, v2.2d, v3.2d",
+        "fsub v2.2d, v4.2d, v5.2d",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movd eax, xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0x7e",
       "ExpectedArm64ASM": [
-        "mov w4, v16.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov w20, v2.s[0]",
+        "mov x4, x20"
       ]
     },
     "movq rax, xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0x7e",
       "ExpectedArm64ASM": [
-        "mov x4, v16.d[0]"
+        "mov v2.16b, v16.16b",
+        "mov x20, v2.d[0]",
+        "mov x4, x20"
       ]
     },
     "movd dword [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0x7e",
       "ExpectedArm64ASM": [
-        "str s16, [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "str s2, [x20]"
       ]
     },
     "movq qword [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0x7e",
       "ExpectedArm64ASM": [
-        "str d16, [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "str d2, [x20]"
       ]
     },
     "movdqa [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0x7f",
       "ExpectedArm64ASM": [
-        "str q16, [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "str q2, [x20]"
       ]
     },
     "cmppd xmm0, xmm1, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xc2",
       "ExpectedArm64ASM": [
-        "fcmeq v16.2d, v16.2d, v17.2d"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "fcmeq v4.2d, v3.2d, v2.2d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmppd xmm0, xmm1, 1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xc2",
       "ExpectedArm64ASM": [
-        "fcmgt v16.2d, v17.2d, v16.2d"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "fcmgt v4.2d, v2.2d, v3.2d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmppd xmm0, xmm1, 2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xc2",
       "ExpectedArm64ASM": [
-        "fcmge v16.2d, v17.2d, v16.2d"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "fcmge v4.2d, v2.2d, v3.2d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmppd xmm0, xmm1, 3": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x66 0x0f 0xc2",
       "ExpectedArm64ASM": [
-        "fcmge v0.2d, v16.2d, v17.2d",
-        "fcmgt v1.2d, v17.2d, v16.2d",
-        "orr v16.16b, v0.16b, v1.16b",
-        "mvn v16.16b, v16.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "fcmge v0.2d, v3.2d, v2.2d",
+        "fcmgt v1.2d, v2.2d, v3.2d",
+        "orr v4.16b, v0.16b, v1.16b",
+        "mvn v4.16b, v4.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmppd xmm0, xmm1, 4": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x66 0x0f 0xc2",
       "ExpectedArm64ASM": [
-        "fcmeq v16.2d, v16.2d, v17.2d",
-        "mvn v16.16b, v16.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "fcmeq v4.2d, v3.2d, v2.2d",
+        "mvn v4.16b, v4.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmppd xmm0, xmm1, 5": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x66 0x0f 0xc2",
       "ExpectedArm64ASM": [
-        "fcmgt v2.2d, v17.2d, v16.2d",
-        "mvn v16.16b, v2.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "fcmgt v4.2d, v2.2d, v3.2d",
+        "mvn v2.16b, v4.16b",
+        "mov v16.16b, v2.16b"
       ]
     },
     "cmppd xmm0, xmm1, 6": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x66 0x0f 0xc2",
       "ExpectedArm64ASM": [
-        "fcmge v2.2d, v17.2d, v16.2d",
-        "mvn v16.16b, v2.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "fcmge v4.2d, v2.2d, v3.2d",
+        "mvn v2.16b, v4.16b",
+        "mov v16.16b, v2.16b"
       ]
     },
     "cmppd xmm0, xmm1, 7": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x66 0x0f 0xc2",
       "ExpectedArm64ASM": [
-        "fcmge v0.2d, v16.2d, v17.2d",
-        "fcmgt v1.2d, v17.2d, v16.2d",
-        "orr v16.16b, v0.16b, v1.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "fcmge v0.2d, v3.2d, v2.2d",
+        "fcmgt v1.2d, v2.2d, v3.2d",
+        "orr v4.16b, v0.16b, v1.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pinsrw xmm0, eax, 000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x66 0x0f 0xc4",
       "ExpectedArm64ASM": [
-        "mov v16.h[0], w4"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.h[0], w20",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pinsrw xmm0, eax, 001b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x66 0x0f 0xc4",
       "ExpectedArm64ASM": [
-        "mov v16.h[1], w4"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.h[1], w20",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pinsrw xmm0, eax, 010b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x66 0x0f 0xc4",
       "ExpectedArm64ASM": [
-        "mov v16.h[2], w4"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.h[2], w20",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pinsrw xmm0, eax, 011b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x66 0x0f 0xc4",
       "ExpectedArm64ASM": [
-        "mov v16.h[3], w4"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.h[3], w20",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pinsrw xmm0, eax, 100b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x66 0x0f 0xc4",
       "ExpectedArm64ASM": [
-        "mov v16.h[4], w4"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.h[4], w20",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pinsrw xmm0, eax, 101b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x66 0x0f 0xc4",
       "ExpectedArm64ASM": [
-        "mov v16.h[5], w4"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.h[5], w20",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pinsrw xmm0, eax, 110b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x66 0x0f 0xc4",
       "ExpectedArm64ASM": [
-        "mov v16.h[6], w4"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.h[6], w20",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pinsrw xmm0, eax, 111b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x66 0x0f 0xc4",
       "ExpectedArm64ASM": [
-        "mov v16.h[7], w4"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.h[7], w20",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pinsrw xmm0, [rax], 000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x66 0x0f 0xc4",
       "ExpectedArm64ASM": [
-        "ld1 {v16.h}[0], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "ld1 {v3.h}[0], [x20]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pinsrw xmm0, [rax], 001b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x66 0x0f 0xc4",
       "ExpectedArm64ASM": [
-        "ld1 {v16.h}[1], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "ld1 {v3.h}[1], [x20]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pinsrw xmm0, [rax], 010b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x66 0x0f 0xc4",
       "ExpectedArm64ASM": [
-        "ld1 {v16.h}[2], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "ld1 {v3.h}[2], [x20]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pinsrw xmm0, [rax], 011b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x66 0x0f 0xc4",
       "ExpectedArm64ASM": [
-        "ld1 {v16.h}[3], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "ld1 {v3.h}[3], [x20]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pinsrw xmm0, [rax], 100b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x66 0x0f 0xc4",
       "ExpectedArm64ASM": [
-        "ld1 {v16.h}[4], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "ld1 {v3.h}[4], [x20]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pinsrw xmm0, [rax], 101b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x66 0x0f 0xc4",
       "ExpectedArm64ASM": [
-        "ld1 {v16.h}[5], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "ld1 {v3.h}[5], [x20]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pinsrw xmm0, [rax], 110b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x66 0x0f 0xc4",
       "ExpectedArm64ASM": [
-        "ld1 {v16.h}[6], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "ld1 {v3.h}[6], [x20]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pinsrw xmm0, [rax], 111b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x66 0x0f 0xc4",
       "ExpectedArm64ASM": [
-        "ld1 {v16.h}[7], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "ld1 {v3.h}[7], [x20]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pextrw eax, xmm0, 000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0xc5",
       "ExpectedArm64ASM": [
-        "umov w4, v16.h[0]"
+        "mov v2.16b, v16.16b",
+        "umov w20, v2.h[0]",
+        "mov x4, x20"
       ]
     },
     "pextrw eax, xmm0, 001b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0xc5",
       "ExpectedArm64ASM": [
-        "umov w4, v16.h[1]"
+        "mov v2.16b, v16.16b",
+        "umov w20, v2.h[1]",
+        "mov x4, x20"
       ]
     },
     "pextrw eax, xmm0, 010b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0xc5",
       "ExpectedArm64ASM": [
-        "umov w4, v16.h[2]"
+        "mov v2.16b, v16.16b",
+        "umov w20, v2.h[2]",
+        "mov x4, x20"
       ]
     },
     "pextrw eax, xmm0, 011b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0xc5",
       "ExpectedArm64ASM": [
-        "umov w4, v16.h[3]"
+        "mov v2.16b, v16.16b",
+        "umov w20, v2.h[3]",
+        "mov x4, x20"
       ]
     },
     "pextrw eax, xmm0, 100b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0xc5",
       "ExpectedArm64ASM": [
-        "umov w4, v16.h[4]"
+        "mov v2.16b, v16.16b",
+        "umov w20, v2.h[4]",
+        "mov x4, x20"
       ]
     },
     "pextrw eax, xmm0, 101b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0xc5",
       "ExpectedArm64ASM": [
-        "umov w4, v16.h[5]"
+        "mov v2.16b, v16.16b",
+        "umov w20, v2.h[5]",
+        "mov x4, x20"
       ]
     },
     "pextrw eax, xmm0, 110b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0xc5",
       "ExpectedArm64ASM": [
-        "umov w4, v16.h[6]"
+        "mov v2.16b, v16.16b",
+        "umov w20, v2.h[6]",
+        "mov x4, x20"
       ]
     },
     "pextrw eax, xmm0, 111b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0xc5",
       "ExpectedArm64ASM": [
-        "umov w4, v16.h[7]"
+        "mov v2.16b, v16.16b",
+        "umov w20, v2.h[7]",
+        "mov x4, x20"
       ]
     },
     "pextrw [rax], xmm0, 000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0xc5",
       "ExpectedArm64ASM": [
-        "st1 {v16.h}[0], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "st1 {v2.h}[0], [x20]"
       ]
     },
     "pextrw [rax], xmm0, 001b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0xc5",
       "ExpectedArm64ASM": [
-        "st1 {v16.h}[1], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "st1 {v2.h}[1], [x20]"
       ]
     },
     "pextrw [rax], xmm0, 010b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0xc5",
       "ExpectedArm64ASM": [
-        "st1 {v16.h}[2], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "st1 {v2.h}[2], [x20]"
       ]
     },
     "pextrw [rax], xmm0, 011b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0xc5",
       "ExpectedArm64ASM": [
-        "st1 {v16.h}[3], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "st1 {v2.h}[3], [x20]"
       ]
     },
     "pextrw [rax], xmm0, 100b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0xc5",
       "ExpectedArm64ASM": [
-        "st1 {v16.h}[4], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "st1 {v2.h}[4], [x20]"
       ]
     },
     "pextrw [rax], xmm0, 101b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0xc5",
       "ExpectedArm64ASM": [
-        "st1 {v16.h}[5], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "st1 {v2.h}[5], [x20]"
       ]
     },
     "pextrw [rax], xmm0, 110b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0xc5",
       "ExpectedArm64ASM": [
-        "st1 {v16.h}[6], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "st1 {v2.h}[6], [x20]"
       ]
     },
     "pextrw [rax], xmm0, 111b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0xc5",
       "ExpectedArm64ASM": [
-        "st1 {v16.h}[7], [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "st1 {v2.h}[7], [x20]"
       ]
     },
     "shufpd xmm0, xmm1, 00b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xc6",
       "ExpectedArm64ASM": [
-        "zip1 v16.2d, v16.2d, v17.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "zip1 v4.2d, v2.2d, v3.2d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "shufpd xmm0, xmm1, 01b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xc6",
       "ExpectedArm64ASM": [
-        "ext v16.16b, v16.16b, v17.16b, #8"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "ext v4.16b, v2.16b, v3.16b, #8",
+        "mov v16.16b, v4.16b"
       ]
     },
     "shufpd xmm0, xmm1, 10b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x66 0x0f 0xc6",
       "ExpectedArm64ASM": [
-        "mov v16.d[1], v17.d[1]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.d[1], v3.d[1]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "shufpd xmm0, xmm1, 11b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xc6",
       "ExpectedArm64ASM": [
-        "zip2 v16.2d, v16.2d, v17.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "zip2 v4.2d, v2.2d, v3.2d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "shufpd xmm1, xmm0, 00b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xc6",
       "ExpectedArm64ASM": [
-        "zip1 v17.2d, v17.2d, v16.2d"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "zip1 v4.2d, v2.2d, v3.2d",
+        "mov v17.16b, v4.16b"
       ]
     },
     "shufpd xmm1, xmm0, 01b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xc6",
       "ExpectedArm64ASM": [
-        "ext v17.16b, v17.16b, v16.16b, #8"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "ext v4.16b, v2.16b, v3.16b, #8",
+        "mov v17.16b, v4.16b"
       ]
     },
     "shufpd xmm1, xmm0, 10b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x66 0x0f 0xc6",
       "ExpectedArm64ASM": [
-        "mov v17.d[1], v16.d[1]"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.d[1], v3.d[1]",
+        "mov v17.16b, v4.16b"
       ]
     },
     "shufpd xmm1, xmm0, 11b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xc6",
       "ExpectedArm64ASM": [
-        "zip2 v17.2d, v17.2d, v16.2d"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "zip2 v4.2d, v2.2d, v3.2d",
+        "mov v17.16b, v4.16b"
       ]
     },
     "addsubpd xmm0, xmm1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x66 0x0f 0xd0",
       "ExpectedArm64ASM": [
-        "ldr q2, [x28, #2112]",
-        "eor v2.16b, v17.16b, v2.16b",
-        "fadd v16.2d, v16.2d, v2.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "ldr q4, [x28, #2112]",
+        "eor v5.16b, v3.16b, v4.16b",
+        "fadd v3.2d, v2.2d, v5.2d",
+        "mov v16.16b, v3.16b"
       ]
     },
     "psrlw xmm0, xmm1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x66 0x0f 0xd1",
       "ExpectedArm64ASM": [
-        "uqshl d0, d17, #57",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "uqshl d0, d3, #57",
         "ushr d0, d0, #57",
         "dup v0.8h, v0.h[0]",
         "neg v0.8h, v0.8h",
-        "ushl v16.8h, v16.8h, v0.8h"
+        "ushl v4.8h, v2.8h, v0.8h",
+        "mov v16.16b, v4.16b"
       ]
     },
     "psrld xmm0, xmm1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x66 0x0f 0xd2",
       "ExpectedArm64ASM": [
-        "uqshl d0, d17, #57",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "uqshl d0, d3, #57",
         "ushr d0, d0, #57",
         "dup v0.4s, v0.s[0]",
         "neg v0.4s, v0.4s",
-        "ushl v16.4s, v16.4s, v0.4s"
+        "ushl v4.4s, v2.4s, v0.4s",
+        "mov v16.16b, v4.16b"
       ]
     },
     "psrlq xmm0, xmm1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x66 0x0f 0xd3",
       "ExpectedArm64ASM": [
-        "uqshl d0, d17, #57",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "uqshl d0, d3, #57",
         "ushr d0, d0, #57",
         "dup v0.2d, v0.d[0]",
         "neg v0.2d, v0.2d",
-        "ushl v16.2d, v16.2d, v0.2d"
+        "ushl v4.2d, v2.2d, v0.2d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "paddq xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xd4",
       "ExpectedArm64ASM": [
-        "add v16.2d, v16.2d, v17.2d"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "add v4.2d, v3.2d, v2.2d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pmullw xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xd3",
       "ExpectedArm64ASM": [
-        "mul v16.8h, v16.8h, v17.8h"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "mul v4.8h, v3.8h, v2.8h",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pmovmskb eax, xmm0": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": "0x66 0x0f 0xd7",
       "ExpectedArm64ASM": [
-        "ldr q2, [x28, #2272]",
-        "cmlt v3.16b, v16.16b, #0",
-        "and v2.16b, v3.16b, v2.16b",
-        "addp v2.16b, v2.16b, v2.16b",
-        "addp v2.8b, v2.8b, v2.8b",
-        "addp v2.8b, v2.8b, v2.8b",
-        "umov w4, v2.h[0]"
+        "mov v2.16b, v16.16b",
+        "ldr q3, [x28, #2272]",
+        "cmlt v4.16b, v2.16b, #0",
+        "and v2.16b, v4.16b, v3.16b",
+        "addp v3.16b, v2.16b, v2.16b",
+        "addp v2.8b, v3.8b, v3.8b",
+        "addp v3.8b, v2.8b, v2.8b",
+        "umov w20, v3.h[0]",
+        "mov x4, x20"
       ]
     },
     "psubusb xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xd8",
       "ExpectedArm64ASM": [
-        "uqsub v16.16b, v16.16b, v17.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "uqsub v4.16b, v3.16b, v2.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "psubusw xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xd9",
       "ExpectedArm64ASM": [
-        "uqsub v16.8h, v16.8h, v17.8h"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "uqsub v4.8h, v3.8h, v2.8h",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pminub xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xda",
       "ExpectedArm64ASM": [
-        "umin v16.16b, v16.16b, v17.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "umin v4.16b, v3.16b, v2.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pand xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xdb",
       "ExpectedArm64ASM": [
-        "and v16.16b, v16.16b, v17.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "and v4.16b, v3.16b, v2.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "paddusb xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xdc",
       "ExpectedArm64ASM": [
-        "uqadd v16.16b, v16.16b, v17.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "uqadd v4.16b, v3.16b, v2.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "paddusw xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xdd",
       "ExpectedArm64ASM": [
-        "uqadd v16.8h, v16.8h, v17.8h"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "uqadd v4.8h, v3.8h, v2.8h",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pmaxub xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xde",
       "ExpectedArm64ASM": [
-        "umax v16.16b, v16.16b, v17.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "umax v4.16b, v3.16b, v2.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pandn xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xdf",
       "ExpectedArm64ASM": [
-        "bic v16.16b, v17.16b, v16.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "bic v4.16b, v2.16b, v3.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pavgb xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xe0",
       "ExpectedArm64ASM": [
-        "urhadd v16.16b, v16.16b, v17.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "urhadd v4.16b, v3.16b, v2.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "psraw xmm0, xmm1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x66 0x0f 0xe1",
       "ExpectedArm64ASM": [
-        "uqshl d0, d17, #57",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "uqshl d0, d3, #57",
         "ushr d0, d0, #57",
         "dup v0.8h, v0.h[0]",
         "neg v0.8h, v0.8h",
-        "sshl v16.8h, v16.8h, v0.8h"
+        "sshl v4.8h, v2.8h, v0.8h",
+        "mov v16.16b, v4.16b"
       ]
     },
     "psrad xmm0, xmm1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x66 0x0f 0xe2",
       "ExpectedArm64ASM": [
-        "uqshl d0, d17, #57",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "uqshl d0, d3, #57",
         "ushr d0, d0, #57",
         "dup v0.4s, v0.s[0]",
         "neg v0.4s, v0.4s",
-        "sshl v16.4s, v16.4s, v0.4s"
+        "sshl v4.4s, v2.4s, v0.4s",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pavgw xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xe3",
       "ExpectedArm64ASM": [
-        "urhadd v16.8h, v16.8h, v17.8h"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "urhadd v4.8h, v3.8h, v2.8h",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pmulhuw xmm0, xmm1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x66 0x0f 0xe4",
       "ExpectedArm64ASM": [
-        "umull2 v0.4s, v16.8h, v17.8h",
-        "umull v16.4s, v16.4h, v17.4h",
-        "uzp2 v16.8h, v16.8h, v0.8h"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "umull2 v0.4s, v2.8h, v3.8h",
+        "umull v4.4s, v2.4h, v3.4h",
+        "uzp2 v4.8h, v4.8h, v0.8h",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pmulhw xmm0, xmm1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x66 0x0f 0xe5",
       "ExpectedArm64ASM": [
-        "smull2 v0.4s, v16.8h, v17.8h",
-        "smull v16.4s, v16.4h, v17.4h",
-        "uzp2 v16.8h, v16.8h, v0.8h"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "smull2 v0.4s, v2.8h, v3.8h",
+        "smull v4.4s, v2.4h, v3.4h",
+        "uzp2 v4.8h, v4.8h, v0.8h",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cvttpd2dq xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xe6",
       "ExpectedArm64ASM": [
-        "fcvtn v2.2s, v17.2d",
-        "fcvtzs v16.4s, v2.4s"
+        "mov v2.16b, v17.16b",
+        "fcvtn v3.2s, v2.2d",
+        "fcvtzs v2.4s, v3.4s",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movntdq [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0x66 0x0f 0xe7",
       "ExpectedArm64ASM": [
-        "str q16, [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "str q2, [x20]"
       ]
     },
     "psubsb xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xe8",
       "ExpectedArm64ASM": [
-        "sqsub v16.16b, v16.16b, v17.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "sqsub v4.16b, v3.16b, v2.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "psubsw xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xe9",
       "ExpectedArm64ASM": [
-        "sqsub v16.8h, v16.8h, v17.8h"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "sqsub v4.8h, v3.8h, v2.8h",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pminsw xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xea",
       "ExpectedArm64ASM": [
-        "smin v16.8h, v16.8h, v17.8h"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "smin v4.8h, v3.8h, v2.8h",
+        "mov v16.16b, v4.16b"
       ]
     },
     "por xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xeb",
       "ExpectedArm64ASM": [
-        "orr v16.16b, v16.16b, v17.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "orr v4.16b, v3.16b, v2.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "paddsb xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xec",
       "ExpectedArm64ASM": [
-        "sqadd v16.16b, v16.16b, v17.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "sqadd v4.16b, v3.16b, v2.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "paddsw xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xed",
       "ExpectedArm64ASM": [
-        "sqadd v16.8h, v16.8h, v17.8h"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "sqadd v4.8h, v3.8h, v2.8h",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pmaxsw xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xee",
       "ExpectedArm64ASM": [
-        "smax v16.8h, v16.8h, v17.8h"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "smax v4.8h, v3.8h, v2.8h",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pxor xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xef",
       "ExpectedArm64ASM": [
-        "eor v16.16b, v16.16b, v17.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "eor v4.16b, v3.16b, v2.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "psllw xmm0, xmm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x66 0x0f 0xf1",
       "ExpectedArm64ASM": [
-        "uqshl d0, d17, #57",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "uqshl d0, d3, #57",
         "ushr d0, d0, #57",
         "dup v0.8h, v0.h[0]",
-        "ushl v16.8h, v16.8h, v0.8h"
+        "ushl v4.8h, v2.8h, v0.8h",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pslld xmm0, xmm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x66 0x0f 0xf2",
       "ExpectedArm64ASM": [
-        "uqshl d0, d17, #57",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "uqshl d0, d3, #57",
         "ushr d0, d0, #57",
         "dup v0.4s, v0.s[0]",
-        "ushl v16.4s, v16.4s, v0.4s"
+        "ushl v4.4s, v2.4s, v0.4s",
+        "mov v16.16b, v4.16b"
       ]
     },
     "psllq xmm0, xmm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x66 0x0f 0xf3",
       "ExpectedArm64ASM": [
-        "uqshl d0, d17, #57",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "uqshl d0, d3, #57",
         "ushr d0, d0, #57",
         "dup v0.2d, v0.d[0]",
-        "ushl v16.2d, v16.2d, v0.2d"
+        "ushl v4.2d, v2.2d, v0.2d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pmuludq xmm0, xmm1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x66 0x0f 0xf4",
       "ExpectedArm64ASM": [
-        "uzp1 v2.4s, v16.4s, v16.4s",
-        "uzp1 v3.4s, v17.4s, v17.4s",
-        "umull v16.2d, v2.2s, v3.2s"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "uzp1 v4.4s, v2.4s, v2.4s",
+        "uzp1 v2.4s, v3.4s, v3.4s",
+        "umull v3.2d, v4.2s, v2.2s",
+        "mov v16.16b, v3.16b"
       ]
     },
     "pmaddwd xmm0, xmm1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x66 0x0f 0xf5",
       "ExpectedArm64ASM": [
-        "smull v2.4s, v16.4h, v17.4h",
-        "smull2 v3.4s, v16.8h, v17.8h",
-        "addp v16.4s, v2.4s, v3.4s"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "smull v4.4s, v2.4h, v3.4h",
+        "smull2 v5.4s, v2.8h, v3.8h",
+        "addp v2.4s, v4.4s, v5.4s",
+        "mov v16.16b, v2.16b"
       ]
     },
     "psadbw xmm0, xmm1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x66 0x0f 0xf6",
       "ExpectedArm64ASM": [
-        "uabdl v2.8h, v16.8b, v17.8b",
-        "uabdl2 v3.8h, v16.16b, v17.16b",
-        "addv h2, v2.8h",
-        "addv h3, v3.8h",
-        "zip1 v16.2d, v2.2d, v3.2d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "uabdl v4.8h, v2.8b, v3.8b",
+        "uabdl2 v5.8h, v2.16b, v3.16b",
+        "addv h2, v4.8h",
+        "addv h3, v5.8h",
+        "zip1 v4.2d, v2.2d, v3.2d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "maskmovdqu xmm0, xmm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 8,
       "Comment": "0x66 0x0f 0xf7",
       "ExpectedArm64ASM": [
-        "cmlt v2.16b, v17.16b, #0",
-        "ldr q3, [x11]",
-        "bsl v2.16b, v16.16b, v3.16b",
-        "str q2, [x11]"
+        "mov v2.16b, v17.16b",
+        "cmlt v3.16b, v2.16b, #0",
+        "mov v2.16b, v16.16b",
+        "mov x20, x11",
+        "ldr q4, [x20]",
+        "mov v5.16b, v3.16b",
+        "bsl v5.16b, v2.16b, v4.16b",
+        "str q5, [x20]"
       ]
     },
     "psubb xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xf8",
       "ExpectedArm64ASM": [
-        "sub v16.16b, v16.16b, v17.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "sub v4.16b, v3.16b, v2.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "psubw xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xf9",
       "ExpectedArm64ASM": [
-        "sub v16.8h, v16.8h, v17.8h"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "sub v4.8h, v3.8h, v2.8h",
+        "mov v16.16b, v4.16b"
       ]
     },
     "psubd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xfa",
       "ExpectedArm64ASM": [
-        "sub v16.4s, v16.4s, v17.4s"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "sub v4.4s, v3.4s, v2.4s",
+        "mov v16.16b, v4.16b"
       ]
     },
     "psubq xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xfb",
       "ExpectedArm64ASM": [
-        "sub v16.2d, v16.2d, v17.2d"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "sub v4.2d, v3.2d, v2.2d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "paddb xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xfc",
       "ExpectedArm64ASM": [
-        "add v16.16b, v16.16b, v17.16b"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "add v4.16b, v3.16b, v2.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "paddw xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xfd",
       "ExpectedArm64ASM": [
-        "add v16.8h, v16.8h, v17.8h"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "add v4.8h, v3.8h, v2.8h",
+        "mov v16.16b, v4.16b"
       ]
     },
     "paddd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xfe",
       "ExpectedArm64ASM": [
-        "add v16.4s, v16.4s, v17.4s"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "add v4.4s, v3.4s, v2.4s",
+        "mov v16.16b, v4.16b"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/Secondary_OpSize_FCMA.json b/unittests/InstructionCountCI/Secondary_OpSize_FCMA.json
index e4353bb3cf..8bb63b678d 100644
--- a/unittests/InstructionCountCI/Secondary_OpSize_FCMA.json
+++ b/unittests/InstructionCountCI/Secondary_OpSize_FCMA.json
@@ -11,11 +11,14 @@
   },
   "Instructions": {
     "addsubpd xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x66 0x0f 0xd0",
       "ExpectedArm64ASM": [
-        "ext v2.16b, v17.16b, v17.16b, #8",
-        "fcadd v16.2d, v16.2d, v2.2d, #90"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "ext v4.16b, v3.16b, v3.16b, #8",
+        "fcadd v3.2d, v2.2d, v4.2d, #90",
+        "mov v16.16b, v3.16b"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/Secondary_OpSize_SVE128.json b/unittests/InstructionCountCI/Secondary_OpSize_SVE128.json
index 2ab3ee916b..9ab45ed750 100644
--- a/unittests/InstructionCountCI/Secondary_OpSize_SVE128.json
+++ b/unittests/InstructionCountCI/Secondary_OpSize_SVE128.json
@@ -10,81 +10,119 @@
   },
   "Instructions": {
     "psrlw xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x66 0x0f 0xd1",
       "ExpectedArm64ASM": [
-        "mov z0.d, d17",
-        "lsr z16.h, p6/m, z16.h, z0.d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov z0.d, d3",
+        "movprfx z4, z2",
+        "lsr z4.h, p6/m, z4.h, z0.d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "psrld xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x66 0x0f 0xd2",
       "ExpectedArm64ASM": [
-        "mov z0.d, d17",
-        "lsr z16.s, p6/m, z16.s, z0.d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov z0.d, d3",
+        "movprfx z4, z2",
+        "lsr z4.s, p6/m, z4.s, z0.d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "psrlq xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x66 0x0f 0xd3",
       "ExpectedArm64ASM": [
-        "mov z0.d, d17",
-        "lsr z16.d, p6/m, z16.d, z0.d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov z0.d, d3",
+        "movprfx z4, z2",
+        "lsr z4.d, p6/m, z4.d, z0.d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "psraw xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x66 0x0f 0xe1",
       "ExpectedArm64ASM": [
-        "mov z0.d, d17",
-        "asr z16.h, p6/m, z16.h, z0.d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov z0.d, d3",
+        "movprfx z4, z2",
+        "asr z4.h, p6/m, z4.h, z0.d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "psrad xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x66 0x0f 0xe2",
       "ExpectedArm64ASM": [
-        "mov z0.d, d17",
-        "asr z16.s, p6/m, z16.s, z0.d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov z0.d, d3",
+        "movprfx z4, z2",
+        "asr z4.s, p6/m, z4.s, z0.d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pmulhuw xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xe4",
       "ExpectedArm64ASM": [
-        "umulh z16.h, z16.h, z17.h"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "umulh z4.h, z2.h, z3.h",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pmulhw xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0x66 0x0f 0xe5",
       "ExpectedArm64ASM": [
-        "smulh z16.h, z16.h, z17.h"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "smulh z4.h, z2.h, z3.h",
+        "mov v16.16b, v4.16b"
       ]
     },
     "psllw xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x66 0x0f 0xf1",
       "ExpectedArm64ASM": [
-        "mov z0.d, d17",
-        "lsl z16.h, p6/m, z16.h, z0.d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov z0.d, d3",
+        "movprfx z4, z2",
+        "lsl z4.h, p6/m, z4.h, z0.d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pslld xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x66 0x0f 0xf2",
       "ExpectedArm64ASM": [
-        "mov z0.d, d17",
-        "lsl z16.s, p6/m, z16.s, z0.d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov z0.d, d3",
+        "movprfx z4, z2",
+        "lsl z4.s, p6/m, z4.s, z0.d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "psllq xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0x66 0x0f 0xf3",
       "ExpectedArm64ASM": [
-        "mov z0.d, d17",
-        "lsl z16.d, p6/m, z16.d, z0.d"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov z0.d, d3",
+        "movprfx z4, z2",
+        "lsl z4.d, p6/m, z4.d, z0.d",
+        "mov v16.16b, v4.16b"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/Secondary_OpSize_SVE256.json b/unittests/InstructionCountCI/Secondary_OpSize_SVE256.json
index 768bcf23c5..722f1e186e 100644
--- a/unittests/InstructionCountCI/Secondary_OpSize_SVE256.json
+++ b/unittests/InstructionCountCI/Secondary_OpSize_SVE256.json
@@ -9,23 +9,31 @@
   },
   "Instructions": {
     "pmulhuw xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "SVE-256bit changes behaviour slightly",
         "0x66 0x0f 0xe4"
       ],
       "ExpectedArm64ASM": [
-        "umulh z16.h, p6/m, z16.h, z17.h"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "movprfx z4, z2",
+        "umulh z4.h, p6/m, z4.h, z3.h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "pmulhw xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "SVE-256bit changes behaviour slightly",
         "0x66 0x0f 0xe5"
       ],
       "ExpectedArm64ASM": [
-        "smulh z16.h, p6/m, z16.h, z17.h"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "movprfx z4, z2",
+        "smulh z4.h, p6/m, z4.h, z3.h",
+        "mov z16.d, p7/m, z4.d"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/Secondary_REP.json b/unittests/InstructionCountCI/Secondary_REP.json
index 3f38dc715e..6908053173 100644
--- a/unittests/InstructionCountCI/Secondary_REP.json
+++ b/unittests/InstructionCountCI/Secondary_REP.json
@@ -13,285 +13,381 @@
   },
   "Instructions": {
     "movss xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0xf3 0x0f 0x10",
       "ExpectedArm64ASM": [
-        "mov v16.s[0], v17.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.s[0], v3.s[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "movss xmm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0xf3 0x0f 0x10",
       "ExpectedArm64ASM": [
-        "ldr s16, [x4]"
+        "mov x20, x4",
+        "ldr s2, [x20]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movss [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0xf3 0x0f 0x11",
       "ExpectedArm64ASM": [
-        "str s16, [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "str s2, [x20]"
       ]
     },
     "movsldup xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0xf3 0x0f 0x12",
       "ExpectedArm64ASM": [
-        "trn1 v16.4s, v17.4s, v17.4s"
+        "mov v2.16b, v17.16b",
+        "trn1 v3.4s, v2.4s, v2.4s",
+        "mov v16.16b, v3.16b"
       ]
     },
     "movsldup xmm0, [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0xf3 0x0f 0x12",
       "ExpectedArm64ASM": [
-        "ldr q2, [x4]",
-        "trn1 v16.4s, v2.4s, v2.4s"
+        "mov x20, x4",
+        "ldr q2, [x20]",
+        "trn1 v3.4s, v2.4s, v2.4s",
+        "mov v16.16b, v3.16b"
       ]
     },
     "movshdup xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0xf3 0x0f 0x16",
       "ExpectedArm64ASM": [
-        "trn2 v16.4s, v17.4s, v17.4s"
+        "mov v2.16b, v17.16b",
+        "trn2 v3.4s, v2.4s, v2.4s",
+        "mov v16.16b, v3.16b"
       ]
     },
     "movshdup xmm0, [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0xf3 0x0f 0x16",
       "ExpectedArm64ASM": [
-        "ldr q2, [x4]",
-        "trn2 v16.4s, v2.4s, v2.4s"
+        "mov x20, x4",
+        "ldr q2, [x20]",
+        "trn2 v3.4s, v2.4s, v2.4s",
+        "mov v16.16b, v3.16b"
       ]
     },
     "cvtsi2ss xmm0, eax": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0xf3 0x0f 0x2a"
       ],
       "ExpectedArm64ASM": [
-        "scvtf s0, w4",
-        "mov v16.s[0], v0.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "scvtf s0, w20",
+        "mov v3.s[0], v0.s[0]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "cvtsi2ss xmm0, dword [rax]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0xf3 0x0f 0x2a"
       ],
       "ExpectedArm64ASM": [
-        "ldr s2, [x4]",
-        "scvtf s0, s2",
-        "mov v16.s[0], v0.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "ldr s3, [x20]",
+        "mov v4.16b, v2.16b",
+        "scvtf s0, s3",
+        "mov v4.s[0], v0.s[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cvtsi2ss xmm0, rax": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0xf3 0x0f 0x2a",
       "ExpectedArm64ASM": [
-        "scvtf s0, x4",
-        "mov v16.s[0], v0.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "scvtf s0, x20",
+        "mov v3.s[0], v0.s[0]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "cvtsi2ss xmm0, qword [rax]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0xf3 0x0f 0x2a"
       ],
       "ExpectedArm64ASM": [
-        "ldr x20, [x4]",
-        "scvtf s0, x20",
-        "mov v16.s[0], v0.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "ldr x21, [x20]",
+        "mov v3.16b, v2.16b",
+        "scvtf s0, x21",
+        "mov v3.s[0], v0.s[0]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "movntss [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0xf3 0x0f 0x2b",
       "ExpectedArm64ASM": [
-        "str s16, [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "str s2, [x20]"
       ]
     },
     "cvttss2si eax, xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0xf3 0x0f 0x2c",
       "ExpectedArm64ASM": [
-        "fcvtzs w4, s16"
+        "mov v2.16b, v16.16b",
+        "fcvtzs w20, s2",
+        "mov x4, x20"
       ]
     },
     "cvttss2si eax, dword [rbx]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0xf3 0x0f 0x2c",
       "ExpectedArm64ASM": [
-        "ldr s2, [x7]",
-        "fcvtzs w4, s2"
+        "mov x20, x7",
+        "ldr s2, [x20]",
+        "fcvtzs w20, s2",
+        "mov x4, x20"
       ]
     },
     "cvttss2si rax, xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0xf3 0x0f 0x2c",
       "ExpectedArm64ASM": [
-        "fcvtzs x4, s16"
+        "mov v2.16b, v16.16b",
+        "fcvtzs x20, s2",
+        "mov x4, x20"
       ]
     },
     "cvttss2si rax, dword [rbx]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0xf3 0x0f 0x2c",
       "ExpectedArm64ASM": [
-        "ldr d2, [x7]",
-        "fcvtzs x4, s2"
+        "mov x20, x7",
+        "ldr d2, [x20]",
+        "fcvtzs x20, s2",
+        "mov x4, x20"
       ]
     },
     "cvtss2si eax, xmm0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0xf3 0x0f 0x2d",
       "ExpectedArm64ASM": [
-        "frinti s0, s16",
-        "fcvtzs w4, s0"
+        "mov v2.16b, v16.16b",
+        "frinti s0, s2",
+        "fcvtzs w20, s0",
+        "mov x4, x20"
       ]
     },
     "cvtss2si eax, dword [rbx]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0xf3 0x0f 0x2d",
       "ExpectedArm64ASM": [
-        "ldr s2, [x7]",
+        "mov x20, x7",
+        "ldr s2, [x20]",
         "frinti s0, s2",
-        "fcvtzs w4, s0"
+        "fcvtzs w20, s0",
+        "mov x4, x20"
       ]
     },
     "cvtss2si rax, xmm0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0xf3 0x0f 0x2d",
       "ExpectedArm64ASM": [
-        "frinti s0, s16",
-        "fcvtzs x4, s0"
+        "mov v2.16b, v16.16b",
+        "frinti s0, s2",
+        "fcvtzs x20, s0",
+        "mov x4, x20"
       ]
     },
     "cvtss2si rax, dword [rbx]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0xf3 0x0f 0x2d",
       "ExpectedArm64ASM": [
-        "ldr d2, [x7]",
+        "mov x20, x7",
+        "ldr d2, [x20]",
         "frinti s0, s2",
-        "fcvtzs x4, s0"
+        "fcvtzs x20, s0",
+        "mov x4, x20"
       ]
     },
     "sqrtss xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0xf3 0x0f 0x51",
       "ExpectedArm64ASM": [
-        "fsqrt s0, s17",
-        "mov v16.s[0], v0.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fsqrt s0, s3",
+        "mov v4.s[0], v0.s[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "rsqrtss xmm0, xmm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0xf3 0x0f 0x52"
       ],
       "ExpectedArm64ASM": [
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
         "fmov s0, #0x70 (1.0000)",
-        "fsqrt s1, s17",
+        "fsqrt s1, s3",
         "fdiv s0, s0, s1",
-        "mov v16.s[0], v0.s[0]"
+        "mov v4.s[0], v0.s[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "rcpss xmm0, xmm1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0xf3 0x0f 0x53"
       ],
       "ExpectedArm64ASM": [
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
         "fmov s0, #0x70 (1.0000)",
-        "fdiv s0, s0, s17",
-        "mov v16.s[0], v0.s[0]"
+        "fdiv s0, s0, s3",
+        "mov v4.s[0], v0.s[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "addss xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0xf3 0x0f 0x58"
       ],
       "ExpectedArm64ASM": [
-        "fadd s0, s16, s17",
-        "mov v16.s[0], v0.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fadd s0, s2, s3",
+        "mov v4.s[0], v0.s[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "mulss xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0xf3 0x0f 0x59"
       ],
       "ExpectedArm64ASM": [
-        "fmul s0, s16, s17",
-        "mov v16.s[0], v0.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fmul s0, s2, s3",
+        "mov v4.s[0], v0.s[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cvtss2sd xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": "0xf3 0x0f 0x5a",
       "ExpectedArm64ASM": [
-        "fcvt d0, s17",
-        "mov v16.d[0], v0.d[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fcvt d0, s3",
+        "mov v4.d[0], v0.d[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cvtss2sd xmm0, [rax]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": "0xf3 0x0f 0x5a",
       "ExpectedArm64ASM": [
-        "ldr d2, [x4]",
-        "fcvt d0, s2",
-        "mov v16.d[0], v0.d[0]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "ldr d3, [x20]",
+        "mov v4.16b, v2.16b",
+        "fcvt d0, s3",
+        "mov v4.d[0], v0.d[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cvttps2dq xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0xf3 0x0f 0x5b",
       "ExpectedArm64ASM": [
-        "fcvtzs v16.4s, v17.4s"
+        "mov v2.16b, v17.16b",
+        "fcvtzs v3.4s, v2.4s",
+        "mov v16.16b, v3.16b"
       ]
     },
     "subss xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0xf3 0x0f 0x5c"
       ],
       "ExpectedArm64ASM": [
-        "fsub s0, s16, s17",
-        "mov v16.s[0], v0.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fsub s0, s2, s3",
+        "mov v4.s[0], v0.s[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "minss xmm0, xmm1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xf3 0x0f 0x5d"
       ],
       "ExpectedArm64ASM": [
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
         "mrs x20, nzcv",
-        "fcmp s16, s17",
-        "fcsel s0, s16, s17, mi",
-        "mov v16.s[0], v0.s[0]",
+        "mov v4.16b, v2.16b",
+        "fcmp s2, s3",
+        "fcsel s0, s2, s3, mi",
+        "mov v4.s[0], v0.s[0]",
+        "mov v16.16b, v4.16b",
         "msr nzcv, x20"
       ]
     },
     "divss xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0xf3 0x0f 0x5e"
       ],
       "ExpectedArm64ASM": [
-        "fdiv s0, s16, s17",
-        "mov v16.s[0], v0.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fdiv s0, s2, s3",
+        "mov v4.s[0], v0.s[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "maxss xmm0, xmm1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xf3 0x0f 0x5f"
       ],
       "ExpectedArm64ASM": [
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
         "mrs x20, nzcv",
-        "fcmp s16, s17",
-        "fcsel s0, s17, s16, mi",
-        "mov v16.s[0], v0.s[0]",
+        "mov v4.16b, v2.16b",
+        "fcmp s2, s3",
+        "fcsel s0, s3, s2, mi",
+        "mov v4.s[0], v0.s[0]",
+        "mov v16.16b, v4.16b",
         "msr nzcv, x20"
       ]
     },
@@ -301,334 +397,421 @@
       "ExpectedArm64ASM": []
     },
     "movdqu xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "0xf3 0x0f 0x6f",
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b"
+        "mov v2.16b, v17.16b",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movdqu xmm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0xf3 0x0f 0x6f",
       "ExpectedArm64ASM": [
-        "ldr q16, [x4]"
+        "mov x20, x4",
+        "ldr q2, [x20]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "pshufhw xmm0, xmm1, 0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Broadcast upper-half element 0",
         "0xf3 0x0f 0x70"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.8h, v17.h[4]",
-        "trn1 v16.2d, v17.2d, v2.2d"
+        "mov v2.16b, v17.16b",
+        "dup v3.8h, v2.h[4]",
+        "trn1 v4.2d, v2.2d, v3.2d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pshufhw xmm0, xmm1, 11100100b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Identity copy",
         "0xf3 0x0f 0x70"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b"
+        "mov v2.16b, v17.16b",
+        "mov v16.16b, v2.16b"
       ]
     },
     "pshufhw xmm0, xmm1, 01010000b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Upper elements Self-zip",
         "0xf3 0x0f 0x70"
       ],
       "ExpectedArm64ASM": [
-        "zip1 v2.8h, v17.8h, v17.8h",
-        "zip1 v16.2d, v17.2d, v2.2d"
+        "mov v2.16b, v17.16b",
+        "zip1 v3.8h, v2.8h, v2.8h",
+        "zip1 v4.2d, v2.2d, v3.2d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pshufhw xmm0, xmm1, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Broadcast element 0 in the upper-half",
         "Upper-half Element 0 gets turned in to element 1",
         "0xf3 0x0f 0x70"
       ],
       "ExpectedArm64ASM": [
+        "mov v2.16b, v17.16b",
         "ldr x0, [x28, #1752]",
-        "ldr q2, [x0, #16]",
-        "tbl v16.16b, {v17.16b}, v2.16b"
+        "ldr q3, [x0, #16]",
+        "tbl v4.16b, {v2.16b}, v3.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pshufhw xmm0, xmm1, 0xff": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Broadcast upper-half Element 3",
         "0xf3 0x0f 0x70"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.8h, v17.h[7]",
-        "trn1 v16.2d, v17.2d, v2.2d"
+        "mov v2.16b, v17.16b",
+        "dup v3.8h, v2.h[7]",
+        "trn1 v4.2d, v2.2d, v3.2d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "movq xmm0, xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0xf3 0x0f 0x7e",
       "ExpectedArm64ASM": [
-        "mov v16.8b, v16.8b"
+        "mov v2.16b, v16.16b",
+        "mov v3.8b, v2.8b",
+        "mov v16.16b, v3.16b"
       ]
     },
     "movq xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0xf3 0x0f 0x7e",
       "ExpectedArm64ASM": [
-        "mov v16.8b, v17.8b"
+        "mov v2.16b, v17.16b",
+        "mov v3.8b, v2.8b",
+        "mov v16.16b, v3.16b"
       ]
     },
     "movq xmm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0xf3 0x0f 0x7e",
       "ExpectedArm64ASM": [
-        "ldr d16, [x4]"
+        "mov x20, x4",
+        "ldr d2, [x20]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movdqu [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0xf3 0x0f 0x7f",
       "ExpectedArm64ASM": [
-        "str q16, [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "str q2, [x20]"
       ]
     },
     "popcnt ax, bx": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 15,
       "Comment": "0xf3 0x0f 0xb8",
       "ExpectedArm64ASM": [
-        "uxth w20, w7",
-        "fmov s0, w20",
+        "mov x20, x7",
+        "uxth w21, w20",
+        "fmov s0, w21",
         "cnt v0.8b, v0.8b",
         "addp v0.8b, v0.8b, v0.8b",
         "umov w20, v0.b[0]",
-        "bfxil x4, x20, #0, #16",
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x4, x22",
         "tst w20, w20",
-        "mov w26, #0x1",
-        "mov w27, #0x0"
+        "mov w20, #0x1",
+        "mov x26, x20",
+        "mov w20, #0x0",
+        "mov x27, x20"
       ]
     },
     "popcnt eax, ebx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": "0xf3 0x0f 0xb8",
       "ExpectedArm64ASM": [
-        "fmov s0, w7",
+        "mov x20, x7",
+        "fmov s0, w20",
         "cnt v0.8b, v0.8b",
         "addv b0, v0.8b",
-        "umov w4, v0.b[0]",
-        "tst w4, w4",
-        "mov w26, #0x1",
-        "mov w27, #0x0"
+        "umov w21, v0.b[0]",
+        "mov x4, x21",
+        "tst w21, w21",
+        "mov w20, #0x1",
+        "mov x26, x20",
+        "mov w20, #0x0",
+        "mov x27, x20"
       ]
     },
     "popcnt rax, rbx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": "0xf3 0x0f 0xb8",
       "ExpectedArm64ASM": [
-        "fmov d0, x7",
+        "mov x20, x7",
+        "fmov d0, x20",
         "cnt v0.8b, v0.8b",
         "addv b0, v0.8b",
-        "umov w4, v0.b[0]",
-        "tst w4, w4",
-        "mov w26, #0x1",
-        "mov w27, #0x0"
+        "umov w21, v0.b[0]",
+        "mov x4, x21",
+        "tst w21, w21",
+        "mov w20, #0x1",
+        "mov x26, x20",
+        "mov w20, #0x0",
+        "mov x27, x20"
       ]
     },
     "tzcnt ax, bx": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 13,
       "Comment": "0xf3 0x0f 0xbc",
       "ExpectedArm64ASM": [
-        "rbit w20, w7",
-        "orr w20, w20, #0x8000",
-        "clz w20, w20",
-        "bfxil x4, x20, #0, #16",
-        "cmn wzr, w20, lsl #16",
-        "ubfx x20, x20, #4, #1",
+        "mov x20, x7",
+        "rbit w21, w20",
+        "orr w21, w21, #0x8000",
+        "clz w21, w21",
+        "mov x20, x4",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22",
+        "cmn wzr, w21, lsl #16",
+        "ubfx x20, x21, #4, #1",
         "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "msr nzcv, x20"
+        "orr w22, w21, w20, lsl #29",
+        "msr nzcv, x22"
       ]
     },
     "tzcnt eax, ebx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": "0xf3 0x0f 0xbc",
       "ExpectedArm64ASM": [
-        "rbit w4, w7",
-        "clz w4, w4",
-        "tst w4, w4",
-        "ubfx x20, x4, #5, #1",
+        "mov x20, x7",
+        "rbit w21, w20",
+        "clz w21, w21",
+        "mov x4, x21",
+        "tst w21, w21",
+        "ubfx x20, x21, #5, #1",
         "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "msr nzcv, x20"
+        "orr w22, w21, w20, lsl #29",
+        "msr nzcv, x22"
       ]
     },
     "tzcnt rax, rbx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": "0xf3 0x0f 0xbc",
       "ExpectedArm64ASM": [
-        "rbit x4, x7",
-        "clz x4, x4",
-        "tst x4, x4",
-        "ubfx x20, x4, #6, #1",
+        "mov x20, x7",
+        "rbit x21, x20",
+        "clz x21, x21",
+        "mov x4, x21",
+        "tst x21, x21",
+        "ubfx x20, x21, #6, #1",
         "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "msr nzcv, x20"
+        "orr w22, w21, w20, lsl #29",
+        "msr nzcv, x22"
       ]
     },
     "lzcnt ax, bx": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 13,
       "Comment": "0xf3 0x0f 0xbd",
       "ExpectedArm64ASM": [
-        "lsl w20, w7, #16",
-        "orr w20, w20, #0x8000",
-        "clz w20, w20",
-        "bfxil x4, x20, #0, #16",
-        "cmn wzr, w20, lsl #16",
-        "ubfx x20, x20, #4, #1",
+        "mov x20, x7",
+        "lsl w21, w20, #16",
+        "orr w21, w21, #0x8000",
+        "clz w21, w21",
+        "mov x20, x4",
+        "mov x22, x20",
+        "bfxil x22, x21, #0, #16",
+        "mov x4, x22",
+        "cmn wzr, w21, lsl #16",
+        "ubfx x20, x21, #4, #1",
         "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "msr nzcv, x20"
+        "orr w22, w21, w20, lsl #29",
+        "msr nzcv, x22"
       ]
     },
     "lzcnt eax, ebx": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "0xf3 0x0f 0xbd",
       "ExpectedArm64ASM": [
-        "clz w4, w7",
-        "tst w4, w4",
-        "ubfx x20, x4, #5, #1",
+        "mov x20, x7",
+        "clz w21, w20",
+        "mov x4, x21",
+        "tst w21, w21",
+        "ubfx x20, x21, #5, #1",
         "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "msr nzcv, x20"
+        "orr w22, w21, w20, lsl #29",
+        "msr nzcv, x22"
       ]
     },
     "lzcnt rax, rbx": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": "0xf3 0x0f 0xbd",
       "ExpectedArm64ASM": [
-        "clz x4, x7",
-        "tst x4, x4",
-        "ubfx x20, x4, #6, #1",
+        "mov x20, x7",
+        "clz x21, x20",
+        "mov x4, x21",
+        "tst x21, x21",
+        "ubfx x20, x21, #6, #1",
         "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "msr nzcv, x20"
+        "orr w22, w21, w20, lsl #29",
+        "msr nzcv, x22"
       ]
     },
     "cmpss xmm0, xmm1, 0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0xf3 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmeq s0, s16, s17",
-        "mov v16.s[0], v0.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fcmeq s0, s2, s3",
+        "mov v4.s[0], v0.s[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpss xmm0, xmm1, 1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0xf3 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt s0, s17, s16",
-        "mov v16.s[0], v0.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fcmgt s0, s3, s2",
+        "mov v4.s[0], v0.s[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpss xmm0, xmm1, 2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0xf3 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmge s0, s17, s16",
-        "mov v16.s[0], v0.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fcmge s0, s3, s2",
+        "mov v4.s[0], v0.s[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpss xmm0, xmm1, 3": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xf3 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmge s0, s16, s17",
-        "fcmgt s1, s17, s16",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fcmge s0, s2, s3",
+        "fcmgt s1, s3, s2",
         "orr v0.8b, v0.8b, v1.8b",
         "mvn v0.8b, v0.8b",
-        "mov v16.s[0], v0.s[0]"
+        "mov v4.s[0], v0.s[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpss xmm0, xmm1, 4": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0xf3 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmeq s0, s16, s17",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fcmeq s0, s2, s3",
         "mvn v0.8b, v0.8b",
-        "mov v16.s[0], v0.s[0]"
+        "mov v4.s[0], v0.s[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpss xmm0, xmm1, 5": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0xf3 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt s2, s17, s16",
-        "mvn v2.16b, v2.16b",
-        "mov v16.s[0], v2.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "fcmgt s4, s3, s2",
+        "mvn v3.16b, v4.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.s[0], v3.s[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpss xmm0, xmm1, 6": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0xf3 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmge s2, s17, s16",
-        "mvn v2.16b, v2.16b",
-        "mov v16.s[0], v2.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "fcmge s4, s3, s2",
+        "mvn v3.16b, v4.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.s[0], v3.s[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpss xmm0, xmm1, 7": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0xf3 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmge s0, s16, s17",
-        "fcmgt s1, s17, s16",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fcmge s0, s2, s3",
+        "fcmgt s1, s3, s2",
         "orr v0.8b, v0.8b, v1.8b",
-        "mov v16.s[0], v0.s[0]"
+        "mov v4.s[0], v0.s[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "movq2dq xmm0, mm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "0xf3 0x0f 0xd6",
       "ExpectedArm64ASM": [
-        "ldr d16, [x28, #768]"
+        "ldr d2, [x28, #768]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "cvtdq2pd xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0xf3 0x0f 0xe6",
       "ExpectedArm64ASM": [
-        "sxtl v2.2d, v17.2s",
-        "scvtf v16.2d, v2.2d"
+        "mov v2.16b, v17.16b",
+        "sxtl v3.2d, v2.2s",
+        "scvtf v2.2d, v3.2d",
+        "mov v16.16b, v2.16b"
       ]
     },
     "cvtdq2pd xmm0, [rax]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0xf3 0x0f 0xe6",
       "ExpectedArm64ASM": [
-        "ldr d2, [x4]",
-        "sxtl v2.2d, v2.2s",
-        "scvtf v16.2d, v2.2d"
+        "mov x20, x4",
+        "ldr d2, [x20]",
+        "sxtl v3.2d, v2.2s",
+        "scvtf v2.2d, v3.2d",
+        "mov v16.16b, v2.16b"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/Secondary_REPNE.json b/unittests/InstructionCountCI/Secondary_REPNE.json
index d1481c81e9..d74741ceec 100644
--- a/unittests/InstructionCountCI/Secondary_REPNE.json
+++ b/unittests/InstructionCountCI/Secondary_REPNE.json
@@ -11,305 +11,396 @@
   },
   "Instructions": {
     "movsd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": "0xf2 0x0f 0x10",
       "ExpectedArm64ASM": [
-        "mov v16.d[0], v17.d[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.d[0], v3.d[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "movsd xmm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0xf2 0x0f 0x10",
       "ExpectedArm64ASM": [
-        "ldr d16, [x4]"
+        "mov x20, x4",
+        "ldr d2, [x20]",
+        "mov v16.16b, v2.16b"
       ]
     },
     "movsd [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0xf2 0x0f 0x11",
       "ExpectedArm64ASM": [
-        "str d16, [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "str d2, [x20]"
       ]
     },
     "movddup xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0xf2 0x0f 0x12",
       "ExpectedArm64ASM": [
-        "dup v16.2d, v17.d[0]"
+        "mov v2.16b, v17.16b",
+        "dup v3.2d, v2.d[0]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "movddup xmm0, [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0xf2 0x0f 0x12",
       "ExpectedArm64ASM": [
-        "ldr d2, [x4]",
-        "dup v16.2d, v2.d[0]"
+        "mov x20, x4",
+        "ldr d2, [x20]",
+        "dup v3.2d, v2.d[0]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "cvtsi2sd xmm0, eax": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0xf2 0x0f 0x2a"
       ],
       "ExpectedArm64ASM": [
-        "scvtf d0, w4",
-        "mov v16.d[0], v0.d[0]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "scvtf d0, w20",
+        "mov v3.d[0], v0.d[0]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "cvtsi2sd xmm0, dword [rax]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0xf2 0x0f 0x2a"
       ],
       "ExpectedArm64ASM": [
-        "ldr w20, [x4]",
-        "scvtf d0, w20",
-        "mov v16.d[0], v0.d[0]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "ldr w21, [x20]",
+        "mov v3.16b, v2.16b",
+        "scvtf d0, w21",
+        "mov v3.d[0], v0.d[0]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "cvtsi2sd xmm0, rax": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0xf2 0x0f 0x2a"
       ],
       "ExpectedArm64ASM": [
-        "scvtf d0, x4",
-        "mov v16.d[0], v0.d[0]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "scvtf d0, x20",
+        "mov v3.d[0], v0.d[0]",
+        "mov v16.16b, v3.16b"
       ]
     },
     "cvtsi2sd xmm0, qword [rax]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0xf2 0x0f 0x2a"
       ],
       "ExpectedArm64ASM": [
-        "ldr d2, [x4]",
-        "scvtf d0, d2",
-        "mov v16.d[0], v0.d[0]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "ldr d3, [x20]",
+        "mov v4.16b, v2.16b",
+        "scvtf d0, d3",
+        "mov v4.d[0], v0.d[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "movntsd [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0xf2 0x0f 0x2b",
       "ExpectedArm64ASM": [
-        "str d16, [x4]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "str d2, [x20]"
       ]
     },
     "cvttsd2si eax, xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0xf2 0x0f 0x2c",
       "ExpectedArm64ASM": [
-        "fcvtzs w4, d16"
+        "mov v2.16b, v16.16b",
+        "fcvtzs w20, d2",
+        "mov x4, x20"
       ]
     },
     "cvttsd2si eax, qword [rbx]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0xf2 0x0f 0x2c",
       "ExpectedArm64ASM": [
-        "ldr d2, [x7]",
-        "fcvtzs w4, d2"
+        "mov x20, x7",
+        "ldr d2, [x20]",
+        "fcvtzs w20, d2",
+        "mov x4, x20"
       ]
     },
     "cvttsd2si rax, xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0xf2 0x0f 0x2c",
       "ExpectedArm64ASM": [
-        "fcvtzs x4, d16"
+        "mov v2.16b, v16.16b",
+        "fcvtzs x20, d2",
+        "mov x4, x20"
       ]
     },
     "cvttsd2si rax, qword [rbx]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0xf2 0x0f 0x2c",
       "ExpectedArm64ASM": [
-        "ldr d2, [x7]",
-        "fcvtzs x4, d2"
+        "mov x20, x7",
+        "ldr d2, [x20]",
+        "fcvtzs x20, d2",
+        "mov x4, x20"
       ]
     },
     "cvtsd2si eax, xmm0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0xf2 0x0f 0x2d",
       "ExpectedArm64ASM": [
-        "frinti d0, d16",
-        "fcvtzs x4, d0"
+        "mov v2.16b, v16.16b",
+        "frinti d0, d2",
+        "fcvtzs x20, d0",
+        "mov x4, x20"
       ]
     },
     "cvtsd2si eax, qword [rbx]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0xf2 0x0f 0x2d",
       "ExpectedArm64ASM": [
-        "ldr d2, [x7]",
+        "mov x20, x7",
+        "ldr d2, [x20]",
         "frinti d0, d2",
-        "fcvtzs x4, d0"
+        "fcvtzs x20, d0",
+        "mov x4, x20"
       ]
     },
     "cvtsd2si rax, xmm0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": "0xf2 0x0f 0x2d",
       "ExpectedArm64ASM": [
-        "frinti d0, d16",
-        "fcvtzs x4, d0"
+        "mov v2.16b, v16.16b",
+        "frinti d0, d2",
+        "fcvtzs x20, d0",
+        "mov x4, x20"
       ]
     },
     "cvtsd2si rax, qword [rbx]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0xf2 0x0f 0x2d",
       "ExpectedArm64ASM": [
-        "ldr d2, [x7]",
+        "mov x20, x7",
+        "ldr d2, [x20]",
         "frinti d0, d2",
-        "fcvtzs x4, d0"
+        "fcvtzs x20, d0",
+        "mov x4, x20"
       ]
     },
     "sqrtsd xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0xf2 0x0f 0x51"
       ],
       "ExpectedArm64ASM": [
-        "fsqrt d0, d17",
-        "mov v16.d[0], v0.d[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fsqrt d0, d3",
+        "mov v4.d[0], v0.d[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "addsd xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0xf2 0x0f 0x58"
       ],
       "ExpectedArm64ASM": [
-        "fadd d0, d16, d17",
-        "mov v16.d[0], v0.d[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fadd d0, d2, d3",
+        "mov v4.d[0], v0.d[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "mulsd xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0xf2 0x0f 0x59"
       ],
       "ExpectedArm64ASM": [
-        "fmul d0, d16, d17",
-        "mov v16.d[0], v0.d[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fmul d0, d2, d3",
+        "mov v4.d[0], v0.d[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cvtsd2ss xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0xf2 0x0f 0x5a"
       ],
       "ExpectedArm64ASM": [
-        "fcvt s0, d17",
-        "mov v16.s[0], v0.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fcvt s0, d3",
+        "mov v4.s[0], v0.s[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cvtsd2ss xmm0, [rax]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0xf2 0x0f 0x5a"
       ],
       "ExpectedArm64ASM": [
-        "ldr q2, [x4]",
-        "fcvt s0, d2",
-        "mov v16.s[0], v0.s[0]"
+        "mov v2.16b, v16.16b",
+        "mov x20, x4",
+        "ldr q3, [x20]",
+        "mov v4.16b, v2.16b",
+        "fcvt s0, d3",
+        "mov v4.s[0], v0.s[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "subsd xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0xf2 0x0f 0x5c"
       ],
       "ExpectedArm64ASM": [
-        "fsub d0, d16, d17",
-        "mov v16.d[0], v0.d[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fsub d0, d2, d3",
+        "mov v4.d[0], v0.d[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "minsd xmm0, xmm1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xf2 0x0f 0x5d"
       ],
       "ExpectedArm64ASM": [
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
         "mrs x20, nzcv",
-        "fcmp d16, d17",
-        "fcsel d0, d16, d17, mi",
-        "mov v16.d[0], v0.d[0]",
+        "mov v4.16b, v2.16b",
+        "fcmp d2, d3",
+        "fcsel d0, d2, d3, mi",
+        "mov v4.d[0], v0.d[0]",
+        "mov v16.16b, v4.16b",
         "msr nzcv, x20"
       ]
     },
     "divsd xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0xf2 0x0f 0x5e"
       ],
       "ExpectedArm64ASM": [
-        "fdiv d0, d16, d17",
-        "mov v16.d[0], v0.d[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fdiv d0, d2, d3",
+        "mov v4.d[0], v0.d[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "maxsd xmm0, xmm1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xf2 0x0f 0x5f"
       ],
       "ExpectedArm64ASM": [
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
         "mrs x20, nzcv",
-        "fcmp d16, d17",
-        "fcsel d0, d17, d16, mi",
-        "mov v16.d[0], v0.d[0]",
+        "mov v4.16b, v2.16b",
+        "fcmp d2, d3",
+        "fcsel d0, d3, d2, mi",
+        "mov v4.d[0], v0.d[0]",
+        "mov v16.16b, v4.16b",
         "msr nzcv, x20"
       ]
     },
     "pshuflw xmm0, xmm1, 0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Broadcast element 0",
         "0xf2 0x0f 0x70"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.8h, v17.h[0]",
-        "trn2 v16.2d, v2.2d, v17.2d"
+        "mov v2.16b, v17.16b",
+        "dup v3.8h, v2.h[0]",
+        "trn2 v4.2d, v3.2d, v2.2d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pshuflw xmm0, xmm1, 11100100b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Identity copy",
         "0xf2 0x0f 0x70"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b"
+        "mov v2.16b, v17.16b",
+        "mov v16.16b, v2.16b"
       ]
     },
     "pshuflw xmm0, xmm1, 01010000b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Lower elements Self-zip",
         "0xf2 0x0f 0x70"
       ],
       "ExpectedArm64ASM": [
-        "zip1 v2.8h, v17.8h, v17.8h",
-        "zip1 v16.2d, v2.2d, v17.2d"
+        "mov v2.16b, v17.16b",
+        "zip1 v3.8h, v2.8h, v2.8h",
+        "zip1 v4.2d, v3.2d, v2.2d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pshuflw xmm0, xmm1, 1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Broadcast first element in to Elements 1,2,3",
         "Element 0 gets turned in to element 1",
         "0xf2 0x0f 0x70"
       ],
       "ExpectedArm64ASM": [
+        "mov v2.16b, v17.16b",
         "ldr x0, [x28, #1744]",
-        "ldr q2, [x0, #16]",
-        "tbl v16.16b, {v17.16b}, v2.16b"
+        "ldr q3, [x0, #16]",
+        "tbl v4.16b, {v2.16b}, v3.16b",
+        "mov v16.16b, v4.16b"
       ]
     },
     "pshuflw xmm0, xmm1, 0xff": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Broadcast Element 3",
         "0xf2 0x0f 0x70"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.8h, v17.h[3]",
-        "trn2 v16.2d, v2.2d, v17.2d"
+        "mov v2.16b, v17.16b",
+        "dup v3.8h, v2.h[3]",
+        "trn2 v4.2d, v3.2d, v2.2d",
+        "mov v16.16b, v4.16b"
       ]
     },
     "insertq xmm0, xmm1, 0, 0": {
@@ -345,139 +436,185 @@
       ]
     },
     "haddps xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": "0xf2 0x0f 0x7c",
       "ExpectedArm64ASM": [
-        "faddp v16.4s, v16.4s, v17.4s"
+        "mov v2.16b, v17.16b",
+        "mov v3.16b, v16.16b",
+        "faddp v4.4s, v3.4s, v2.4s",
+        "mov v16.16b, v4.16b"
       ]
     },
     "hsubps xmm0, xmm1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0xf2 0x0f 0x7d",
       "ExpectedArm64ASM": [
-        "uzp1 v2.4s, v16.4s, v17.4s",
-        "uzp2 v3.4s, v16.4s, v17.4s",
-        "fsub v16.4s, v2.4s, v3.4s"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "uzp1 v4.4s, v2.4s, v3.4s",
+        "uzp2 v5.4s, v2.4s, v3.4s",
+        "fsub v2.4s, v4.4s, v5.4s",
+        "mov v16.16b, v2.16b"
       ]
     },
     "cmpsd xmm0, xmm1, 0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0xf2 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmeq d0, d16, d17",
-        "mov v16.d[0], v0.d[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fcmeq d0, d2, d3",
+        "mov v4.d[0], v0.d[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpsd xmm0, xmm1, 1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0xf2 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt d0, d17, d16",
-        "mov v16.d[0], v0.d[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fcmgt d0, d3, d2",
+        "mov v4.d[0], v0.d[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpsd xmm0, xmm1, 2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0xf2 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmge d0, d17, d16",
-        "mov v16.d[0], v0.d[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fcmge d0, d3, d2",
+        "mov v4.d[0], v0.d[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpsd xmm0, xmm1, 3": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xf2 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmge d0, d16, d17",
-        "fcmgt d1, d17, d16",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fcmge d0, d2, d3",
+        "fcmgt d1, d3, d2",
         "orr v0.8b, v0.8b, v1.8b",
         "mvn v0.8b, v0.8b",
-        "mov v16.d[0], v0.d[0]"
+        "mov v4.d[0], v0.d[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpsd xmm0, xmm1, 4": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0xf2 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmeq d0, d16, d17",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fcmeq d0, d2, d3",
         "mvn v0.8b, v0.8b",
-        "mov v16.d[0], v0.d[0]"
+        "mov v4.d[0], v0.d[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpsd xmm0, xmm1, 5": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0xf2 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt d2, d17, d16",
-        "mvn v2.16b, v2.16b",
-        "mov v16.d[0], v2.d[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "fcmgt d4, d3, d2",
+        "mvn v3.16b, v4.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.d[0], v3.d[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpsd xmm0, xmm1, 6": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0xf2 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmge d2, d17, d16",
-        "mvn v2.16b, v2.16b",
-        "mov v16.d[0], v2.d[0]"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "fcmge d4, d3, d2",
+        "mvn v3.16b, v4.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.d[0], v3.d[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "cmpsd xmm0, xmm1, 7": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0xf2 0x0f 0xc2"
       ],
       "ExpectedArm64ASM": [
-        "fcmge d0, d16, d17",
-        "fcmgt d1, d17, d16",
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "mov v4.16b, v2.16b",
+        "fcmge d0, d2, d3",
+        "fcmgt d1, d3, d2",
         "orr v0.8b, v0.8b, v1.8b",
-        "mov v16.d[0], v0.d[0]"
+        "mov v4.d[0], v0.d[0]",
+        "mov v16.16b, v4.16b"
       ]
     },
     "addsubps xmm0, xmm1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": "0xf2 0x0f 0xd0",
       "ExpectedArm64ASM": [
-        "ldr q2, [x28, #2080]",
-        "eor v2.16b, v17.16b, v2.16b",
-        "fadd v16.4s, v16.4s, v2.4s"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "ldr q4, [x28, #2080]",
+        "eor v5.16b, v3.16b, v4.16b",
+        "fadd v3.4s, v2.4s, v5.4s",
+        "mov v16.16b, v3.16b"
       ]
     },
     "movdq2q mm0, xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": "0xf2 0x0f 0xd6",
       "ExpectedArm64ASM": [
-        "str d16, [x28, #768]"
+        "mov v2.16b, v16.16b",
+        "str d2, [x28, #768]"
       ]
     },
     "cvtpd2dq xmm0, xmm1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": "0xf2 0x0f 0xe6",
       "ExpectedArm64ASM": [
-        "fcvtn v2.2s, v17.2d",
-        "frinti v16.4s, v2.4s",
-        "fcvtzs v16.4s, v16.4s"
+        "mov v2.16b, v17.16b",
+        "fcvtn v3.2s, v2.2d",
+        "frinti v2.4s, v3.4s",
+        "fcvtzs v2.4s, v2.4s",
+        "mov v16.16b, v2.16b"
       ]
     },
     "lddqu xmm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": "0xf2 0x0f 0xf0",
       "ExpectedArm64ASM": [
-        "ldr q16, [x4]"
+        "mov x20, x4",
+        "ldr q2, [x20]",
+        "mov v16.16b, v2.16b"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/Secondary_REPNE_FCMA.json b/unittests/InstructionCountCI/Secondary_REPNE_FCMA.json
index c3cb468848..2842cea4bc 100644
--- a/unittests/InstructionCountCI/Secondary_REPNE_FCMA.json
+++ b/unittests/InstructionCountCI/Secondary_REPNE_FCMA.json
@@ -11,11 +11,14 @@
   },
   "Instructions": {
     "addsubps xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": "0xf2 0x0f 0xd0",
       "ExpectedArm64ASM": [
-        "rev64 v2.4s, v17.4s",
-        "fcadd v16.4s, v16.4s, v2.4s, #90"
+        "mov v2.16b, v16.16b",
+        "mov v3.16b, v17.16b",
+        "rev64 v4.4s, v3.4s",
+        "fcadd v3.4s, v2.4s, v4.4s, #90",
+        "mov v16.16b, v3.16b"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/Secondary_SVE128.json b/unittests/InstructionCountCI/Secondary_SVE128.json
index e7dc32671c..33c004fd4a 100644
--- a/unittests/InstructionCountCI/Secondary_SVE128.json
+++ b/unittests/InstructionCountCI/Secondary_SVE128.json
@@ -10,105 +10,117 @@
   },
   "Instructions": {
     "movmskps eax, xmm0": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x0f 0x50",
       "ExpectedArm64ASM": [
-        "ushr v2.4s, v16.4s, #31",
-        "index z3.s, #0, #1",
-        "ushl v2.4s, v2.4s, v3.4s",
-        "addv s2, v2.4s",
-        "mov w4, v2.s[0]"
+        "mov v2.16b, v16.16b",
+        "ushr v3.4s, v2.4s, #31",
+        "index z2.s, #0, #1",
+        "ushl v4.4s, v3.4s, v2.4s",
+        "addv s2, v4.4s",
+        "mov w20, v2.s[0]",
+        "mov x4, x20"
       ]
     },
     "movmskps rax, xmm0": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": "0x0f 0x50",
       "ExpectedArm64ASM": [
-        "ushr v2.4s, v16.4s, #31",
-        "index z3.s, #0, #1",
-        "ushl v2.4s, v2.4s, v3.4s",
-        "addv s2, v2.4s",
-        "mov w4, v2.s[0]"
+        "mov v2.16b, v16.16b",
+        "ushr v3.4s, v2.4s, #31",
+        "index z2.s, #0, #1",
+        "ushl v4.4s, v3.4s, v2.4s",
+        "addv s2, v4.4s",
+        "mov w20, v2.s[0]",
+        "mov x4, x20"
       ]
     },
     "psrlw mm0, mm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0xd1",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
         "ldr d3, [x28, #784]",
-        "lsr z2.h, p6/m, z2.h, z3.d",
-        "str d2, [x28, #768]"
+        "movprfx z4, z2",
+        "lsr z4.h, p6/m, z4.h, z3.d",
+        "str d4, [x28, #768]"
       ]
     },
     "psrld mm0, mm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0xd2",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
         "ldr d3, [x28, #784]",
-        "lsr z2.s, p6/m, z2.s, z3.d",
-        "str d2, [x28, #768]"
+        "movprfx z4, z2",
+        "lsr z4.s, p6/m, z4.s, z3.d",
+        "str d4, [x28, #768]"
       ]
     },
     "psrlq mm0, mm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0xd3",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
         "ldr d3, [x28, #784]",
-        "lsr z2.d, p6/m, z2.d, z3.d",
-        "str d2, [x28, #768]"
+        "movprfx z4, z2",
+        "lsr z4.d, p6/m, z4.d, z3.d",
+        "str d4, [x28, #768]"
       ]
     },
     "psraw mm0, mm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0xe1",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
         "ldr d3, [x28, #784]",
-        "asr z2.h, p6/m, z2.h, z3.d",
-        "str d2, [x28, #768]"
+        "movprfx z4, z2",
+        "asr z4.h, p6/m, z4.h, z3.d",
+        "str d4, [x28, #768]"
       ]
     },
     "psrad mm0, mm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0xe2",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
         "ldr d3, [x28, #784]",
-        "asr z2.s, p6/m, z2.s, z3.d",
-        "str d2, [x28, #768]"
+        "movprfx z4, z2",
+        "asr z4.s, p6/m, z4.s, z3.d",
+        "str d4, [x28, #768]"
       ]
     },
     "psllw mm0, mm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0xf1",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
         "ldr d3, [x28, #784]",
-        "lsl z2.h, p6/m, z2.h, z3.d",
-        "str d2, [x28, #768]"
+        "movprfx z4, z2",
+        "lsl z4.h, p6/m, z4.h, z3.d",
+        "str d4, [x28, #768]"
       ]
     },
     "pslld mm0, mm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0xf2",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
         "ldr d3, [x28, #784]",
-        "lsl z2.s, p6/m, z2.s, z3.d",
-        "str d2, [x28, #768]"
+        "movprfx z4, z2",
+        "lsl z4.s, p6/m, z4.s, z3.d",
+        "str d4, [x28, #768]"
       ]
     },
     "psllq mm0, mm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": "0x0f 0xf3",
       "ExpectedArm64ASM": [
         "ldr d2, [x28, #768]",
         "ldr d3, [x28, #784]",
-        "lsl z2.d, p6/m, z2.d, z3.d",
-        "str d2, [x28, #768]"
+        "movprfx z4, z2",
+        "lsl z4.d, p6/m, z4.d, z3.d",
+        "str d4, [x28, #768]"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/VEX_map1.json b/unittests/InstructionCountCI/VEX_map1.json
index 395e02f157..662ab9f331 100644
--- a/unittests/InstructionCountCI/VEX_map1.json
+++ b/unittests/InstructionCountCI/VEX_map1.json
@@ -15,22 +15,26 @@
   },
   "Instructions": {
     "vmovups xmm0, xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b00 0x10 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v16.16b"
+        "mov z2.d, p7/m, z16.d",
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vmovups xmm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "SVE 128-bit load already zero's the upper bits",
         "Map 1 0b00 0x10 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ldr q16, [x4]"
+        "mov x20, x4",
+        "ldr q2, [x20]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmovups ymm0, ymm0": {
@@ -45,31 +49,37 @@
       ]
     },
     "vmovups ymm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b00 0x10 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "ld1b {z16.b}, p7/z, [x4]"
+        "mov x20, x4",
+        "ld1b {z2.b}, p7/z, [x20]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmovupd xmm0, xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b01 0x10 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v16.16b"
+        "mov z2.d, p7/m, z16.d",
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vmovupd xmm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "SVE 128-bit load already zero's the upper bits",
         "Map 1 0b01 0x10 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ldr q16, [x4]"
+        "mov x20, x4",
+        "ldr q2, [x20]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmovupd ymm0, ymm0": {
@@ -84,103 +94,125 @@
       ]
     },
     "vmovupd ymm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b01 0x10 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "ld1b {z16.b}, p7/z, [x4]"
+        "mov x20, x4",
+        "ld1b {z2.b}, p7/z, [x20]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmovss xmm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "32-bit vector load already zero's the upper bits",
         "Map 1 0b10 0x10 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ldr s16, [x4]"
+        "mov x20, x4",
+        "ldr s2, [x20]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmovss xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Insert in to first element could be more optimal, which is the common case.",
         "Map 1 0b10 0x10 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "mov v16.s[0], v18.s[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "mov v4.s[0], v3.s[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vmovsd xmm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "32-bit vector load already zero's the upper bits",
         "Map 1 0b11 0x10 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ldr d16, [x4]"
+        "mov x20, x4",
+        "ldr d2, [x20]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmovsd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Insert in to first element could be more optimal, which is the common case.",
         "Map 1 0b11 0x10 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "mov v16.d[0], v18.d[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "mov v4.d[0], v3.d[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vmovups [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b00 0x11 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "str q16, [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "str q2, [x20]"
       ]
     },
     "vmovups [rax], ymm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b00 0x11 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "st1b {z16.b}, p7, [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "st1b {z2.b}, p7, [x20]"
       ]
     },
     "vmovupd [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b01 0x11 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "str q16, [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "str q2, [x20]"
       ]
     },
     "vmovupd [rax], ymm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b01 0x11 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "st1b {z16.b}, p7, [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "st1b {z2.b}, p7, [x20]"
       ]
     },
     "vmovss [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b10 0x11 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "str s16, [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "str s2, [x20]"
       ]
     },
     "db 0xc5, 0xf2, 0x11, 0xc2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "vmovss xmm2, xmm1, xmm0",
         "Need to manually encode since nasm won't encode this",
@@ -188,21 +220,26 @@
         "Map 1 0b10 0x11 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v18.16b, v17.16b",
-        "mov v18.s[0], v16.s[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z16.d",
+        "mov v4.16b, v2.16b",
+        "mov v4.s[0], v3.s[0]",
+        "mov z18.d, p7/m, z4.d"
       ]
     },
     "vmovsd [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b11 0x11 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "str d16, [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "str d2, [x20]"
       ]
     },
     "db 0xc5, 0xf3, 0x11, 0xc2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "vmovsd xmm2, xmm1, xmm0",
         "Need to manually encode since nasm won't encode this",
@@ -210,46 +247,57 @@
         "Map 1 0b11 0x11 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v18.16b, v17.16b",
-        "mov v18.d[0], v16.d[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z16.d",
+        "mov v4.16b, v2.16b",
+        "mov v4.d[0], v3.d[0]",
+        "mov z18.d, p7/m, z4.d"
       ]
     },
     "vmovlps xmm0, xmm1, [rax]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Insert in to first element could be more optimal, which is the common case.",
         "Map 1 0b00 0x12 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ldr q2, [x4]",
-        "mov v16.16b, v17.16b",
-        "mov v16.d[0], v2.d[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "ldr q3, [x20]",
+        "mov v4.16b, v2.16b",
+        "mov v4.d[0], v3.d[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vmovlpd xmm0, xmm1, [rax]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Insert in to first element could be more optimal, which is the common case.",
         "Map 1 0b01 0x12 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ldr q2, [x4]",
-        "mov v16.16b, v17.16b",
-        "mov v16.d[0], v2.d[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "ldr q3, [x20]",
+        "mov v4.16b, v2.16b",
+        "mov v4.d[0], v3.d[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vmovsldup xmm0, [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b10 0x12 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ldr q2, [x4]",
-        "trn1 v16.4s, v2.4s, v2.4s"
+        "mov x20, x4",
+        "ldr q2, [x20]",
+        "trn1 v3.4s, v2.4s, v2.4s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vmovsldup ymm0, [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Could potentially be considered optimal.",
         "Ideally the load happens directly in the destination register",
@@ -258,22 +306,26 @@
         "Map 1 0b10 0x12 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "ld1b {z2.b}, p7/z, [x4]",
-        "trn1 z16.s, z2.s, z2.s"
+        "mov x20, x4",
+        "ld1b {z2.b}, p7/z, [x20]",
+        "trn1 z3.s, z2.s, z2.s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vmovddup xmm0, [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b11 0x12 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ldr d2, [x4]",
-        "dup v16.2d, v2.d[0]"
+        "mov x20, x4",
+        "ldr d2, [x20]",
+        "dup v3.2d, v2.d[0]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vmovddup ymm0, [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Could potentially be considered optimal.",
         "Ideally the load happens directly in the destination register",
@@ -282,3947 +334,4851 @@
         "Map 1 0b11 0x12 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "ld1b {z2.b}, p7/z, [x4]",
-        "trn1 z16.d, z2.d, z2.d"
+        "mov x20, x4",
+        "ld1b {z2.b}, p7/z, [x20]",
+        "trn1 z3.d, z2.d, z2.d",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vmovlps [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b00 0x13 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "str d16, [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "str d2, [x20]"
       ]
     },
     "vmovlpd [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b01 0x13 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "str d16, [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "str d2, [x20]"
       ]
     },
     "vunpcklps xmm0, xmm1, [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b00 0x14 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ldr q2, [x4]",
-        "zip1 v16.4s, v17.4s, v2.4s"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "ldr q3, [x20]",
+        "zip1 v4.4s, v2.4s, v3.4s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vunpcklps ymm0, ymm1, [rax]": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map 1 0b00 0x14 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "ld1b {z2.b}, p7/z, [x4]",
-        "zip1 z3.s, z17.s, z2.s",
-        "zip2 z2.s, z17.s, z2.s",
-        "mov z1.q, q2",
-        "mov z16.d, z3.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "ld1b {z3.b}, p7/z, [x20]",
+        "zip1 z4.s, z2.s, z3.s",
+        "zip2 z5.s, z2.s, z3.s",
+        "mov z1.q, q5",
+        "mov z2.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z2.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vunpcklpd xmm0, xmm1, [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b01 0x14 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ldr q2, [x4]",
-        "zip1 v16.2d, v17.2d, v2.2d"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "ldr q3, [x20]",
+        "zip1 v4.2d, v2.2d, v3.2d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vunpcklpd ymm0, ymm1, [rax]": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map 1 0b01 0x14 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "ld1b {z2.b}, p7/z, [x4]",
-        "zip1 z3.d, z17.d, z2.d",
-        "zip2 z2.d, z17.d, z2.d",
-        "mov z1.q, q2",
-        "mov z16.d, z3.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "ld1b {z3.b}, p7/z, [x20]",
+        "zip1 z4.d, z2.d, z3.d",
+        "zip2 z5.d, z2.d, z3.d",
+        "mov z1.q, q5",
+        "mov z2.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z2.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vunpckhps xmm0, xmm1, [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b00 0x15 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ldr q2, [x4]",
-        "zip2 v16.4s, v17.4s, v2.4s"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "ldr q3, [x20]",
+        "zip2 v4.4s, v2.4s, v3.4s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vunpckhps ymm0, ymm1, [rax]": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map 1 0b00 0x15 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "ld1b {z2.b}, p7/z, [x4]",
-        "zip1 z3.s, z17.s, z2.s",
-        "zip2 z2.s, z17.s, z2.s",
-        "mov z1.q, z3.q[1]",
-        "mov z16.d, z2.d",
-        "mov z16.b, p6/m, z1.b"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "ld1b {z3.b}, p7/z, [x20]",
+        "zip1 z4.s, z2.s, z3.s",
+        "zip2 z5.s, z2.s, z3.s",
+        "mov z1.q, z4.q[1]",
+        "mov z2.d, z5.d",
+        "mov z2.b, p6/m, z1.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vunpckhpd xmm0, xmm1, [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b01 0x15 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ldr q2, [x4]",
-        "zip2 v16.2d, v17.2d, v2.2d"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "ldr q3, [x20]",
+        "zip2 v4.2d, v2.2d, v3.2d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vunpckhpd ymm0, ymm1, [rax]": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map 1 0b01 0x15 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "ld1b {z2.b}, p7/z, [x4]",
-        "zip1 z3.d, z17.d, z2.d",
-        "zip2 z2.d, z17.d, z2.d",
-        "mov z1.q, z3.q[1]",
-        "mov z16.d, z2.d",
-        "mov z16.b, p6/m, z1.b"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "ld1b {z3.b}, p7/z, [x20]",
+        "zip1 z4.d, z2.d, z3.d",
+        "zip2 z5.d, z2.d, z3.d",
+        "mov z1.q, z4.q[1]",
+        "mov z2.d, z5.d",
+        "mov z2.b, p6/m, z1.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmovhps xmm0, xmm1, [rax]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b00 0x16 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v2.8b, v17.8b",
-        "ldr d3, [x4]",
-        "mov v16.16b, v2.16b",
-        "mov v16.d[1], v3.d[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov v3.8b, v2.8b",
+        "mov x20, x4",
+        "ldr d2, [x20]",
+        "mov v4.16b, v3.16b",
+        "mov v4.d[1], v2.d[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vmovhpd xmm0, xmm1, [rax]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b01 0x16 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v2.8b, v17.8b",
-        "ldr d3, [x4]",
-        "mov v16.16b, v2.16b",
-        "mov v16.d[1], v3.d[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov v3.8b, v2.8b",
+        "mov x20, x4",
+        "ldr d2, [x20]",
+        "mov v4.16b, v3.16b",
+        "mov v4.d[1], v2.d[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vmovshdup xmm0, [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b10 0x16 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ldr q2, [x4]",
-        "trn2 v16.4s, v2.4s, v2.4s"
+        "mov x20, x4",
+        "ldr q2, [x20]",
+        "trn2 v3.4s, v2.4s, v2.4s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vmovshdup ymm0, [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b10 0x16 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "ld1b {z2.b}, p7/z, [x4]",
-        "trn2 z16.s, z2.s, z2.s"
+        "mov x20, x4",
+        "ld1b {z2.b}, p7/z, [x20]",
+        "trn2 z3.s, z2.s, z2.s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vmovhps [rax], xmm0": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b00 0x17 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v2.16b, v16.16b",
-        "mov v2.d[0], v16.d[1]",
-        "str d2, [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov v3.16b, v2.16b",
+        "mov v3.d[0], v2.d[1]",
+        "mov x20, x4",
+        "str d3, [x20]"
       ]
     },
     "vmovhpd [rax], xmm0": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b01 0x17 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v2.16b, v16.16b",
-        "mov v2.d[0], v16.d[1]",
-        "str d2, [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov v3.16b, v2.16b",
+        "mov v3.d[0], v2.d[1]",
+        "mov x20, x4",
+        "str d3, [x20]"
       ]
     },
     "vmovmskps rax, xmm0": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b00 0x50 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ushr v2.4s, v16.4s, #31",
-        "index z3.s, #0, #1",
-        "ushl v2.4s, v2.4s, v3.4s",
-        "addv s2, v2.4s",
-        "mov w4, v2.s[0]"
+        "mov z2.d, p7/m, z16.d",
+        "ushr v3.4s, v2.4s, #31",
+        "index z2.s, #0, #1",
+        "ushl v4.4s, v3.4s, v2.4s",
+        "addv s2, v4.4s",
+        "mov w20, v2.s[0]",
+        "mov x4, x20"
       ]
     },
     "vmovmskps rax, ymm0": {
-      "ExpectedInstructionCount": 41,
+      "ExpectedInstructionCount": 43,
       "Comment": [
         "Map 1 0b00 0x50 256-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z16.d",
         "mov w20, #0x0",
-        "mov w21, v16.s[0]",
-        "lsr w21, w21, #31",
-        "orr x20, x20, x21",
-        "mov w21, v16.s[1]",
-        "lsr w21, w21, #31",
-        "lsl w21, w21, #1",
-        "orr x20, x20, x21",
-        "mov w21, v16.s[2]",
-        "lsr w21, w21, #31",
-        "lsl w21, w21, #2",
-        "orr x20, x20, x21",
-        "mov w21, v16.s[3]",
-        "lsr w21, w21, #31",
-        "lsl w21, w21, #3",
-        "orr x20, x20, x21",
+        "mov w21, v2.s[0]",
+        "lsr w22, w21, #31",
+        "orr x21, x20, x22",
+        "mov w20, v2.s[1]",
+        "lsr w22, w20, #31",
+        "lsl w20, w22, #1",
+        "orr x22, x21, x20",
+        "mov w20, v2.s[2]",
+        "lsr w21, w20, #31",
+        "lsl w20, w21, #2",
+        "orr x21, x22, x20",
+        "mov w20, v2.s[3]",
+        "lsr w22, w20, #31",
+        "lsl w20, w22, #3",
+        "orr x22, x21, x20",
         "not p0.b, p7/z, p6.b",
-        "compact z0.d, p0, z16.d",
-        "mov w21, v16.s[0]",
-        "lsr w21, w21, #31",
-        "lsl w21, w21, #4",
-        "orr x20, x20, x21",
+        "compact z0.d, p0, z2.d",
+        "mov w20, v2.s[0]",
+        "lsr w21, w20, #31",
+        "lsl w20, w21, #4",
+        "orr x21, x22, x20",
         "not p0.b, p7/z, p6.b",
-        "compact z0.d, p0, z16.d",
-        "mov w21, v16.s[1]",
-        "lsr w21, w21, #31",
-        "lsl w21, w21, #5",
-        "orr x20, x20, x21",
+        "compact z0.d, p0, z2.d",
+        "mov w20, v2.s[1]",
+        "lsr w22, w20, #31",
+        "lsl w20, w22, #5",
+        "orr x22, x21, x20",
         "not p0.b, p7/z, p6.b",
-        "compact z0.d, p0, z16.d",
-        "mov w21, v16.s[2]",
-        "lsr w21, w21, #31",
-        "lsl w21, w21, #6",
-        "orr x20, x20, x21",
+        "compact z0.d, p0, z2.d",
+        "mov w20, v2.s[2]",
+        "lsr w21, w20, #31",
+        "lsl w20, w21, #6",
+        "orr x21, x22, x20",
         "not p0.b, p7/z, p6.b",
-        "compact z0.d, p0, z16.d",
-        "mov w21, v16.s[3]",
-        "lsr w21, w21, #31",
-        "lsl w21, w21, #7",
-        "orr x20, x20, x21",
-        "mov w4, w20"
+        "compact z0.d, p0, z2.d",
+        "mov w20, v2.s[3]",
+        "lsr w22, w20, #31",
+        "lsl w20, w22, #7",
+        "orr x22, x21, x20",
+        "mov w20, w22",
+        "mov x4, x20"
       ]
     },
     "vmovmskpd rax, xmm0": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b01 0x50 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "uzp2 v2.4s, v16.4s, v16.4s",
-        "mov x20, v2.d[0]",
-        "bfi x20, x20, #31, #32",
-        "lsr x4, x20, #62"
+        "mov z2.d, p7/m, z16.d",
+        "uzp2 v3.4s, v2.4s, v2.4s",
+        "mov x20, v3.d[0]",
+        "mov x21, x20",
+        "bfi x21, x20, #31, #32",
+        "lsr x20, x21, #62",
+        "mov x4, x20"
       ]
     },
     "vmovmskpd rax, ymm0": {
-      "ExpectedInstructionCount": 21,
+      "ExpectedInstructionCount": 23,
       "Comment": [
         "Map 1 0b01 0x50 256-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z16.d",
         "mov w20, #0x0",
-        "mov x21, v16.d[0]",
-        "lsr x21, x21, #63",
-        "orr x20, x20, x21",
-        "mov x21, v16.d[1]",
-        "lsr x21, x21, #63",
-        "lsl x21, x21, #1",
-        "orr x20, x20, x21",
+        "mov x21, v2.d[0]",
+        "lsr x22, x21, #63",
+        "orr x21, x20, x22",
+        "mov x20, v2.d[1]",
+        "lsr x22, x20, #63",
+        "lsl x20, x22, #1",
+        "orr x22, x21, x20",
         "not p0.b, p7/z, p6.b",
-        "compact z0.d, p0, z16.d",
-        "mov x21, v16.d[0]",
-        "lsr x21, x21, #63",
-        "lsl x21, x21, #2",
-        "orr x20, x20, x21",
+        "compact z0.d, p0, z2.d",
+        "mov x20, v2.d[0]",
+        "lsr x21, x20, #63",
+        "lsl x20, x21, #2",
+        "orr x21, x22, x20",
         "not p0.b, p7/z, p6.b",
-        "compact z0.d, p0, z16.d",
-        "mov x21, v16.d[1]",
-        "lsr x21, x21, #63",
-        "lsl x21, x21, #3",
-        "orr x20, x20, x21",
-        "mov w4, w20"
+        "compact z0.d, p0, z2.d",
+        "mov x20, v2.d[1]",
+        "lsr x22, x20, #63",
+        "lsl x20, x22, #3",
+        "orr x22, x21, x20",
+        "mov w20, w22",
+        "mov x4, x20"
       ]
     },
     "vsqrtps xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b00 0x51 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fsqrt v16.4s, v17.4s"
+        "mov z2.d, p7/m, z17.d",
+        "fsqrt v3.4s, v2.4s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vsqrtps ymm0, ymm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b00 0x51 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fsqrt z16.s, p7/m, z17.s"
+        "mov z2.d, p7/m, z17.d",
+        "fsqrt z3.s, p7/m, z2.s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vsqrtpd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b01 0x51 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fsqrt v16.2d, v17.2d"
+        "mov z2.d, p7/m, z17.d",
+        "fsqrt v3.2d, v2.2d",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vsqrtpd ymm0, ymm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b01 0x51 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fsqrt z16.d, p7/m, z17.d"
+        "mov z2.d, p7/m, z17.d",
+        "fsqrt z3.d, p7/m, z2.d",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vsqrtss xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b10 0x51 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fsqrt s0, s18",
-        "mov v16.s[0], v0.s[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fsqrt s0, s3",
+        "mov v4.s[0], v0.s[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vsqrtsd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b11 0x51 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fsqrt d0, d18",
-        "mov v16.d[0], v0.d[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fsqrt d0, d3",
+        "mov v4.d[0], v0.d[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vrsqrtps xmm0, xmm1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b00 0x52 128-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z17.d",
         "fmov v0.4s, #0x70 (1.0000)",
-        "fsqrt v1.4s, v17.4s",
-        "fdiv v16.4s, v0.4s, v1.4s"
+        "fsqrt v1.4s, v2.4s",
+        "fdiv v3.4s, v0.4s, v1.4s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vrsqrtps ymm0, ymm1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b00 0x52 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fsqrt z0.s, p7/m, z17.s",
-        "fmov z16.s, #0x70 (1.0000)",
-        "fdiv z16.s, p7/m, z16.s, z0.s"
+        "mov z2.d, p7/m, z17.d",
+        "fsqrt z0.s, p7/m, z2.s",
+        "fmov z3.s, #0x70 (1.0000)",
+        "fdiv z3.s, p7/m, z3.s, z0.s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vrsqrtss xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 1 0b10 0x52 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
         "fmov s0, #0x70 (1.0000)",
-        "fsqrt s1, s18",
+        "fsqrt s1, s3",
         "fdiv s0, s0, s1",
-        "mov v16.s[0], v0.s[0]"
+        "mov v4.s[0], v0.s[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vrcpps xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b00 0x53 128-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z17.d",
         "fmov v0.4s, #0x70 (1.0000)",
-        "fdiv v16.4s, v0.4s, v17.4s"
+        "fdiv v3.4s, v0.4s, v2.4s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vrcpps ymm0, ymm1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b00 0x53 256-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z17.d",
         "fmov z0.s, #0x70 (1.0000)",
-        "fdiv z0.s, p7/m, z0.s, z17.s",
-        "mov z16.d, z0.d"
+        "fdiv z0.s, p7/m, z0.s, z2.s",
+        "mov z3.d, z0.d",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vrcpss xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b10 0x53 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
         "fmov s0, #0x70 (1.0000)",
-        "fdiv s0, s0, s18",
-        "mov v16.s[0], v0.s[0]"
+        "fdiv s0, s0, s3",
+        "mov v4.s[0], v0.s[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vandps xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b00 0x54 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "and v16.16b, v16.16b, v17.16b"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "and v4.16b, v2.16b, v3.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vandps ymm0, ymm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b00 0x54 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "and z16.d, z16.d, z17.d"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "and z4.d, z2.d, z3.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vandpd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0x54 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "and v16.16b, v16.16b, v17.16b"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "and v4.16b, v2.16b, v3.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vandpd ymm0, ymm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0x54 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "and z16.d, z16.d, z17.d"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "and z4.d, z2.d, z3.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vandnps xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b00 0x55 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "bic v16.16b, v17.16b, v16.16b"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "bic v4.16b, v3.16b, v2.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vandnps ymm0, ymm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b00 0x55 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "bic z16.d, z17.d, z16.d"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "bic z4.d, z3.d, z2.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vandnpd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0x55 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "bic v16.16b, v17.16b, v16.16b"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "bic v4.16b, v3.16b, v2.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vandnpd ymm0, ymm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0x55 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "bic z16.d, z17.d, z16.d"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "bic z4.d, z3.d, z2.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vorps xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b00 0x56 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "orr v16.16b, v16.16b, v17.16b"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "orr v4.16b, v2.16b, v3.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vorps ymm0, ymm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b00 0x56 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "orr z16.d, z16.d, z17.d"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "orr z4.d, z2.d, z3.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vorpd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0x56 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "orr v16.16b, v16.16b, v17.16b"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "orr v4.16b, v2.16b, v3.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vorpd ymm0, ymm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0x56 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "orr z16.d, z16.d, z17.d"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "orr z4.d, z2.d, z3.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vxorps xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b00 0x57 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "eor v16.16b, v16.16b, v17.16b"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "eor v4.16b, v2.16b, v3.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vxorps ymm0, ymm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b00 0x57 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "eor z16.d, z16.d, z17.d"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "eor z4.d, z2.d, z3.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vxorpd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0x57 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "eor v16.16b, v16.16b, v17.16b"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "eor v4.16b, v2.16b, v3.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vxorpd ymm0, ymm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0x57 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "eor z16.d, z16.d, z17.d"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "eor z4.d, z2.d, z3.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpunpcklbw xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0x60 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "zip1 v16.16b, v17.16b, v18.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "zip1 v4.16b, v2.16b, v3.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpunpcklbw ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map 1 0b01 0x60 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "zip1 z2.b, z17.b, z18.b",
-        "zip2 z3.b, z17.b, z18.b",
-        "mov z1.q, q3",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "zip1 z4.b, z2.b, z3.b",
+        "zip2 z5.b, z2.b, z3.b",
+        "mov z1.q, q5",
+        "mov z2.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z2.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpunpcklwd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0x61 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "zip1 v16.8h, v17.8h, v18.8h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "zip1 v4.8h, v2.8h, v3.8h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpunpcklwd ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map 1 0b01 0x61 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "zip1 z2.h, z17.h, z18.h",
-        "zip2 z3.h, z17.h, z18.h",
-        "mov z1.q, q3",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "zip1 z4.h, z2.h, z3.h",
+        "zip2 z5.h, z2.h, z3.h",
+        "mov z1.q, q5",
+        "mov z2.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z2.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpunpckldq xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0x62 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "zip1 v16.4s, v17.4s, v18.4s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "zip1 v4.4s, v2.4s, v3.4s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpunpckldq ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map 1 0b01 0x62 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "zip1 z2.s, z17.s, z18.s",
-        "zip2 z3.s, z17.s, z18.s",
-        "mov z1.q, q3",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "zip1 z4.s, z2.s, z3.s",
+        "zip2 z5.s, z2.s, z3.s",
+        "mov z1.q, q5",
+        "mov z2.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z2.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpacksswb xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b01 0x63 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "sqxtn v16.8b, v17.8h",
-        "sqxtn2 v16.16b, v18.8h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "sqxtn v4.8b, v2.8h",
+        "sqxtn2 v4.16b, v3.8h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpacksswb ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 19,
+      "ExpectedInstructionCount": 22,
       "Comment": [
         "Map 1 0b01 0x63 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "sqxtnb z1.b, z18.h",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "sqxtnb z1.b, z3.h",
         "uzp1 z1.b, z1.b, z1.b",
-        "sqxtnb z2.b, z17.h",
-        "uzp1 z2.b, z2.b, z2.b",
-        "splice z2.b, p6, z2.b, z1.b",
-        "mov z1.d, z2.d[1]",
-        "mov z3.d, z2.d",
+        "sqxtnb z4.b, z2.h",
+        "uzp1 z4.b, z4.b, z4.b",
+        "splice z4.b, p6, z4.b, z1.b",
+        "mov z1.d, z4.d[1]",
+        "mov z2.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z3.d, p0/m, z1.d",
+        "mov z2.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z2.d[2]",
-        "mov z16.d, z3.d",
+        "mov z1.d, z4.d[2]",
+        "mov z3.d, z2.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z3.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpcmpgtb xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0x64 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "cmgt v16.16b, v17.16b, v18.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "cmgt v4.16b, v2.16b, v3.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpcmpgtb ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map 1 0b01 0x64 256-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
         "mrs x0, nzcv",
-        "cmpgt p0.b, p7/z, z17.b, z18.b",
-        "not z0.b, p0/m, z17.b",
-        "movprfx z16.b, p0/z, z17.b",
-        "orr z16.b, p0/m, z16.b, z0.b",
-        "msr nzcv, x0"
+        "cmpgt p0.b, p7/z, z2.b, z3.b",
+        "not z0.b, p0/m, z2.b",
+        "movprfx z4.b, p0/z, z2.b",
+        "orr z4.b, p0/m, z4.b, z0.b",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpcmpgtw xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0x65 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "cmgt v16.8h, v17.8h, v18.8h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "cmgt v4.8h, v2.8h, v3.8h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpcmpgtw ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map 1 0b01 0x65 256-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
         "mrs x0, nzcv",
-        "cmpgt p0.h, p7/z, z17.h, z18.h",
-        "not z0.h, p0/m, z17.h",
-        "movprfx z16.h, p0/z, z17.h",
-        "orr z16.h, p0/m, z16.h, z0.h",
-        "msr nzcv, x0"
+        "cmpgt p0.h, p7/z, z2.h, z3.h",
+        "not z0.h, p0/m, z2.h",
+        "movprfx z4.h, p0/z, z2.h",
+        "orr z4.h, p0/m, z4.h, z0.h",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpcmpgtd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0x66 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "cmgt v16.4s, v17.4s, v18.4s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "cmgt v4.4s, v2.4s, v3.4s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpcmpgtd ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map 1 0b01 0x66 256-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
         "mrs x0, nzcv",
-        "cmpgt p0.s, p7/z, z17.s, z18.s",
-        "not z0.s, p0/m, z17.s",
-        "movprfx z16.s, p0/z, z17.s",
-        "orr z16.s, p0/m, z16.s, z0.s",
-        "msr nzcv, x0"
+        "cmpgt p0.s, p7/z, z2.s, z3.s",
+        "not z0.s, p0/m, z2.s",
+        "movprfx z4.s, p0/z, z2.s",
+        "orr z4.s, p0/m, z4.s, z0.s",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpackuswb xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b01 0x67 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "sqxtun v16.8b, v17.8h",
-        "sqxtun2 v16.16b, v18.8h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "sqxtun v4.8b, v2.8h",
+        "sqxtun2 v4.16b, v3.8h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpackuswb ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 19,
+      "ExpectedInstructionCount": 22,
       "Comment": [
         "Map 1 0b01 0x67 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "sqxtunb z1.b, z18.h",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "sqxtunb z1.b, z3.h",
         "uzp1 z1.b, z1.b, z1.b",
-        "sqxtunb z2.b, z17.h",
-        "uzp1 z2.b, z2.b, z2.b",
-        "splice z2.b, p6, z2.b, z1.b",
-        "mov z1.d, z2.d[1]",
-        "mov z3.d, z2.d",
+        "sqxtunb z4.b, z2.h",
+        "uzp1 z4.b, z4.b, z4.b",
+        "splice z4.b, p6, z4.b, z1.b",
+        "mov z1.d, z4.d[1]",
+        "mov z2.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z3.d, p0/m, z1.d",
+        "mov z2.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z2.d[2]",
-        "mov z16.d, z3.d",
+        "mov z1.d, z4.d[2]",
+        "mov z3.d, z2.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z3.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpshufd xmm0, xmm1, 00b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map 1 0b01 0x70 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v2.16b, v17.16b",
-        "mov v2.s[0], v17.s[0]",
-        "mov v2.s[1], v17.s[0]",
-        "mov v2.s[2], v17.s[0]",
-        "mov v16.16b, v2.16b",
-        "mov v16.s[3], v17.s[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov v3.16b, v2.16b",
+        "mov v3.s[0], v2.s[0]",
+        "mov v4.16b, v3.16b",
+        "mov v4.s[1], v2.s[0]",
+        "mov v3.16b, v4.16b",
+        "mov v3.s[2], v2.s[0]",
+        "mov v4.16b, v3.16b",
+        "mov v4.s[3], v2.s[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpshufd xmm0, xmm1, 01b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map 1 0b01 0x70 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v2.16b, v17.16b",
-        "mov v2.s[0], v17.s[1]",
-        "mov v2.s[1], v17.s[0]",
-        "mov v2.s[2], v17.s[0]",
-        "mov v16.16b, v2.16b",
-        "mov v16.s[3], v17.s[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov v3.16b, v2.16b",
+        "mov v3.s[0], v2.s[1]",
+        "mov v4.16b, v3.16b",
+        "mov v4.s[1], v2.s[0]",
+        "mov v3.16b, v4.16b",
+        "mov v3.s[2], v2.s[0]",
+        "mov v4.16b, v3.16b",
+        "mov v4.s[3], v2.s[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpshufd xmm0, xmm1, 10b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map 1 0b01 0x70 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v2.16b, v17.16b",
-        "mov v2.s[0], v17.s[2]",
-        "mov v2.s[1], v17.s[0]",
-        "mov v2.s[2], v17.s[0]",
-        "mov v16.16b, v2.16b",
-        "mov v16.s[3], v17.s[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov v3.16b, v2.16b",
+        "mov v3.s[0], v2.s[2]",
+        "mov v4.16b, v3.16b",
+        "mov v4.s[1], v2.s[0]",
+        "mov v3.16b, v4.16b",
+        "mov v3.s[2], v2.s[0]",
+        "mov v4.16b, v3.16b",
+        "mov v4.s[3], v2.s[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpshufd xmm0, xmm1, 11b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map 1 0b01 0x70 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v2.16b, v17.16b",
-        "mov v2.s[0], v17.s[3]",
-        "mov v2.s[1], v17.s[0]",
-        "mov v2.s[2], v17.s[0]",
-        "mov v16.16b, v2.16b",
-        "mov v16.s[3], v17.s[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov v3.16b, v2.16b",
+        "mov v3.s[0], v2.s[3]",
+        "mov v4.16b, v3.16b",
+        "mov v4.s[1], v2.s[0]",
+        "mov v3.16b, v4.16b",
+        "mov v3.s[2], v2.s[0]",
+        "mov v4.16b, v3.16b",
+        "mov v4.s[3], v2.s[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpshufd ymm0, ymm1, 00b": {
-      "ExpectedInstructionCount": 50,
+      "ExpectedInstructionCount": 58,
       "Comment": [
         "Map 1 0b01 0x70 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z1.s, s17",
-        "mov z2.d, z17.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z1.s, s2",
+        "mov z3.d, z2.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-4",
-        "mov z2.s, p0/m, z1.s",
+        "mov z3.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[4]",
+        "mov z1.s, z2.s[4]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #0",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s17",
+        "mov z1.s, s2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-3",
-        "mov z2.s, p0/m, z1.s",
+        "mov z3.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[4]",
+        "mov z1.s, z2.s[4]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #1",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s17",
+        "mov z1.s, s2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-2",
-        "mov z2.s, p0/m, z1.s",
+        "mov z3.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[4]",
+        "mov z1.s, z2.s[4]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #2",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s17",
+        "mov z1.s, s2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-1",
-        "mov z2.s, p0/m, z1.s",
+        "mov z3.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[4]",
-        "mov z16.d, z2.d",
+        "mov z1.s, z2.s[4]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #3",
-        "mov z16.s, p0/m, z1.s",
-        "msr nzcv, x0"
+        "mov z4.s, p0/m, z1.s",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpshufd ymm0, ymm1, 01b": {
-      "ExpectedInstructionCount": 50,
+      "ExpectedInstructionCount": 58,
       "Comment": [
         "Map 1 0b01 0x70 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z1.s, z17.s[1]",
-        "mov z2.d, z17.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z1.s, z2.s[1]",
+        "mov z3.d, z2.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-4",
-        "mov z2.s, p0/m, z1.s",
+        "mov z3.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[5]",
+        "mov z1.s, z2.s[5]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #0",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s17",
+        "mov z1.s, s2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-3",
-        "mov z2.s, p0/m, z1.s",
+        "mov z3.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[4]",
+        "mov z1.s, z2.s[4]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #1",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s17",
+        "mov z1.s, s2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-2",
-        "mov z2.s, p0/m, z1.s",
+        "mov z3.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[4]",
+        "mov z1.s, z2.s[4]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #2",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s17",
+        "mov z1.s, s2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-1",
-        "mov z2.s, p0/m, z1.s",
+        "mov z3.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[4]",
-        "mov z16.d, z2.d",
+        "mov z1.s, z2.s[4]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #3",
-        "mov z16.s, p0/m, z1.s",
-        "msr nzcv, x0"
+        "mov z4.s, p0/m, z1.s",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpshufd ymm0, ymm1, 10b": {
-      "ExpectedInstructionCount": 50,
+      "ExpectedInstructionCount": 58,
       "Comment": [
         "Map 1 0b01 0x70 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z1.s, z17.s[2]",
-        "mov z2.d, z17.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z1.s, z2.s[2]",
+        "mov z3.d, z2.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-4",
-        "mov z2.s, p0/m, z1.s",
+        "mov z3.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[6]",
+        "mov z1.s, z2.s[6]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #0",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s17",
+        "mov z1.s, s2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-3",
-        "mov z2.s, p0/m, z1.s",
+        "mov z3.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[4]",
+        "mov z1.s, z2.s[4]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #1",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s17",
+        "mov z1.s, s2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-2",
-        "mov z2.s, p0/m, z1.s",
+        "mov z3.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[4]",
+        "mov z1.s, z2.s[4]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #2",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s17",
+        "mov z1.s, s2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-1",
-        "mov z2.s, p0/m, z1.s",
+        "mov z3.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[4]",
-        "mov z16.d, z2.d",
+        "mov z1.s, z2.s[4]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #3",
-        "mov z16.s, p0/m, z1.s",
-        "msr nzcv, x0"
+        "mov z4.s, p0/m, z1.s",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpshufd ymm0, ymm1, 11b": {
-      "ExpectedInstructionCount": 50,
+      "ExpectedInstructionCount": 58,
       "Comment": [
         "Map 1 0b01 0x70 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z1.s, z17.s[3]",
-        "mov z2.d, z17.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z1.s, z2.s[3]",
+        "mov z3.d, z2.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-4",
-        "mov z2.s, p0/m, z1.s",
+        "mov z3.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[7]",
+        "mov z1.s, z2.s[7]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #0",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s17",
+        "mov z1.s, s2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-3",
-        "mov z2.s, p0/m, z1.s",
+        "mov z3.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[4]",
+        "mov z1.s, z2.s[4]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #1",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s17",
+        "mov z1.s, s2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-2",
-        "mov z2.s, p0/m, z1.s",
+        "mov z3.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[4]",
+        "mov z1.s, z2.s[4]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #2",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s17",
+        "mov z1.s, s2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-1",
-        "mov z2.s, p0/m, z1.s",
+        "mov z3.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[4]",
-        "mov z16.d, z2.d",
+        "mov z1.s, z2.s[4]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #3",
-        "mov z16.s, p0/m, z1.s",
-        "msr nzcv, x0"
+        "mov z4.s, p0/m, z1.s",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpshufhw xmm0, xmm1, 00b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map 1 0b10 0x70 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v2.16b, v17.16b",
-        "mov v2.h[4], v17.h[4]",
-        "mov v2.h[5], v17.h[4]",
-        "mov v2.h[6], v17.h[4]",
-        "mov v16.16b, v2.16b",
-        "mov v16.h[7], v17.h[4]"
+        "mov z2.d, p7/m, z17.d",
+        "mov v3.16b, v2.16b",
+        "mov v3.h[4], v2.h[4]",
+        "mov v4.16b, v3.16b",
+        "mov v4.h[5], v2.h[4]",
+        "mov v3.16b, v4.16b",
+        "mov v3.h[6], v2.h[4]",
+        "mov v4.16b, v3.16b",
+        "mov v4.h[7], v2.h[4]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpshufhw xmm0, xmm1, 01b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map 1 0b10 0x70 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v2.16b, v17.16b",
-        "mov v2.h[4], v17.h[5]",
-        "mov v2.h[5], v17.h[4]",
-        "mov v2.h[6], v17.h[4]",
-        "mov v16.16b, v2.16b",
-        "mov v16.h[7], v17.h[4]"
+        "mov z2.d, p7/m, z17.d",
+        "mov v3.16b, v2.16b",
+        "mov v3.h[4], v2.h[5]",
+        "mov v4.16b, v3.16b",
+        "mov v4.h[5], v2.h[4]",
+        "mov v3.16b, v4.16b",
+        "mov v3.h[6], v2.h[4]",
+        "mov v4.16b, v3.16b",
+        "mov v4.h[7], v2.h[4]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpshufhw xmm0, xmm1, 10b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map 1 0b10 0x70 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v2.16b, v17.16b",
-        "mov v2.h[4], v17.h[6]",
-        "mov v2.h[5], v17.h[4]",
-        "mov v2.h[6], v17.h[4]",
-        "mov v16.16b, v2.16b",
-        "mov v16.h[7], v17.h[4]"
+        "mov z2.d, p7/m, z17.d",
+        "mov v3.16b, v2.16b",
+        "mov v3.h[4], v2.h[6]",
+        "mov v4.16b, v3.16b",
+        "mov v4.h[5], v2.h[4]",
+        "mov v3.16b, v4.16b",
+        "mov v3.h[6], v2.h[4]",
+        "mov v4.16b, v3.16b",
+        "mov v4.h[7], v2.h[4]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpshufhw xmm0, xmm1, 11b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map 1 0b10 0x70 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v2.16b, v17.16b",
-        "mov v2.h[4], v17.h[7]",
-        "mov v2.h[5], v17.h[4]",
-        "mov v2.h[6], v17.h[4]",
-        "mov v16.16b, v2.16b",
-        "mov v16.h[7], v17.h[4]"
+        "mov z2.d, p7/m, z17.d",
+        "mov v3.16b, v2.16b",
+        "mov v3.h[4], v2.h[7]",
+        "mov v4.16b, v3.16b",
+        "mov v4.h[5], v2.h[4]",
+        "mov v3.16b, v4.16b",
+        "mov v3.h[6], v2.h[4]",
+        "mov v4.16b, v3.16b",
+        "mov v4.h[7], v2.h[4]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpshufhw ymm0, ymm1, 00b": {
-      "ExpectedInstructionCount": 50,
+      "ExpectedInstructionCount": 58,
       "Comment": [
         "Map 1 0b10 0x70 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z1.h, z17.h[4]",
-        "mov z2.d, z17.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z1.h, z2.h[4]",
+        "mov z3.d, z2.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-4",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[12]",
+        "mov z1.h, z2.h[12]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #4",
-        "mov z2.h, p0/m, z1.h",
+        "mov z4.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[4]",
+        "mov z1.h, z2.h[4]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-3",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[12]",
+        "mov z1.h, z2.h[12]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #5",
-        "mov z2.h, p0/m, z1.h",
+        "mov z4.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[4]",
+        "mov z1.h, z2.h[4]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-2",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[12]",
+        "mov z1.h, z2.h[12]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #6",
-        "mov z2.h, p0/m, z1.h",
+        "mov z4.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[4]",
+        "mov z1.h, z2.h[4]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-1",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[12]",
-        "mov z16.d, z2.d",
+        "mov z1.h, z2.h[12]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #7",
-        "mov z16.h, p0/m, z1.h",
-        "msr nzcv, x0"
+        "mov z4.h, p0/m, z1.h",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpshufhw ymm0, ymm1, 01b": {
-      "ExpectedInstructionCount": 50,
+      "ExpectedInstructionCount": 58,
       "Comment": [
         "Map 1 0b10 0x70 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z1.h, z17.h[5]",
-        "mov z2.d, z17.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z1.h, z2.h[5]",
+        "mov z3.d, z2.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-4",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[13]",
+        "mov z1.h, z2.h[13]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #4",
-        "mov z2.h, p0/m, z1.h",
+        "mov z4.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[4]",
+        "mov z1.h, z2.h[4]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-3",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[12]",
+        "mov z1.h, z2.h[12]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #5",
-        "mov z2.h, p0/m, z1.h",
+        "mov z4.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[4]",
+        "mov z1.h, z2.h[4]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-2",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[12]",
+        "mov z1.h, z2.h[12]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #6",
-        "mov z2.h, p0/m, z1.h",
+        "mov z4.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[4]",
+        "mov z1.h, z2.h[4]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-1",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[12]",
-        "mov z16.d, z2.d",
+        "mov z1.h, z2.h[12]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #7",
-        "mov z16.h, p0/m, z1.h",
-        "msr nzcv, x0"
+        "mov z4.h, p0/m, z1.h",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpshufhw ymm0, ymm1, 10b": {
-      "ExpectedInstructionCount": 50,
+      "ExpectedInstructionCount": 58,
       "Comment": [
         "Map 1 0b10 0x70 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z1.h, z17.h[6]",
-        "mov z2.d, z17.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z1.h, z2.h[6]",
+        "mov z3.d, z2.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-4",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[14]",
+        "mov z1.h, z2.h[14]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #4",
-        "mov z2.h, p0/m, z1.h",
+        "mov z4.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[4]",
+        "mov z1.h, z2.h[4]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-3",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[12]",
+        "mov z1.h, z2.h[12]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #5",
-        "mov z2.h, p0/m, z1.h",
+        "mov z4.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[4]",
+        "mov z1.h, z2.h[4]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-2",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[12]",
+        "mov z1.h, z2.h[12]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #6",
-        "mov z2.h, p0/m, z1.h",
+        "mov z4.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[4]",
+        "mov z1.h, z2.h[4]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-1",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[12]",
-        "mov z16.d, z2.d",
+        "mov z1.h, z2.h[12]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #7",
-        "mov z16.h, p0/m, z1.h",
-        "msr nzcv, x0"
+        "mov z4.h, p0/m, z1.h",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpshufhw ymm0, ymm1, 11b": {
-      "ExpectedInstructionCount": 50,
+      "ExpectedInstructionCount": 58,
       "Comment": [
         "Map 1 0b10 0x70 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z1.h, z17.h[7]",
-        "mov z2.d, z17.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z1.h, z2.h[7]",
+        "mov z3.d, z2.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-4",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[15]",
+        "mov z1.h, z2.h[15]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #4",
-        "mov z2.h, p0/m, z1.h",
+        "mov z4.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[4]",
+        "mov z1.h, z2.h[4]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-3",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[12]",
+        "mov z1.h, z2.h[12]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #5",
-        "mov z2.h, p0/m, z1.h",
+        "mov z4.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[4]",
+        "mov z1.h, z2.h[4]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-2",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[12]",
+        "mov z1.h, z2.h[12]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #6",
-        "mov z2.h, p0/m, z1.h",
+        "mov z4.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[4]",
+        "mov z1.h, z2.h[4]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-1",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[12]",
-        "mov z16.d, z2.d",
+        "mov z1.h, z2.h[12]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #7",
-        "mov z16.h, p0/m, z1.h",
-        "msr nzcv, x0"
+        "mov z4.h, p0/m, z1.h",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpshuflw xmm0, xmm1, 00b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map 1 0b11 0x70 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v2.16b, v17.16b",
-        "mov v2.h[0], v17.h[0]",
-        "mov v2.h[1], v17.h[0]",
-        "mov v2.h[2], v17.h[0]",
-        "mov v16.16b, v2.16b",
-        "mov v16.h[3], v17.h[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov v3.16b, v2.16b",
+        "mov v3.h[0], v2.h[0]",
+        "mov v4.16b, v3.16b",
+        "mov v4.h[1], v2.h[0]",
+        "mov v3.16b, v4.16b",
+        "mov v3.h[2], v2.h[0]",
+        "mov v4.16b, v3.16b",
+        "mov v4.h[3], v2.h[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpshuflw xmm0, xmm1, 01b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map 1 0b11 0x70 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v2.16b, v17.16b",
-        "mov v2.h[0], v17.h[1]",
-        "mov v2.h[1], v17.h[0]",
-        "mov v2.h[2], v17.h[0]",
-        "mov v16.16b, v2.16b",
-        "mov v16.h[3], v17.h[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov v3.16b, v2.16b",
+        "mov v3.h[0], v2.h[1]",
+        "mov v4.16b, v3.16b",
+        "mov v4.h[1], v2.h[0]",
+        "mov v3.16b, v4.16b",
+        "mov v3.h[2], v2.h[0]",
+        "mov v4.16b, v3.16b",
+        "mov v4.h[3], v2.h[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpshuflw xmm0, xmm1, 10b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map 1 0b11 0x70 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v2.16b, v17.16b",
-        "mov v2.h[0], v17.h[2]",
-        "mov v2.h[1], v17.h[0]",
-        "mov v2.h[2], v17.h[0]",
-        "mov v16.16b, v2.16b",
-        "mov v16.h[3], v17.h[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov v3.16b, v2.16b",
+        "mov v3.h[0], v2.h[2]",
+        "mov v4.16b, v3.16b",
+        "mov v4.h[1], v2.h[0]",
+        "mov v3.16b, v4.16b",
+        "mov v3.h[2], v2.h[0]",
+        "mov v4.16b, v3.16b",
+        "mov v4.h[3], v2.h[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpshuflw xmm0, xmm1, 11b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map 1 0b11 0x70 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v2.16b, v17.16b",
-        "mov v2.h[0], v17.h[3]",
-        "mov v2.h[1], v17.h[0]",
-        "mov v2.h[2], v17.h[0]",
-        "mov v16.16b, v2.16b",
-        "mov v16.h[3], v17.h[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov v3.16b, v2.16b",
+        "mov v3.h[0], v2.h[3]",
+        "mov v4.16b, v3.16b",
+        "mov v4.h[1], v2.h[0]",
+        "mov v3.16b, v4.16b",
+        "mov v3.h[2], v2.h[0]",
+        "mov v4.16b, v3.16b",
+        "mov v4.h[3], v2.h[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpshuflw ymm0, ymm1, 00b": {
-      "ExpectedInstructionCount": 50,
+      "ExpectedInstructionCount": 58,
       "Comment": [
         "Map 1 0b11 0x70 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z1.h, h17",
-        "mov z2.d, z17.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z1.h, h2",
+        "mov z3.d, z2.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-8",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[8]",
+        "mov z1.h, z2.h[8]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #0",
-        "mov z2.h, p0/m, z1.h",
+        "mov z4.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, h17",
+        "mov z1.h, h2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-7",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[8]",
+        "mov z1.h, z2.h[8]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #1",
-        "mov z2.h, p0/m, z1.h",
+        "mov z4.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, h17",
+        "mov z1.h, h2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-6",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[8]",
+        "mov z1.h, z2.h[8]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #2",
-        "mov z2.h, p0/m, z1.h",
+        "mov z4.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, h17",
+        "mov z1.h, h2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-5",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[8]",
-        "mov z16.d, z2.d",
+        "mov z1.h, z2.h[8]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #3",
-        "mov z16.h, p0/m, z1.h",
-        "msr nzcv, x0"
+        "mov z4.h, p0/m, z1.h",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpshuflw ymm0, ymm1, 01b": {
-      "ExpectedInstructionCount": 50,
+      "ExpectedInstructionCount": 58,
       "Comment": [
         "Map 1 0b11 0x70 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z1.h, z17.h[1]",
-        "mov z2.d, z17.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z1.h, z2.h[1]",
+        "mov z3.d, z2.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-8",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[9]",
+        "mov z1.h, z2.h[9]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #0",
-        "mov z2.h, p0/m, z1.h",
+        "mov z4.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, h17",
+        "mov z1.h, h2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-7",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[8]",
+        "mov z1.h, z2.h[8]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #1",
-        "mov z2.h, p0/m, z1.h",
+        "mov z4.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, h17",
+        "mov z1.h, h2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-6",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[8]",
+        "mov z1.h, z2.h[8]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #2",
-        "mov z2.h, p0/m, z1.h",
+        "mov z4.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, h17",
+        "mov z1.h, h2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-5",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[8]",
-        "mov z16.d, z2.d",
+        "mov z1.h, z2.h[8]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #3",
-        "mov z16.h, p0/m, z1.h",
-        "msr nzcv, x0"
+        "mov z4.h, p0/m, z1.h",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpshuflw ymm0, ymm1, 10b": {
-      "ExpectedInstructionCount": 50,
+      "ExpectedInstructionCount": 58,
       "Comment": [
         "Map 1 0b11 0x70 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z1.h, z17.h[2]",
-        "mov z2.d, z17.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z1.h, z2.h[2]",
+        "mov z3.d, z2.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-8",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[10]",
+        "mov z1.h, z2.h[10]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #0",
-        "mov z2.h, p0/m, z1.h",
+        "mov z4.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, h17",
+        "mov z1.h, h2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-7",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[8]",
+        "mov z1.h, z2.h[8]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #1",
-        "mov z2.h, p0/m, z1.h",
+        "mov z4.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, h17",
+        "mov z1.h, h2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-6",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[8]",
+        "mov z1.h, z2.h[8]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #2",
-        "mov z2.h, p0/m, z1.h",
+        "mov z4.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, h17",
+        "mov z1.h, h2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-5",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[8]",
-        "mov z16.d, z2.d",
+        "mov z1.h, z2.h[8]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #3",
-        "mov z16.h, p0/m, z1.h",
-        "msr nzcv, x0"
+        "mov z4.h, p0/m, z1.h",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpshuflw ymm0, ymm1, 11b": {
-      "ExpectedInstructionCount": 50,
+      "ExpectedInstructionCount": 58,
       "Comment": [
         "Map 1 0b11 0x70 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z1.h, z17.h[3]",
-        "mov z2.d, z17.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z1.h, z2.h[3]",
+        "mov z3.d, z2.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-8",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[11]",
+        "mov z1.h, z2.h[11]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #0",
-        "mov z2.h, p0/m, z1.h",
+        "mov z4.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, h17",
+        "mov z1.h, h2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-7",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[8]",
+        "mov z1.h, z2.h[8]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #1",
-        "mov z2.h, p0/m, z1.h",
+        "mov z4.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, h17",
+        "mov z1.h, h2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-6",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[8]",
+        "mov z1.h, z2.h[8]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #2",
-        "mov z2.h, p0/m, z1.h",
+        "mov z4.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, h17",
+        "mov z1.h, h2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-5",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[8]",
-        "mov z16.d, z2.d",
+        "mov z1.h, z2.h[8]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #3",
-        "mov z16.h, p0/m, z1.h",
-        "msr nzcv, x0"
+        "mov z4.h, p0/m, z1.h",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpcmpeqb xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0x74 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "cmeq v16.16b, v17.16b, v18.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "cmeq v4.16b, v2.16b, v3.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpcmpeqb ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map 1 0b01 0x74 256-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
         "mrs x0, nzcv",
-        "cmpeq p0.b, p7/z, z17.b, z18.b",
-        "not z0.b, p0/m, z17.b",
-        "movprfx z16.b, p0/z, z17.b",
-        "orr z16.b, p0/m, z16.b, z0.b",
-        "msr nzcv, x0"
+        "cmpeq p0.b, p7/z, z2.b, z3.b",
+        "not z0.b, p0/m, z2.b",
+        "movprfx z4.b, p0/z, z2.b",
+        "orr z4.b, p0/m, z4.b, z0.b",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpcmpeqw xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0x75 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "cmeq v16.8h, v17.8h, v18.8h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "cmeq v4.8h, v2.8h, v3.8h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpcmpeqw ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map 1 0b01 0x75 256-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
         "mrs x0, nzcv",
-        "cmpeq p0.h, p7/z, z17.h, z18.h",
-        "not z0.h, p0/m, z17.h",
-        "movprfx z16.h, p0/z, z17.h",
-        "orr z16.h, p0/m, z16.h, z0.h",
-        "msr nzcv, x0"
+        "cmpeq p0.h, p7/z, z2.h, z3.h",
+        "not z0.h, p0/m, z2.h",
+        "movprfx z4.h, p0/z, z2.h",
+        "orr z4.h, p0/m, z4.h, z0.h",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpcmpeqd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0x76 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "cmeq v16.4s, v17.4s, v18.4s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "cmeq v4.4s, v2.4s, v3.4s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpcmpeqd ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map 1 0b01 0x76 256-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
         "mrs x0, nzcv",
-        "cmpeq p0.s, p7/z, z17.s, z18.s",
-        "not z0.s, p0/m, z17.s",
-        "movprfx z16.s, p0/z, z17.s",
-        "orr z16.s, p0/m, z16.s, z0.s",
-        "msr nzcv, x0"
+        "cmpeq p0.s, p7/z, z2.s, z3.s",
+        "not z0.s, p0/m, z2.s",
+        "movprfx z4.s, p0/z, z2.s",
+        "orr z4.s, p0/m, z4.s, z0.s",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vzeroupper": {
-      "ExpectedInstructionCount": 16,
+      "ExpectedInstructionCount": 48,
       "Comment": [
         "Might need to revisit this if move renaming ends up slower than some other clearing",
         "Map 1 0b01 0x77 L=0"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v16.16b",
-        "mov v17.16b, v17.16b",
-        "mov v18.16b, v18.16b",
-        "mov v19.16b, v19.16b",
-        "mov v20.16b, v20.16b",
-        "mov v21.16b, v21.16b",
-        "mov v22.16b, v22.16b",
-        "mov v23.16b, v23.16b",
-        "mov v24.16b, v24.16b",
-        "mov v25.16b, v25.16b",
-        "mov v26.16b, v26.16b",
-        "mov v27.16b, v27.16b",
-        "mov v28.16b, v28.16b",
-        "mov v29.16b, v29.16b",
-        "mov v30.16b, v30.16b",
-        "mov v31.16b, v31.16b"
+        "mov z2.d, p7/m, z16.d",
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov v3.16b, v2.16b",
+        "mov z17.d, p7/m, z3.d",
+        "mov z2.d, p7/m, z18.d",
+        "mov v3.16b, v2.16b",
+        "mov z18.d, p7/m, z3.d",
+        "mov z2.d, p7/m, z19.d",
+        "mov v3.16b, v2.16b",
+        "mov z19.d, p7/m, z3.d",
+        "mov z2.d, p7/m, z20.d",
+        "mov v3.16b, v2.16b",
+        "mov z20.d, p7/m, z3.d",
+        "mov z2.d, p7/m, z21.d",
+        "mov v3.16b, v2.16b",
+        "mov z21.d, p7/m, z3.d",
+        "mov z2.d, p7/m, z22.d",
+        "mov v3.16b, v2.16b",
+        "mov z22.d, p7/m, z3.d",
+        "mov z2.d, p7/m, z23.d",
+        "mov v3.16b, v2.16b",
+        "mov z23.d, p7/m, z3.d",
+        "mov z2.d, p7/m, z24.d",
+        "mov v3.16b, v2.16b",
+        "mov z24.d, p7/m, z3.d",
+        "mov z2.d, p7/m, z25.d",
+        "mov v3.16b, v2.16b",
+        "mov z25.d, p7/m, z3.d",
+        "mov z2.d, p7/m, z26.d",
+        "mov v3.16b, v2.16b",
+        "mov z26.d, p7/m, z3.d",
+        "mov z2.d, p7/m, z27.d",
+        "mov v3.16b, v2.16b",
+        "mov z27.d, p7/m, z3.d",
+        "mov z2.d, p7/m, z28.d",
+        "mov v3.16b, v2.16b",
+        "mov z28.d, p7/m, z3.d",
+        "mov z2.d, p7/m, z29.d",
+        "mov v3.16b, v2.16b",
+        "mov z29.d, p7/m, z3.d",
+        "mov z2.d, p7/m, z30.d",
+        "mov v3.16b, v2.16b",
+        "mov z30.d, p7/m, z3.d",
+        "mov z2.d, p7/m, z31.d",
+        "mov v3.16b, v2.16b",
+        "mov z31.d, p7/m, z3.d"
       ]
     },
     "vzeroall": {
-      "ExpectedInstructionCount": 16,
+      "ExpectedInstructionCount": 32,
       "Comment": [
         "Map 1 0b01 0x77 L=1"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0",
-        "movi v17.2d, #0x0",
-        "movi v18.2d, #0x0",
-        "movi v19.2d, #0x0",
-        "movi v20.2d, #0x0",
-        "movi v21.2d, #0x0",
-        "movi v22.2d, #0x0",
-        "movi v23.2d, #0x0",
-        "movi v24.2d, #0x0",
-        "movi v25.2d, #0x0",
-        "movi v26.2d, #0x0",
-        "movi v27.2d, #0x0",
-        "movi v28.2d, #0x0",
-        "movi v29.2d, #0x0",
-        "movi v30.2d, #0x0",
-        "movi v31.2d, #0x0"
+        "movi v2.2d, #0x0",
+        "mov z16.d, p7/m, z2.d",
+        "movi v2.2d, #0x0",
+        "mov z17.d, p7/m, z2.d",
+        "movi v2.2d, #0x0",
+        "mov z18.d, p7/m, z2.d",
+        "movi v2.2d, #0x0",
+        "mov z19.d, p7/m, z2.d",
+        "movi v2.2d, #0x0",
+        "mov z20.d, p7/m, z2.d",
+        "movi v2.2d, #0x0",
+        "mov z21.d, p7/m, z2.d",
+        "movi v2.2d, #0x0",
+        "mov z22.d, p7/m, z2.d",
+        "movi v2.2d, #0x0",
+        "mov z23.d, p7/m, z2.d",
+        "movi v2.2d, #0x0",
+        "mov z24.d, p7/m, z2.d",
+        "movi v2.2d, #0x0",
+        "mov z25.d, p7/m, z2.d",
+        "movi v2.2d, #0x0",
+        "mov z26.d, p7/m, z2.d",
+        "movi v2.2d, #0x0",
+        "mov z27.d, p7/m, z2.d",
+        "movi v2.2d, #0x0",
+        "mov z28.d, p7/m, z2.d",
+        "movi v2.2d, #0x0",
+        "mov z29.d, p7/m, z2.d",
+        "movi v2.2d, #0x0",
+        "mov z30.d, p7/m, z2.d",
+        "movi v2.2d, #0x0",
+        "mov z31.d, p7/m, z2.d"
       ]
     },
     "vcmpps xmm0, xmm1, xmm2, 0x00": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b00 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmeq v16.4s, v17.4s, v18.4s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmeq v4.4s, v2.4s, v3.4s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpps ymm0, ymm1, ymm2, 0x00": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b00 0xC2 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmeq p0.s, p7/z, z17.s, z18.s",
-        "not z0.s, p0/m, z17.s",
-        "movprfx z16.s, p0/z, z17.s",
-        "orr z16.s, p0/m, z16.s, z0.s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmeq p0.s, p7/z, z2.s, z3.s",
+        "not z0.s, p0/m, z2.s",
+        "movprfx z4.s, p0/z, z2.s",
+        "orr z4.s, p0/m, z4.s, z0.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpps xmm0, xmm1, xmm2, 0x01": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b00 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt v16.4s, v18.4s, v17.4s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmgt v4.4s, v3.4s, v2.4s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpps ymm0, ymm1, ymm2, 0x01": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b00 0xC2 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt p0.s, p7/z, z18.s, z17.s",
-        "not z0.s, p0/m, z18.s",
-        "movprfx z16.s, p0/z, z18.s",
-        "orr z16.s, p0/m, z16.s, z0.s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmgt p0.s, p7/z, z3.s, z2.s",
+        "not z0.s, p0/m, z3.s",
+        "movprfx z4.s, p0/z, z3.s",
+        "orr z4.s, p0/m, z4.s, z0.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpps xmm0, xmm1, xmm2, 0x02": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b00 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmge v16.4s, v18.4s, v17.4s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmge v4.4s, v3.4s, v2.4s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpps ymm0, ymm1, ymm2, 0x02": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b00 0xC2 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmge p0.s, p7/z, z18.s, z17.s",
-        "not z0.s, p0/m, z18.s",
-        "movprfx z16.s, p0/z, z18.s",
-        "orr z16.s, p0/m, z16.s, z0.s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmge p0.s, p7/z, z3.s, z2.s",
+        "not z0.s, p0/m, z3.s",
+        "movprfx z4.s, p0/z, z3.s",
+        "orr z4.s, p0/m, z4.s, z0.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpps xmm0, xmm1, xmm2, 0x03": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b00 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmge v0.4s, v17.4s, v18.4s",
-        "fcmgt v1.4s, v18.4s, v17.4s",
-        "orr v16.16b, v0.16b, v1.16b",
-        "mvn v16.16b, v16.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmge v0.4s, v2.4s, v3.4s",
+        "fcmgt v1.4s, v3.4s, v2.4s",
+        "orr v4.16b, v0.16b, v1.16b",
+        "mvn v4.16b, v4.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpps ymm0, ymm1, ymm2, 0x03": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b00 0xC2 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmuo p0.s, p7/z, z17.s, z18.s",
-        "not z0.s, p0/m, z17.s",
-        "movprfx z16.s, p0/z, z17.s",
-        "orr z16.s, p0/m, z16.s, z0.s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmuo p0.s, p7/z, z2.s, z3.s",
+        "not z0.s, p0/m, z2.s",
+        "movprfx z4.s, p0/z, z2.s",
+        "orr z4.s, p0/m, z4.s, z0.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpps xmm0, xmm1, xmm2, 0x04": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b00 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmeq v16.4s, v17.4s, v18.4s",
-        "mvn v16.16b, v16.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmeq v4.4s, v2.4s, v3.4s",
+        "mvn v4.16b, v4.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpps ymm0, ymm1, ymm2, 0x04": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b00 0xC2 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmne p0.s, p7/z, z17.s, z18.s",
-        "not z0.s, p0/m, z17.s",
-        "movprfx z16.s, p0/z, z17.s",
-        "orr z16.s, p0/m, z16.s, z0.s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmne p0.s, p7/z, z2.s, z3.s",
+        "not z0.s, p0/m, z2.s",
+        "movprfx z4.s, p0/z, z2.s",
+        "orr z4.s, p0/m, z4.s, z0.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpps xmm0, xmm1, xmm2, 0x05": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b00 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt v2.4s, v18.4s, v17.4s",
-        "mvn v16.16b, v2.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmgt v4.4s, v3.4s, v2.4s",
+        "mvn v2.16b, v4.16b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vcmpps ymm0, ymm1, ymm2, 0x05": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 1 0b00 0xC2 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt p0.s, p7/z, z18.s, z17.s",
-        "not z0.s, p0/m, z18.s",
-        "movprfx z2.s, p0/z, z18.s",
-        "orr z2.s, p0/m, z2.s, z0.s",
-        "not z16.b, p7/m, z2.b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmgt p0.s, p7/z, z3.s, z2.s",
+        "not z0.s, p0/m, z3.s",
+        "movprfx z4.s, p0/z, z3.s",
+        "orr z4.s, p0/m, z4.s, z0.s",
+        "not z2.b, p7/m, z4.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vcmpps xmm0, xmm1, xmm2, 0x06": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b00 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmge v2.4s, v18.4s, v17.4s",
-        "mvn v16.16b, v2.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmge v4.4s, v3.4s, v2.4s",
+        "mvn v2.16b, v4.16b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vcmpps ymm0, ymm1, ymm2, 0x06": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 1 0b00 0xC2 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmge p0.s, p7/z, z18.s, z17.s",
-        "not z0.s, p0/m, z18.s",
-        "movprfx z2.s, p0/z, z18.s",
-        "orr z2.s, p0/m, z2.s, z0.s",
-        "not z16.b, p7/m, z2.b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmge p0.s, p7/z, z3.s, z2.s",
+        "not z0.s, p0/m, z3.s",
+        "movprfx z4.s, p0/z, z3.s",
+        "orr z4.s, p0/m, z4.s, z0.s",
+        "not z2.b, p7/m, z4.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vcmpps xmm0, xmm1, xmm2, 0x07": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b00 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmge v0.4s, v17.4s, v18.4s",
-        "fcmgt v1.4s, v18.4s, v17.4s",
-        "orr v16.16b, v0.16b, v1.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmge v0.4s, v2.4s, v3.4s",
+        "fcmgt v1.4s, v3.4s, v2.4s",
+        "orr v4.16b, v0.16b, v1.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpps ymm0, ymm1, ymm2, 0x07": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 1 0b00 0xC2 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmuo p0.s, p7/z, z17.s, z18.s",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmuo p0.s, p7/z, z2.s, z3.s",
         "not p0.b, p7/z, p0.b",
-        "not z0.s, p0/m, z17.s",
-        "movprfx z16.s, p0/z, z17.s",
-        "orr z16.s, p0/m, z16.s, z0.s"
+        "not z0.s, p0/m, z2.s",
+        "movprfx z4.s, p0/z, z2.s",
+        "orr z4.s, p0/m, z4.s, z0.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmppd xmm0, xmm1, xmm2, 0x00": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmeq v16.2d, v17.2d, v18.2d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmeq v4.2d, v2.2d, v3.2d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmppd ymm0, ymm1, ymm2, 0x00": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b01 0xC2 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmeq p0.d, p7/z, z17.d, z18.d",
-        "not z0.d, p0/m, z17.d",
-        "movprfx z16.d, p0/z, z17.d",
-        "orr z16.d, p0/m, z16.d, z0.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmeq p0.d, p7/z, z2.d, z3.d",
+        "not z0.d, p0/m, z2.d",
+        "movprfx z4.d, p0/z, z2.d",
+        "orr z4.d, p0/m, z4.d, z0.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmppd xmm0, xmm1, xmm2, 0x01": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt v16.2d, v18.2d, v17.2d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmgt v4.2d, v3.2d, v2.2d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmppd ymm0, ymm1, ymm2, 0x01": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b01 0xC2 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt p0.d, p7/z, z18.d, z17.d",
-        "not z0.d, p0/m, z18.d",
-        "movprfx z16.d, p0/z, z18.d",
-        "orr z16.d, p0/m, z16.d, z0.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmgt p0.d, p7/z, z3.d, z2.d",
+        "not z0.d, p0/m, z3.d",
+        "movprfx z4.d, p0/z, z3.d",
+        "orr z4.d, p0/m, z4.d, z0.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmppd xmm0, xmm1, xmm2, 0x02": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmge v16.2d, v18.2d, v17.2d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmge v4.2d, v3.2d, v2.2d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmppd ymm0, ymm1, ymm2, 0x02": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b01 0xC2 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmge p0.d, p7/z, z18.d, z17.d",
-        "not z0.d, p0/m, z18.d",
-        "movprfx z16.d, p0/z, z18.d",
-        "orr z16.d, p0/m, z16.d, z0.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmge p0.d, p7/z, z3.d, z2.d",
+        "not z0.d, p0/m, z3.d",
+        "movprfx z4.d, p0/z, z3.d",
+        "orr z4.d, p0/m, z4.d, z0.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmppd xmm0, xmm1, xmm2, 0x03": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b01 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmge v0.2d, v17.2d, v18.2d",
-        "fcmgt v1.2d, v18.2d, v17.2d",
-        "orr v16.16b, v0.16b, v1.16b",
-        "mvn v16.16b, v16.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmge v0.2d, v2.2d, v3.2d",
+        "fcmgt v1.2d, v3.2d, v2.2d",
+        "orr v4.16b, v0.16b, v1.16b",
+        "mvn v4.16b, v4.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmppd ymm0, ymm1, ymm2, 0x03": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b01 0xC2 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmuo p0.d, p7/z, z17.d, z18.d",
-        "not z0.d, p0/m, z17.d",
-        "movprfx z16.d, p0/z, z17.d",
-        "orr z16.d, p0/m, z16.d, z0.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmuo p0.d, p7/z, z2.d, z3.d",
+        "not z0.d, p0/m, z2.d",
+        "movprfx z4.d, p0/z, z2.d",
+        "orr z4.d, p0/m, z4.d, z0.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmppd xmm0, xmm1, xmm2, 0x04": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b01 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmeq v16.2d, v17.2d, v18.2d",
-        "mvn v16.16b, v16.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmeq v4.2d, v2.2d, v3.2d",
+        "mvn v4.16b, v4.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmppd ymm0, ymm1, ymm2, 0x04": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b01 0xC2 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmne p0.d, p7/z, z17.d, z18.d",
-        "not z0.d, p0/m, z17.d",
-        "movprfx z16.d, p0/z, z17.d",
-        "orr z16.d, p0/m, z16.d, z0.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmne p0.d, p7/z, z2.d, z3.d",
+        "not z0.d, p0/m, z2.d",
+        "movprfx z4.d, p0/z, z2.d",
+        "orr z4.d, p0/m, z4.d, z0.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmppd xmm0, xmm1, xmm2, 0x05": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b01 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt v2.2d, v18.2d, v17.2d",
-        "mvn v16.16b, v2.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmgt v4.2d, v3.2d, v2.2d",
+        "mvn v2.16b, v4.16b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vcmppd ymm0, ymm1, ymm2, 0x05": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 1 0b01 0xC2 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt p0.d, p7/z, z18.d, z17.d",
-        "not z0.d, p0/m, z18.d",
-        "movprfx z2.d, p0/z, z18.d",
-        "orr z2.d, p0/m, z2.d, z0.d",
-        "not z16.b, p7/m, z2.b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmgt p0.d, p7/z, z3.d, z2.d",
+        "not z0.d, p0/m, z3.d",
+        "movprfx z4.d, p0/z, z3.d",
+        "orr z4.d, p0/m, z4.d, z0.d",
+        "not z2.b, p7/m, z4.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vcmppd xmm0, xmm1, xmm2, 0x06": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b01 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmge v2.2d, v18.2d, v17.2d",
-        "mvn v16.16b, v2.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmge v4.2d, v3.2d, v2.2d",
+        "mvn v2.16b, v4.16b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vcmppd ymm0, ymm1, ymm2, 0x06": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 1 0b01 0xC2 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmge p0.d, p7/z, z18.d, z17.d",
-        "not z0.d, p0/m, z18.d",
-        "movprfx z2.d, p0/z, z18.d",
-        "orr z2.d, p0/m, z2.d, z0.d",
-        "not z16.b, p7/m, z2.b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmge p0.d, p7/z, z3.d, z2.d",
+        "not z0.d, p0/m, z3.d",
+        "movprfx z4.d, p0/z, z3.d",
+        "orr z4.d, p0/m, z4.d, z0.d",
+        "not z2.b, p7/m, z4.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vcmppd xmm0, xmm1, xmm2, 0x07": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b01 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmge v0.2d, v17.2d, v18.2d",
-        "fcmgt v1.2d, v18.2d, v17.2d",
-        "orr v16.16b, v0.16b, v1.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmge v0.2d, v2.2d, v3.2d",
+        "fcmgt v1.2d, v3.2d, v2.2d",
+        "orr v4.16b, v0.16b, v1.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmppd ymm0, ymm1, ymm2, 0x07": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 1 0b01 0xC2 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmuo p0.d, p7/z, z17.d, z18.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmuo p0.d, p7/z, z2.d, z3.d",
         "not p0.b, p7/z, p0.b",
-        "not z0.d, p0/m, z17.d",
-        "movprfx z16.d, p0/z, z17.d",
-        "orr z16.d, p0/m, z16.d, z0.d"
+        "not z0.d, p0/m, z2.d",
+        "movprfx z4.d, p0/z, z2.d",
+        "orr z4.d, p0/m, z4.d, z0.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpss xmm0, xmm1, xmm2, 0x00": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b10 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fcmeq s0, s17, s18",
-        "mov v16.s[0], v0.s[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fcmeq s0, s2, s3",
+        "mov v4.s[0], v0.s[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpss xmm0, xmm1, xmm2, 0x01": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b10 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fcmgt s0, s18, s17",
-        "mov v16.s[0], v0.s[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fcmgt s0, s3, s2",
+        "mov v4.s[0], v0.s[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpss xmm0, xmm1, xmm2, 0x02": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b10 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fcmge s0, s18, s17",
-        "mov v16.s[0], v0.s[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fcmge s0, s3, s2",
+        "mov v4.s[0], v0.s[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpss xmm0, xmm1, xmm2, 0x03": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map 1 0b10 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fcmge s0, s17, s18",
-        "fcmgt s1, s18, s17",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fcmge s0, s2, s3",
+        "fcmgt s1, s3, s2",
         "orr v0.8b, v0.8b, v1.8b",
         "mvn v0.8b, v0.8b",
-        "mov v16.s[0], v0.s[0]"
+        "mov v4.s[0], v0.s[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpss xmm0, xmm1, xmm2, 0x04": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b10 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fcmeq s0, s17, s18",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fcmeq s0, s2, s3",
         "mvn v0.8b, v0.8b",
-        "mov v16.s[0], v0.s[0]"
+        "mov v4.s[0], v0.s[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpss xmm0, xmm1, xmm2, 0x05": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b10 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt s2, s18, s17",
-        "mvn v2.16b, v2.16b",
-        "mov v16.16b, v17.16b",
-        "mov v16.s[0], v2.s[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmgt s4, s3, s2",
+        "mvn v3.16b, v4.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.s[0], v3.s[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpss xmm0, xmm1, xmm2, 0x06": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b10 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmge s2, s18, s17",
-        "mvn v2.16b, v2.16b",
-        "mov v16.16b, v17.16b",
-        "mov v16.s[0], v2.s[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmge s4, s3, s2",
+        "mvn v3.16b, v4.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.s[0], v3.s[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpss xmm0, xmm1, xmm2, 0x07": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 1 0b10 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fcmge s0, s17, s18",
-        "fcmgt s1, s18, s17",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fcmge s0, s2, s3",
+        "fcmgt s1, s3, s2",
         "orr v0.8b, v0.8b, v1.8b",
-        "mov v16.s[0], v0.s[0]"
+        "mov v4.s[0], v0.s[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpsd xmm0, xmm1, xmm2, 0x00": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b11 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fcmeq d0, d17, d18",
-        "mov v16.d[0], v0.d[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fcmeq d0, d2, d3",
+        "mov v4.d[0], v0.d[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpsd xmm0, xmm1, xmm2, 0x01": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b11 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fcmgt d0, d18, d17",
-        "mov v16.d[0], v0.d[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fcmgt d0, d3, d2",
+        "mov v4.d[0], v0.d[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpsd xmm0, xmm1, xmm2, 0x02": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b11 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fcmge d0, d18, d17",
-        "mov v16.d[0], v0.d[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fcmge d0, d3, d2",
+        "mov v4.d[0], v0.d[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpsd xmm0, xmm1, xmm2, 0x03": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map 1 0b11 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fcmge d0, d17, d18",
-        "fcmgt d1, d18, d17",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fcmge d0, d2, d3",
+        "fcmgt d1, d3, d2",
         "orr v0.8b, v0.8b, v1.8b",
         "mvn v0.8b, v0.8b",
-        "mov v16.d[0], v0.d[0]"
+        "mov v4.d[0], v0.d[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpsd xmm0, xmm1, xmm2, 0x04": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b11 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fcmeq d0, d17, d18",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fcmeq d0, d2, d3",
         "mvn v0.8b, v0.8b",
-        "mov v16.d[0], v0.d[0]"
+        "mov v4.d[0], v0.d[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpsd xmm0, xmm1, xmm2, 0x05": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b11 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt d2, d18, d17",
-        "mvn v2.16b, v2.16b",
-        "mov v16.16b, v17.16b",
-        "mov v16.d[0], v2.d[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmgt d4, d3, d2",
+        "mvn v3.16b, v4.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.d[0], v3.d[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpsd xmm0, xmm1, xmm2, 0x06": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b11 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmge d2, d18, d17",
-        "mvn v2.16b, v2.16b",
-        "mov v16.16b, v17.16b",
-        "mov v16.d[0], v2.d[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmge d4, d3, d2",
+        "mvn v3.16b, v4.16b",
+        "mov v4.16b, v2.16b",
+        "mov v4.d[0], v3.d[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcmpsd xmm0, xmm1, xmm2, 0x07": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 1 0b11 0xC2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fcmge d0, d17, d18",
-        "fcmgt d1, d18, d17",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fcmge d0, d2, d3",
+        "fcmgt d1, d3, d2",
         "orr v0.8b, v0.8b, v1.8b",
-        "mov v16.d[0], v0.d[0]"
+        "mov v4.d[0], v0.d[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpinsrw xmm0, xmm0, eax, 000b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b01 0xC4 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v2.16b, v16.16b",
-        "mov v2.h[0], w4",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.h[0], w20",
+        "mov v2.16b, v3.16b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpinsrw xmm0, xmm1, eax, 000b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b01 0xC4 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "mov v16.h[0], w4"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.h[0], w20",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpinsrw xmm0, xmm1, eax, 001b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b01 0xC4 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "mov v16.h[1], w4"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.h[1], w20",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpinsrw xmm0, xmm1, eax, 111b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b01 0xC4 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "mov v16.h[7], w4"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.h[7], w20",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpextrw eax, xmm0, 000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b01 0xC5 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "umov w4, v16.h[0]"
+        "mov z2.d, p7/m, z16.d",
+        "umov w20, v2.h[0]",
+        "mov x4, x20"
       ]
     },
     "vpextrw eax, xmm0, 001b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b01 0xC5 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "umov w4, v16.h[1]"
+        "mov z2.d, p7/m, z16.d",
+        "umov w20, v2.h[1]",
+        "mov x4, x20"
       ]
     },
     "vpextrw eax, xmm0, 111b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b01 0xC5 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "umov w4, v16.h[7]"
+        "mov z2.d, p7/m, z16.d",
+        "umov w20, v2.h[7]",
+        "mov x4, x20"
       ]
     },
     "vpextrw [rax], xmm0, 000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b01 0xC5 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "st1 {v16.h}[0], [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "st1 {v2.h}[0], [x20]"
       ]
     },
     "vpextrw [rax], xmm0, 001b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b01 0xC5 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "st1 {v16.h}[1], [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "st1 {v2.h}[1], [x20]"
       ]
     },
     "vpextrw [rax], xmm0, 111b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b01 0xC5 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "st1 {v16.h}[7], [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "st1 {v2.h}[7], [x20]"
       ]
     },
     "vshufps xmm0, xmm1, xmm2, 00b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b00 0xC6 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v17.s[0]",
-        "dup v3.4s, v18.s[0]",
-        "zip1 v16.2d, v2.2d, v3.2d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "dup v4.4s, v2.s[0]",
+        "dup v2.4s, v3.s[0]",
+        "zip1 v3.2d, v4.2d, v2.2d",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vshufps ymm0, ymm1, ymm2, 00b": {
-      "ExpectedInstructionCount": 50,
+      "ExpectedInstructionCount": 59,
       "Comment": [
         "Map 1 0b00 0xC6 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z1.s, s17",
-        "mov z2.d, z17.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z1.s, s2",
+        "mov z4.d, z2.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-4",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[4]",
+        "mov z1.s, z2.s[4]",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #0",
-        "mov z2.s, p0/m, z1.s",
+        "mov z5.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s17",
+        "mov z1.s, s2",
+        "mov z4.d, z5.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-3",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[4]",
+        "mov z1.s, z2.s[4]",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #1",
-        "mov z2.s, p0/m, z1.s",
+        "mov z5.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s18",
+        "mov z1.s, s3",
+        "mov z2.d, z5.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-2",
         "mov z2.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z18.s[4]",
+        "mov z1.s, z3.s[4]",
+        "mov z4.d, z2.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #2",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s18",
+        "mov z1.s, s3",
+        "mov z2.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-1",
         "mov z2.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z18.s[4]",
-        "mov z16.d, z2.d",
+        "mov z1.s, z3.s[4]",
+        "mov z4.d, z2.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #3",
-        "mov z16.s, p0/m, z1.s",
-        "msr nzcv, x0"
+        "mov z4.s, p0/m, z1.s",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vshufps xmm0, xmm1, xmm2, 01b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b00 0xC6 128-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
         "ldr x0, [x28, #1768]",
-        "ldr q2, [x0, #16]",
-        "tbl v16.16b, {v17.16b, v18.16b}, v2.16b"
+        "ldr q4, [x0, #16]",
+        "tbl v5.16b, {v2.16b, v3.16b}, v4.16b",
+        "mov z16.d, p7/m, z5.d"
       ]
     },
     "vshufps ymm0, ymm1, ymm2, 01b": {
-      "ExpectedInstructionCount": 50,
+      "ExpectedInstructionCount": 59,
       "Comment": [
         "Map 1 0b00 0xC6 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z1.s, z17.s[1]",
-        "mov z2.d, z17.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z1.s, z2.s[1]",
+        "mov z4.d, z2.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-4",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[5]",
+        "mov z1.s, z2.s[5]",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #0",
-        "mov z2.s, p0/m, z1.s",
+        "mov z5.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s17",
+        "mov z1.s, s2",
+        "mov z4.d, z5.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-3",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[4]",
+        "mov z1.s, z2.s[4]",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #1",
-        "mov z2.s, p0/m, z1.s",
+        "mov z5.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s18",
+        "mov z1.s, s3",
+        "mov z2.d, z5.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-2",
         "mov z2.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z18.s[4]",
+        "mov z1.s, z3.s[4]",
+        "mov z4.d, z2.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #2",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s18",
+        "mov z1.s, s3",
+        "mov z2.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-1",
         "mov z2.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z18.s[4]",
-        "mov z16.d, z2.d",
+        "mov z1.s, z3.s[4]",
+        "mov z4.d, z2.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #3",
-        "mov z16.s, p0/m, z1.s",
-        "msr nzcv, x0"
+        "mov z4.s, p0/m, z1.s",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vshufps xmm0, xmm1, xmm2, 10b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b00 0xC6 128-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
         "ldr x0, [x28, #1768]",
-        "ldr q2, [x0, #32]",
-        "tbl v16.16b, {v17.16b, v18.16b}, v2.16b"
+        "ldr q4, [x0, #32]",
+        "tbl v5.16b, {v2.16b, v3.16b}, v4.16b",
+        "mov z16.d, p7/m, z5.d"
       ]
     },
     "vshufps ymm0, ymm1, ymm2, 10b": {
-      "ExpectedInstructionCount": 50,
+      "ExpectedInstructionCount": 59,
       "Comment": [
         "Map 1 0b00 0xC6 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z1.s, z17.s[2]",
-        "mov z2.d, z17.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z1.s, z2.s[2]",
+        "mov z4.d, z2.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-4",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[6]",
+        "mov z1.s, z2.s[6]",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #0",
-        "mov z2.s, p0/m, z1.s",
+        "mov z5.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s17",
+        "mov z1.s, s2",
+        "mov z4.d, z5.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-3",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[4]",
+        "mov z1.s, z2.s[4]",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #1",
-        "mov z2.s, p0/m, z1.s",
+        "mov z5.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s18",
+        "mov z1.s, s3",
+        "mov z2.d, z5.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-2",
         "mov z2.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z18.s[4]",
+        "mov z1.s, z3.s[4]",
+        "mov z4.d, z2.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #2",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s18",
+        "mov z1.s, s3",
+        "mov z2.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-1",
         "mov z2.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z18.s[4]",
-        "mov z16.d, z2.d",
+        "mov z1.s, z3.s[4]",
+        "mov z4.d, z2.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #3",
-        "mov z16.s, p0/m, z1.s",
-        "msr nzcv, x0"
+        "mov z4.s, p0/m, z1.s",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vshufps xmm0, xmm1, xmm2, 11b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b00 0xC6 128-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
         "ldr x0, [x28, #1768]",
-        "ldr q2, [x0, #48]",
-        "tbl v16.16b, {v17.16b, v18.16b}, v2.16b"
+        "ldr q4, [x0, #48]",
+        "tbl v5.16b, {v2.16b, v3.16b}, v4.16b",
+        "mov z16.d, p7/m, z5.d"
       ]
     },
     "vshufps ymm0, ymm1, ymm2, 11b": {
-      "ExpectedInstructionCount": 50,
+      "ExpectedInstructionCount": 59,
       "Comment": [
         "Map 1 0b00 0xC6 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z1.s, z17.s[3]",
-        "mov z2.d, z17.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z1.s, z2.s[3]",
+        "mov z4.d, z2.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-4",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[7]",
+        "mov z1.s, z2.s[7]",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #0",
-        "mov z2.s, p0/m, z1.s",
+        "mov z5.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s17",
+        "mov z1.s, s2",
+        "mov z4.d, z5.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-3",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[4]",
+        "mov z1.s, z2.s[4]",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #1",
-        "mov z2.s, p0/m, z1.s",
+        "mov z5.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s18",
+        "mov z1.s, s3",
+        "mov z2.d, z5.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-2",
         "mov z2.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z18.s[4]",
+        "mov z1.s, z3.s[4]",
+        "mov z4.d, z2.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #2",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s18",
+        "mov z1.s, s3",
+        "mov z2.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-1",
         "mov z2.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z18.s[4]",
-        "mov z16.d, z2.d",
+        "mov z1.s, z3.s[4]",
+        "mov z4.d, z2.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #3",
-        "mov z16.s, p0/m, z1.s",
-        "msr nzcv, x0"
+        "mov z4.s, p0/m, z1.s",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vshufpd xmm0, xmm1, xmm2, 0b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xC6 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "zip1 v16.2d, v17.2d, v18.2d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "zip1 v4.2d, v2.2d, v3.2d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vshufpd ymm0, ymm1, ymm2, 0b": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 31,
       "Comment": [
         "Map 1 0b01 0xC6 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z1.d, d17",
-        "mov z2.d, z17.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z1.d, d2",
+        "mov z4.d, z2.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-2",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[2]",
+        "mov z1.d, z2.d[2]",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z2.d, p0/m, z1.d",
+        "mov z5.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, d18",
+        "mov z1.d, d3",
+        "mov z2.d, z5.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
         "mov z2.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z18.d[2]",
-        "mov z16.d, z2.d",
+        "mov z1.d, z3.d[2]",
+        "mov z4.d, z2.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z4.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vshufpd xmm0, xmm1, xmm2, 1b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xC6 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ext v16.16b, v17.16b, v18.16b, #8"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "ext v4.16b, v2.16b, v3.16b, #8",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vshufpd ymm0, ymm1, ymm2, 1b": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 31,
       "Comment": [
         "Map 1 0b01 0xC6 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z1.d, z17.d[1]",
-        "mov z2.d, z17.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z1.d, z2.d[1]",
+        "mov z4.d, z2.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-2",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[2]",
+        "mov z1.d, z2.d[2]",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z2.d, p0/m, z1.d",
+        "mov z5.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, d18",
+        "mov z1.d, d3",
+        "mov z2.d, z5.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
         "mov z2.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z18.d[2]",
-        "mov z16.d, z2.d",
+        "mov z1.d, z3.d[2]",
+        "mov z4.d, z2.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z4.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vmovaps xmm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b00 0x28 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ldr q16, [x4]"
+        "mov x20, x4",
+        "ldr q2, [x20]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmovaps ymm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b00 0x28 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "ld1b {z16.b}, p7/z, [x4]"
+        "mov x20, x4",
+        "ld1b {z2.b}, p7/z, [x20]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmovaps xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b00 0x29 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vmovaps ymm0, ymm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map 1 0b00 0x29 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z16.d, p7/m, z17.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmovapd xmm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b01 0x28 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ldr q16, [x4]"
+        "mov x20, x4",
+        "ldr q2, [x20]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmovapd ymm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b01 0x28 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "ld1b {z16.b}, p7/z, [x4]"
+        "mov x20, x4",
+        "ld1b {z2.b}, p7/z, [x20]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmovapd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b01 0x29 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vmovapd ymm0, ymm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map 1 0b01 0x29 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z16.d, p7/m, z17.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmovaps [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b00 0x29 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "str q16, [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "str q2, [x20]"
       ]
     },
     "vmovaps [rax], ymm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b00 0x29 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "st1b {z16.b}, p7, [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "st1b {z2.b}, p7, [x20]"
       ]
     },
     "vmovapd [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b01 0x29 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "str q16, [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "str q2, [x20]"
       ]
     },
     "vmovapd [rax], ymm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b01 0x29 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "st1b {z16.b}, p7, [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "st1b {z2.b}, p7, [x20]"
       ]
     },
     "vcvtsi2ss xmm0, xmm1, eax": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b10 0x2A 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "scvtf s0, w4",
-        "mov v16.s[0], v0.s[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "scvtf s0, w20",
+        "mov v3.s[0], v0.s[0]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vcvtsi2ss xmm0, xmm1, rax": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b10 0x2A 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "scvtf s0, x4",
-        "mov v16.s[0], v0.s[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "scvtf s0, x20",
+        "mov v3.s[0], v0.s[0]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vcvtsi2sd xmm0, xmm1, eax": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b11 0x2A 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "scvtf d0, w4",
-        "mov v16.d[0], v0.d[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "scvtf d0, w20",
+        "mov v3.d[0], v0.d[0]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vcvtsi2sd xmm0, xmm1, rax": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b11 0x2A 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "scvtf d0, x4",
-        "mov v16.d[0], v0.d[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "scvtf d0, x20",
+        "mov v3.d[0], v0.d[0]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vmovntps [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b00 0x2B 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "str q16, [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "str q2, [x20]"
       ]
     },
     "vmovntps [rax], ymm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b00 0x2B 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "st1b {z16.b}, p7, [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "st1b {z2.b}, p7, [x20]"
       ]
     },
     "vmovntpd [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b01 0x2B 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "str q16, [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "str q2, [x20]"
       ]
     },
     "vmovntpd [rax], ymm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b01 0x2B 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "st1b {z16.b}, p7, [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "st1b {z2.b}, p7, [x20]"
       ]
     },
     "vcvttss2si eax, xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b10 0x2c 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcvtzs w4, s16"
+        "mov z2.d, p7/m, z16.d",
+        "fcvtzs w20, s2",
+        "mov x4, x20"
       ]
     },
     "vcvttss2si rax, xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b10 0x2c 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcvtzs x4, s16"
+        "mov z2.d, p7/m, z16.d",
+        "fcvtzs x20, s2",
+        "mov x4, x20"
       ]
     },
     "vcvttsd2si eax, xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b11 0x2c 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcvtzs w4, d16"
+        "mov z2.d, p7/m, z16.d",
+        "fcvtzs w20, d2",
+        "mov x4, x20"
       ]
     },
     "vcvttsd2si rax, xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b11 0x2c 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcvtzs x4, d16"
+        "mov z2.d, p7/m, z16.d",
+        "fcvtzs x20, d2",
+        "mov x4, x20"
       ]
     },
     "vcvtss2si eax, xmm0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b10 0x2d 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "frinti s0, s16",
-        "fcvtzs w4, s0"
+        "mov z2.d, p7/m, z16.d",
+        "frinti s0, s2",
+        "fcvtzs w20, s0",
+        "mov x4, x20"
       ]
     },
     "vcvtss2si rax, xmm0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b10 0x2d 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "frinti s0, s16",
-        "fcvtzs x4, s0"
+        "mov z2.d, p7/m, z16.d",
+        "frinti s0, s2",
+        "fcvtzs x20, s0",
+        "mov x4, x20"
       ]
     },
     "vcvtsd2si eax, xmm0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b11 0x2d 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "frinti d0, d16",
-        "fcvtzs x4, d0"
+        "mov z2.d, p7/m, z16.d",
+        "frinti d0, d2",
+        "fcvtzs x20, d0",
+        "mov x4, x20"
       ]
     },
     "vcvtsd2si rax, xmm0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b11 0x2d 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "frinti d0, d16",
-        "fcvtzs x4, d0"
+        "mov z2.d, p7/m, z16.d",
+        "frinti d0, d2",
+        "fcvtzs x20, d0",
+        "mov x4, x20"
       ]
     },
     "vucomiss xmm0, xmm1": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "Map 1 0b00 0x2e 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmp s16, s17",
-        "mov w27, #0x0",
-        "cset w20, eq",
-        "cset w21, lo",
-        "cset w22, vs",
-        "orr w21, w21, w22",
-        "lsl x21, x21, #29",
-        "orr w20, w20, w22",
-        "orr w20, w21, w20, lsl #30",
-        "eor w26, w22, #0x1",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "fcmp s2, s3",
+        "mov w20, #0x0",
+        "cset w21, eq",
+        "cset w22, lo",
+        "cset w23, vs",
+        "orr w24, w22, w23",
+        "lsl x22, x24, #29",
+        "orr w24, w21, w23",
+        "orr w21, w22, w24, lsl #30",
+        "eor w22, w23, #0x1",
+        "mov x26, x22",
+        "mov x27, x20",
+        "msr nzcv, x21"
       ]
     },
     "vucomisd xmm0, xmm1": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "Map 1 0b01 0x2e 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmp d16, d17",
-        "mov w27, #0x0",
-        "cset w20, eq",
-        "cset w21, lo",
-        "cset w22, vs",
-        "orr w21, w21, w22",
-        "lsl x21, x21, #29",
-        "orr w20, w20, w22",
-        "orr w20, w21, w20, lsl #30",
-        "eor w26, w22, #0x1",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "fcmp d2, d3",
+        "mov w20, #0x0",
+        "cset w21, eq",
+        "cset w22, lo",
+        "cset w23, vs",
+        "orr w24, w22, w23",
+        "lsl x22, x24, #29",
+        "orr w24, w21, w23",
+        "orr w21, w22, w24, lsl #30",
+        "eor w22, w23, #0x1",
+        "mov x26, x22",
+        "mov x27, x20",
+        "msr nzcv, x21"
       ]
     },
     "vcomiss xmm0, xmm1": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "Map 1 0b00 0x2f 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmp s16, s17",
-        "mov w27, #0x0",
-        "cset w20, eq",
-        "cset w21, lo",
-        "cset w22, vs",
-        "orr w21, w21, w22",
-        "lsl x21, x21, #29",
-        "orr w20, w20, w22",
-        "orr w20, w21, w20, lsl #30",
-        "eor w26, w22, #0x1",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "fcmp s2, s3",
+        "mov w20, #0x0",
+        "cset w21, eq",
+        "cset w22, lo",
+        "cset w23, vs",
+        "orr w24, w22, w23",
+        "lsl x22, x24, #29",
+        "orr w24, w21, w23",
+        "orr w21, w22, w24, lsl #30",
+        "eor w22, w23, #0x1",
+        "mov x26, x22",
+        "mov x27, x20",
+        "msr nzcv, x21"
       ]
     },
     "vcomisd xmm0, xmm1": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "Map 1 0b01 0x2f 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmp d16, d17",
-        "mov w27, #0x0",
-        "cset w20, eq",
-        "cset w21, lo",
-        "cset w22, vs",
-        "orr w21, w21, w22",
-        "lsl x21, x21, #29",
-        "orr w20, w20, w22",
-        "orr w20, w21, w20, lsl #30",
-        "eor w26, w22, #0x1",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "fcmp d2, d3",
+        "mov w20, #0x0",
+        "cset w21, eq",
+        "cset w22, lo",
+        "cset w23, vs",
+        "orr w24, w22, w23",
+        "lsl x22, x24, #29",
+        "orr w24, w21, w23",
+        "orr w21, w22, w24, lsl #30",
+        "eor w22, w23, #0x1",
+        "mov x26, x22",
+        "mov x27, x20",
+        "msr nzcv, x21"
       ]
     },
     "vaddps xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b00 0x58 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fadd v16.4s, v17.4s, v18.4s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fadd v4.4s, v2.4s, v3.4s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vaddps ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b00 0x58 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fadd z16.s, z17.s, z18.s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fadd z4.s, z2.s, z3.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vaddpd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0x58 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fadd v16.2d, v17.2d, v18.2d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fadd v4.2d, v2.2d, v3.2d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vaddpd ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0x58 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fadd z16.d, z17.d, z18.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fadd z4.d, z2.d, z3.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vaddss xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b10 0x58 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fadd s0, s17, s18",
-        "mov v16.s[0], v0.s[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fadd s0, s2, s3",
+        "mov v4.s[0], v0.s[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vaddsd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b11 0x58 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fadd d0, d17, d18",
-        "mov v16.d[0], v0.d[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fadd d0, d2, d3",
+        "mov v4.d[0], v0.d[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vmulps xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b00 0x59 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fmul v16.4s, v17.4s, v18.4s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fmul v4.4s, v2.4s, v3.4s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vmulps ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b00 0x59 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fmul z16.s, z17.s, z18.s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fmul z4.s, z2.s, z3.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vmulpd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0x59 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fmul v16.2d, v17.2d, v18.2d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fmul v4.2d, v2.2d, v3.2d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vmulpd ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0x59 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fmul z16.d, z17.d, z18.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fmul z4.d, z2.d, z3.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vmulss xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b10 0x59 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fmul s0, s17, s18",
-        "mov v16.s[0], v0.s[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fmul s0, s2, s3",
+        "mov v4.s[0], v0.s[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vmulsd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b11 0x59 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fmul d0, d17, d18",
-        "mov v16.d[0], v0.d[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fmul d0, d2, d3",
+        "mov v4.d[0], v0.d[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcvtps2pd xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b00 0x5a 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcvtl v2.2d, v17.2s",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z17.d",
+        "fcvtl v3.2d, v2.2s",
+        "mov v2.16b, v3.16b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vcvtpd2ps xmm0, [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0x5a 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ldr q2, [x4]",
-        "fcvtn v16.2s, v2.2d"
+        "mov x20, x4",
+        "ldr q2, [x20]",
+        "fcvtn v3.2s, v2.2d",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vcvtpd2ps xmm0, yword [rax]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b01 0x5a 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ld1b {z2.b}, p7/z, [x4]",
-        "fcvtnt z2.s, p7/m, z2.d",
-        "uzp2 z2.s, z2.s, z2.s",
-        "mov v16.16b, v2.16b"
+        "mov x20, x4",
+        "ld1b {z2.b}, p7/z, [x20]",
+        "fcvtnt z3.s, p7/m, z2.d",
+        "uzp2 z3.s, z3.s, z3.s",
+        "mov v2.16b, v3.16b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vcvtpd2ps xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b01 0x5a 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcvtn v16.2s, v17.2d"
+        "mov z2.d, p7/m, z17.d",
+        "fcvtn v3.2s, v2.2d",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vcvtss2sd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b10 0x5a 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fcvt d0, s18",
-        "mov v16.d[0], v0.d[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fcvt d0, s3",
+        "mov v4.d[0], v0.d[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcvtsd2ss xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b11 0x5a 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fcvt s0, d18",
-        "mov v16.s[0], v0.s[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fcvt s0, d3",
+        "mov v4.s[0], v0.s[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcvtdq2ps xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b00 0x5b 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "scvtf v16.4s, v17.4s"
+        "mov z2.d, p7/m, z17.d",
+        "scvtf v3.4s, v2.4s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vcvtdq2ps ymm0, ymm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b00 0x5b 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "scvtf z16.s, p7/m, z17.s"
+        "mov z2.d, p7/m, z17.d",
+        "scvtf z3.s, p7/m, z2.s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vcvtps2dq xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0x5b 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "frinti v16.4s, v17.4s",
-        "fcvtzs v16.4s, v16.4s"
+        "mov z2.d, p7/m, z17.d",
+        "frinti v3.4s, v2.4s",
+        "fcvtzs v3.4s, v3.4s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vcvtps2dq ymm0, ymm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0x5b 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "frinti z16.s, p7/m, z17.s",
-        "fcvtzs z16.s, p7/m, z16.s"
+        "mov z2.d, p7/m, z17.d",
+        "frinti z3.s, p7/m, z2.s",
+        "fcvtzs z3.s, p7/m, z3.s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vcvttps2dq xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b10 0x5b 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcvtzs v16.4s, v17.4s"
+        "mov z2.d, p7/m, z17.d",
+        "fcvtzs v3.4s, v2.4s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vcvttps2dq ymm0, ymm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b10 0x5b 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcvtzs z16.s, p7/m, z17.s"
+        "mov z2.d, p7/m, z17.d",
+        "fcvtzs z3.s, p7/m, z2.s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vsubps xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b00 0x5c 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fsub v16.4s, v17.4s, v18.4s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fsub v4.4s, v2.4s, v3.4s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vsubps ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b00 0x5c 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fsub z16.s, z17.s, z18.s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fsub z4.s, z2.s, z3.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vsubpd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0x5c 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fsub v16.2d, v17.2d, v18.2d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fsub v4.2d, v2.2d, v3.2d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vsubpd ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0x5c 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fsub z16.d, z17.d, z18.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fsub z4.d, z2.d, z3.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vsubss xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b10 0x5c 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fsub s0, s17, s18",
-        "mov v16.s[0], v0.s[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fsub s0, s2, s3",
+        "mov v4.s[0], v0.s[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vsubsd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b11 0x5c 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fsub d0, d17, d18",
-        "mov v16.d[0], v0.d[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fsub d0, d2, d3",
+        "mov v4.d[0], v0.d[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vminps xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b00 0x5d 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt v0.4s, v18.4s, v17.4s",
-        "mov v16.16b, v17.16b",
-        "bif v16.16b, v18.16b, v0.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmgt v0.4s, v3.4s, v2.4s",
+        "mov v4.16b, v2.16b",
+        "bif v4.16b, v3.16b, v0.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vminps ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 1 0b00 0x5d 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt p0.s, p7/z, z18.s, z17.s",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmgt p0.s, p7/z, z3.s, z2.s",
         "not p0.b, p7/z, p0.b",
-        "mov z0.d, z17.d",
-        "mov z0.s, p0/m, z18.s",
-        "mov z16.d, z0.d"
+        "mov z0.d, z2.d",
+        "mov z0.s, p0/m, z3.s",
+        "mov z4.d, z0.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vminpd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b01 0x5d 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt v0.2d, v18.2d, v17.2d",
-        "mov v16.16b, v17.16b",
-        "bif v16.16b, v18.16b, v0.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmgt v0.2d, v3.2d, v2.2d",
+        "mov v4.16b, v2.16b",
+        "bif v4.16b, v3.16b, v0.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vminpd ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 1 0b01 0x5d 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt p0.d, p7/z, z18.d, z17.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmgt p0.d, p7/z, z3.d, z2.d",
         "not p0.b, p7/z, p0.b",
-        "mov z0.d, z17.d",
-        "mov z0.d, p0/m, z18.d",
-        "mov z16.d, z0.d"
+        "mov z0.d, z2.d",
+        "mov z0.d, p0/m, z3.d",
+        "mov z4.d, z0.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vminss xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map 1 0b10 0x5d 128-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
         "mrs x20, nzcv",
-        "mov v16.16b, v17.16b",
-        "fcmp s17, s18",
-        "fcsel s0, s17, s18, mi",
-        "mov v16.s[0], v0.s[0]",
+        "mov v4.16b, v2.16b",
+        "fcmp s2, s3",
+        "fcsel s0, s2, s3, mi",
+        "mov v4.s[0], v0.s[0]",
+        "mov z16.d, p7/m, z4.d",
         "msr nzcv, x20"
       ]
     },
     "vminsd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map 1 0b11 0x5d 128-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
         "mrs x20, nzcv",
-        "mov v16.16b, v17.16b",
-        "fcmp d17, d18",
-        "fcsel d0, d17, d18, mi",
-        "mov v16.d[0], v0.d[0]",
+        "mov v4.16b, v2.16b",
+        "fcmp d2, d3",
+        "fcsel d0, d2, d3, mi",
+        "mov v4.d[0], v0.d[0]",
+        "mov z16.d, p7/m, z4.d",
         "msr nzcv, x20"
       ]
     },
     "vdivps xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b00 0x5e 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fdiv v16.4s, v17.4s, v18.4s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fdiv v4.4s, v2.4s, v3.4s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vdivps ymm0, ymm0, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Aliasing source and destination",
         "Map 1 0b00 0x5e 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fdiv z16.s, p7/m, z16.s, z18.s"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "fdiv z4.s, p7/m, z4.s, z3.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vdivps ymm0, ymm1, ymm0": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Aliasing source and destination",
         "Map 1 0b00 0x5e 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z0, z17",
-        "fdiv z0.s, p7/m, z0.s, z16.s",
-        "mov z16.d, z0.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z16.d",
+        "movprfx z4, z2",
+        "fdiv z4.s, p7/m, z4.s, z3.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vdivps ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b00 0x5e 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z16, z17",
-        "fdiv z16.s, p7/m, z16.s, z18.s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "fdiv z4.s, p7/m, z4.s, z3.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vdivpd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0x5e 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fdiv v16.2d, v17.2d, v18.2d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fdiv v4.2d, v2.2d, v3.2d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vdivpd ymm0, ymm1, ymm0": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Aliasing source and destination",
         "Map 1 0b01 0x5e 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z0, z17",
-        "fdiv z0.d, p7/m, z0.d, z16.d",
-        "mov z16.d, z0.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z16.d",
+        "movprfx z4, z2",
+        "fdiv z4.d, p7/m, z4.d, z3.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vdivpd ymm0, ymm0, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Aliasing source and destination",
         "Map 1 0b01 0x5e 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fdiv z16.d, p7/m, z16.d, z18.d"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "fdiv z4.d, p7/m, z4.d, z3.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vdivpd ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b01 0x5e 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z16, z17",
-        "fdiv z16.d, p7/m, z16.d, z18.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "fdiv z4.d, p7/m, z4.d, z3.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vdivss xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b10 0x5e 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fdiv s0, s17, s18",
-        "mov v16.s[0], v0.s[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fdiv s0, s2, s3",
+        "mov v4.s[0], v0.s[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vdivsd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b11 0x5e 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "fdiv d0, d17, d18",
-        "mov v16.d[0], v0.d[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "fdiv d0, d2, d3",
+        "mov v4.d[0], v0.d[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vmaxps xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b00 0x5f 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt v0.4s, v18.4s, v17.4s",
-        "mov v16.16b, v17.16b",
-        "bit v16.16b, v18.16b, v0.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmgt v0.4s, v3.4s, v2.4s",
+        "mov v4.16b, v2.16b",
+        "bit v4.16b, v3.16b, v0.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vmaxps ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b00 0x5f 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt p0.s, p7/z, z18.s, z17.s",
-        "mov z0.d, z17.d",
-        "mov z0.s, p0/m, z18.s",
-        "mov z16.d, z0.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmgt p0.s, p7/z, z3.s, z2.s",
+        "mov z0.d, z2.d",
+        "mov z0.s, p0/m, z3.s",
+        "mov z4.d, z0.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vmaxpd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b01 0x5f 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt v0.2d, v18.2d, v17.2d",
-        "mov v16.16b, v17.16b",
-        "bit v16.16b, v18.16b, v0.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmgt v0.2d, v3.2d, v2.2d",
+        "mov v4.16b, v2.16b",
+        "bit v4.16b, v3.16b, v0.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vmaxpd ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b01 0x5f 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcmgt p0.d, p7/z, z18.d, z17.d",
-        "mov z0.d, z17.d",
-        "mov z0.d, p0/m, z18.d",
-        "mov z16.d, z0.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fcmgt p0.d, p7/z, z3.d, z2.d",
+        "mov z0.d, z2.d",
+        "mov z0.d, p0/m, z3.d",
+        "mov z4.d, z0.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vmaxss xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map 1 0b10 0x5f 128-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
         "mrs x20, nzcv",
-        "mov v16.16b, v17.16b",
-        "fcmp s17, s18",
-        "fcsel s0, s18, s17, mi",
-        "mov v16.s[0], v0.s[0]",
+        "mov v4.16b, v2.16b",
+        "fcmp s2, s3",
+        "fcsel s0, s3, s2, mi",
+        "mov v4.s[0], v0.s[0]",
+        "mov z16.d, p7/m, z4.d",
         "msr nzcv, x20"
       ]
     },
     "vmaxsd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map 1 0b11 0x5f 128-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
         "mrs x20, nzcv",
-        "mov v16.16b, v17.16b",
-        "fcmp d17, d18",
-        "fcsel d0, d18, d17, mi",
-        "mov v16.d[0], v0.d[0]",
+        "mov v4.16b, v2.16b",
+        "fcmp d2, d3",
+        "fcsel d0, d3, d2, mi",
+        "mov v4.d[0], v0.d[0]",
+        "mov z16.d, p7/m, z4.d",
         "msr nzcv, x20"
       ]
     },
     "vpunpckhbw xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0x68 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "zip2 v16.16b, v17.16b, v18.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "zip2 v4.16b, v2.16b, v3.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpunpckhbw ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 1 0b01 0x68 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "zip1 z2.b, z17.b, z18.b",
-        "zip2 z3.b, z17.b, z18.b",
-        "mov z1.q, z2.q[1]",
-        "mov z16.d, z3.d",
-        "mov z16.b, p6/m, z1.b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "zip1 z4.b, z2.b, z3.b",
+        "zip2 z5.b, z2.b, z3.b",
+        "mov z1.q, z4.q[1]",
+        "mov z2.d, z5.d",
+        "mov z2.b, p6/m, z1.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpunpckhwd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0x69 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "zip2 v16.8h, v17.8h, v18.8h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "zip2 v4.8h, v2.8h, v3.8h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpunpckhwd ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 1 0b01 0x69 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "zip1 z2.h, z17.h, z18.h",
-        "zip2 z3.h, z17.h, z18.h",
-        "mov z1.q, z2.q[1]",
-        "mov z16.d, z3.d",
-        "mov z16.b, p6/m, z1.b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "zip1 z4.h, z2.h, z3.h",
+        "zip2 z5.h, z2.h, z3.h",
+        "mov z1.q, z4.q[1]",
+        "mov z2.d, z5.d",
+        "mov z2.b, p6/m, z1.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpunpckhdq xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0x6a 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "zip2 v16.4s, v17.4s, v18.4s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "zip2 v4.4s, v2.4s, v3.4s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpunpckhdq ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 1 0b01 0x6a 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "zip1 z2.s, z17.s, z18.s",
-        "zip2 z3.s, z17.s, z18.s",
-        "mov z1.q, z2.q[1]",
-        "mov z16.d, z3.d",
-        "mov z16.b, p6/m, z1.b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "zip1 z4.s, z2.s, z3.s",
+        "zip2 z5.s, z2.s, z3.s",
+        "mov z1.q, z4.q[1]",
+        "mov z2.d, z5.d",
+        "mov z2.b, p6/m, z1.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpackssdw xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b01 0x6b 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "sqxtn v16.4h, v17.4s",
-        "sqxtn2 v16.8h, v18.4s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "sqxtn v4.4h, v2.4s",
+        "sqxtn2 v4.8h, v3.4s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpackssdw ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 19,
+      "ExpectedInstructionCount": 22,
       "Comment": [
         "Map 1 0b01 0x6b 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "sqxtnb z1.h, z18.s",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "sqxtnb z1.h, z3.s",
         "uzp1 z1.h, z1.h, z1.h",
-        "sqxtnb z2.h, z17.s",
-        "uzp1 z2.h, z2.h, z2.h",
-        "splice z2.h, p6, z2.h, z1.h",
-        "mov z1.d, z2.d[1]",
-        "mov z3.d, z2.d",
+        "sqxtnb z4.h, z2.s",
+        "uzp1 z4.h, z4.h, z4.h",
+        "splice z4.h, p6, z4.h, z1.h",
+        "mov z1.d, z4.d[1]",
+        "mov z2.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z3.d, p0/m, z1.d",
+        "mov z2.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z2.d[2]",
-        "mov z16.d, z3.d",
+        "mov z1.d, z4.d[2]",
+        "mov z3.d, z2.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z3.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpunpcklqdq xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0x6c 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "zip1 v16.2d, v17.2d, v18.2d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "zip1 v4.2d, v2.2d, v3.2d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpunpcklqdq ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map 1 0b01 0x6c 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "zip1 z2.d, z17.d, z18.d",
-        "zip2 z3.d, z17.d, z18.d",
-        "mov z1.q, q3",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "zip1 z4.d, z2.d, z3.d",
+        "zip2 z5.d, z2.d, z3.d",
+        "mov z1.q, q5",
+        "mov z2.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z2.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpunpckhqdq xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0x6d 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "zip2 v16.2d, v17.2d, v18.2d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "zip2 v4.2d, v2.2d, v3.2d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpunpckhqdq ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 1 0b01 0x6d 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "zip1 z2.d, z17.d, z18.d",
-        "zip2 z3.d, z17.d, z18.d",
-        "mov z1.q, z2.q[1]",
-        "mov z16.d, z3.d",
-        "mov z16.b, p6/m, z1.b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "zip1 z4.d, z2.d, z3.d",
+        "zip2 z5.d, z2.d, z3.d",
+        "mov z1.q, z4.q[1]",
+        "mov z2.d, z5.d",
+        "mov z2.b, p6/m, z1.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmovd xmm0, dword [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b01 0x6e 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ldr s16, [x4]"
+        "mov x20, x4",
+        "ldr s2, [x20]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmovq xmm0, qword [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b01 0x6e 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ldr d16, [x4]"
+        "mov x20, x4",
+        "ldr d2, [x20]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmovdqa xmm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b01 0x6f 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ldr q16, [x4]"
+        "mov x20, x4",
+        "ldr q2, [x20]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmovdqa [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b01 0x6f 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "str q16, [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "str q2, [x20]"
       ]
     },
     "vmovdqu xmm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b10 0x6f 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ldr q16, [x4]"
+        "mov x20, x4",
+        "ldr q2, [x20]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmovdqu [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b10 0x6f 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "str q16, [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "str q2, [x20]"
       ]
     },
     "vhaddpd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0x7c 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "faddp v16.2d, v17.2d, v18.2d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "faddp v4.2d, v2.2d, v3.2d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vhaddpd ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 19,
+      "ExpectedInstructionCount": 22,
       "Comment": [
         "Map 1 0b01 0x7c 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z0, z17",
-        "faddp z0.d, p7/m, z0.d, z18.d",
-        "uzp1 z2.d, z0.d, z0.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z0, z2",
+        "faddp z0.d, p7/m, z0.d, z3.d",
+        "uzp1 z4.d, z0.d, z0.d",
         "uzp2 z1.d, z0.d, z0.d",
-        "splice z2.d, p6, z2.d, z1.d",
-        "mov z1.d, z2.d[2]",
-        "mov z3.d, z2.d",
+        "splice z4.d, p6, z4.d, z1.d",
+        "mov z1.d, z4.d[2]",
+        "mov z2.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z3.d, p0/m, z1.d",
+        "mov z2.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z2.d[1]",
-        "mov z16.d, z3.d",
+        "mov z1.d, z4.d[1]",
+        "mov z3.d, z2.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z3.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vhaddps xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b11 0x7c 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "faddp v16.4s, v17.4s, v18.4s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "faddp v4.4s, v2.4s, v3.4s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vhaddps ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 19,
+      "ExpectedInstructionCount": 22,
       "Comment": [
         "Map 1 0b11 0x7c 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z0, z17",
-        "faddp z0.s, p7/m, z0.s, z18.s",
-        "uzp1 z2.s, z0.s, z0.s",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z0, z2",
+        "faddp z0.s, p7/m, z0.s, z3.s",
+        "uzp1 z4.s, z0.s, z0.s",
         "uzp2 z1.s, z0.s, z0.s",
-        "splice z2.d, p6, z2.d, z1.d",
-        "mov z1.d, z2.d[2]",
-        "mov z3.d, z2.d",
+        "splice z4.d, p6, z4.d, z1.d",
+        "mov z1.d, z4.d[2]",
+        "mov z2.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z3.d, p0/m, z1.d",
+        "mov z2.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z2.d[1]",
-        "mov z16.d, z3.d",
+        "mov z1.d, z4.d[1]",
+        "mov z3.d, z2.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z3.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vhsubpd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b01 0x7d 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "uzp1 v2.2d, v17.2d, v18.2d",
-        "uzp2 v3.2d, v17.2d, v18.2d",
-        "fsub v16.2d, v2.2d, v3.2d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "uzp1 v4.2d, v2.2d, v3.2d",
+        "uzp2 v5.2d, v2.2d, v3.2d",
+        "fsub v2.2d, v4.2d, v5.2d",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vhsubpd ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "Map 1 0b01 0x7d 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "uzp1 z2.d, z17.d, z18.d",
-        "uzp2 z3.d, z17.d, z18.d",
-        "fsub z2.d, z2.d, z3.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "uzp1 z4.d, z2.d, z3.d",
+        "uzp2 z5.d, z2.d, z3.d",
+        "fsub z2.d, z4.d, z5.d",
         "mov z1.d, z2.d[2]",
         "mov z3.d, z2.d",
         "mrs x0, nzcv",
@@ -4231,34 +5187,40 @@
         "mov z3.d, p0/m, z1.d",
         "msr nzcv, x0",
         "mov z1.d, z2.d[1]",
-        "mov z16.d, z3.d",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z4.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vhsubps xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b11 0x7d 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "uzp1 v2.4s, v17.4s, v18.4s",
-        "uzp2 v3.4s, v17.4s, v18.4s",
-        "fsub v16.4s, v2.4s, v3.4s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "uzp1 v4.4s, v2.4s, v3.4s",
+        "uzp2 v5.4s, v2.4s, v3.4s",
+        "fsub v2.4s, v4.4s, v5.4s",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vhsubps ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "Map 1 0b11 0x7d 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "uzp1 z2.s, z17.s, z18.s",
-        "uzp2 z3.s, z17.s, z18.s",
-        "fsub z2.s, z2.s, z3.s",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "uzp1 z4.s, z2.s, z3.s",
+        "uzp2 z5.s, z2.s, z3.s",
+        "fsub z2.s, z4.s, z5.s",
         "mov z1.d, z2.d[2]",
         "mov z3.d, z2.d",
         "mrs x0, nzcv",
@@ -4267,1076 +5229,1373 @@
         "mov z3.d, p0/m, z1.d",
         "msr nzcv, x0",
         "mov z1.d, z2.d[1]",
-        "mov z16.d, z3.d",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z4.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vmovd dword [rax], xmm0": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b01 0x7e 128-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z16.d",
         "movi v0.2d, #0x0",
-        "mov v0.s[0], v16.s[0]",
-        "mov v2.16b, v0.16b",
-        "str s2, [x4]"
+        "mov v0.s[0], v2.s[0]",
+        "mov v3.16b, v0.16b",
+        "mov x20, x4",
+        "str s3, [x20]"
       ]
     },
     "vmovq qword [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b01 0x7e 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "str d16, [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "str d2, [x20]"
       ]
     },
     "vmovdqa ymm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b01 0x7f 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ld1b {z16.b}, p7/z, [x4]"
+        "mov x20, x4",
+        "ld1b {z2.b}, p7/z, [x20]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmovdqa [rax], ymm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b01 0x7f 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "st1b {z16.b}, p7, [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "st1b {z2.b}, p7, [x20]"
       ]
     },
     "vmovdqu ymm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b10 0x7f 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ld1b {z16.b}, p7/z, [x4]"
+        "mov x20, x4",
+        "ld1b {z2.b}, p7/z, [x20]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmovdqu [rax], ymm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b10 0x7f 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "st1b {z16.b}, p7, [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "st1b {z2.b}, p7, [x20]"
       ]
     },
     "vaddsubpd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b01 0xd0 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ldr q2, [x28, #2112]",
-        "eor v2.16b, v18.16b, v2.16b",
-        "fadd v16.2d, v17.2d, v2.2d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "ldr q4, [x28, #2112]",
+        "eor v5.16b, v3.16b, v4.16b",
+        "fadd v3.2d, v2.2d, v5.2d",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vaddsubpd ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b01 0xd0 256-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
         "ldr x0, [x28, #1600]",
-        "ld1b {z2.b}, p7/z, [x0]",
-        "eor z2.d, z18.d, z2.d",
-        "fadd z16.d, z17.d, z2.d"
+        "ld1b {z4.b}, p7/z, [x0]",
+        "eor z5.d, z3.d, z4.d",
+        "fadd z3.d, z2.d, z5.d",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vaddsubps xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b11 0xd0 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ldr q2, [x28, #2080]",
-        "eor v2.16b, v18.16b, v2.16b",
-        "fadd v16.4s, v17.4s, v2.4s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "ldr q4, [x28, #2080]",
+        "eor v5.16b, v3.16b, v4.16b",
+        "fadd v3.4s, v2.4s, v5.4s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vaddsubps ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b11 0xd0 256-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
         "ldr x0, [x28, #1584]",
-        "ld1b {z2.b}, p7/z, [x0]",
-        "eor z2.d, z18.d, z2.d",
-        "fadd z16.s, z17.s, z2.s"
+        "ld1b {z4.b}, p7/z, [x0]",
+        "eor z5.d, z3.d, z4.d",
+        "fadd z3.s, z2.s, z5.s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsrlw xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b01 0xd1 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z0.d, d18",
-        "movprfx z2, z17",
-        "lsr z2.h, p6/m, z2.h, z0.d",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z0.d, d3",
+        "movprfx z4, z2",
+        "lsr z4.h, p6/m, z4.h, z0.d",
+        "mov v2.16b, v4.16b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpsrlw ymm0, ymm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b01 0xd1 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z0.d, d18",
-        "movprfx z16, z17",
-        "lsr z16.h, p7/m, z16.h, z0.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z0.d, d3",
+        "movprfx z4, z2",
+        "lsr z4.h, p7/m, z4.h, z0.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsrld xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b01 0xd2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z0.d, d18",
-        "movprfx z2, z17",
-        "lsr z2.s, p6/m, z2.s, z0.d",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z0.d, d3",
+        "movprfx z4, z2",
+        "lsr z4.s, p6/m, z4.s, z0.d",
+        "mov v2.16b, v4.16b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpsrld ymm0, ymm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b01 0xd2 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z0.d, d18",
-        "movprfx z16, z17",
-        "lsr z16.s, p7/m, z16.s, z0.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z0.d, d3",
+        "movprfx z4, z2",
+        "lsr z4.s, p7/m, z4.s, z0.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsrlq xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b01 0xd3 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z0.d, d18",
-        "movprfx z2, z17",
-        "lsr z2.d, p6/m, z2.d, z0.d",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z0.d, d3",
+        "movprfx z4, z2",
+        "lsr z4.d, p6/m, z4.d, z0.d",
+        "mov v2.16b, v4.16b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpsrlq ymm0, ymm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b01 0xd3 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z0.d, d18",
-        "movprfx z16, z17",
-        "lsr z16.d, p7/m, z16.d, z0.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z0.d, d3",
+        "movprfx z4, z2",
+        "lsr z4.d, p7/m, z4.d, z0.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpaddq xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xd4 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "add v16.2d, v17.2d, v18.2d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "add v4.2d, v2.2d, v3.2d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpaddq ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xd4 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "add z16.d, z17.d, z18.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "add z4.d, z2.d, z3.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmullw xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xd5 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mul v16.8h, v17.8h, v18.8h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mul v4.8h, v2.8h, v3.8h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmullw ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xd4 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mul z16.h, z17.h, z18.h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mul z4.h, z2.h, z3.h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vmovq [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b01 0xd6 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "str d16, [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "str d2, [x20]"
       ]
     },
     "vpmovmskb rax, xmm0": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map 1 0b01 0xd7 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "ldr q2, [x28, #2272]",
-        "cmlt v3.16b, v16.16b, #0",
-        "and v2.16b, v3.16b, v2.16b",
-        "addp v2.16b, v2.16b, v2.16b",
-        "addp v2.8b, v2.8b, v2.8b",
-        "addp v2.8b, v2.8b, v2.8b",
-        "umov w4, v2.h[0]"
+        "mov z2.d, p7/m, z16.d",
+        "ldr q3, [x28, #2272]",
+        "cmlt v4.16b, v2.16b, #0",
+        "and v2.16b, v4.16b, v3.16b",
+        "addp v3.16b, v2.16b, v2.16b",
+        "addp v2.8b, v3.8b, v3.8b",
+        "addp v3.8b, v2.8b, v2.8b",
+        "umov w20, v3.h[0]",
+        "mov x4, x20"
       ]
     },
     "vpmovmskb rax, ymm0": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "Map 1 0b01 0xd7 256-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z16.d",
         "ldr x0, [x28, #1680]",
-        "ld1b {z2.b}, p7/z, [x0]",
+        "ld1b {z3.b}, p7/z, [x0]",
         "mrs x0, nzcv",
         "mov z0.d, #0",
-        "cmplt p0.b, p7/z, z16.b, #0",
-        "not z0.b, p0/m, z16.b",
-        "orr z0.b, p0/m, z0.b, z16.b",
-        "mov z3.d, z0.d",
+        "cmplt p0.b, p7/z, z2.b, #0",
+        "not z0.b, p0/m, z2.b",
+        "orr z0.b, p0/m, z0.b, z2.b",
+        "mov z4.d, z0.d",
         "msr nzcv, x0",
-        "and z2.d, z3.d, z2.d",
+        "and z2.d, z4.d, z3.d",
         "movprfx z0, z2",
         "addp z0.b, p7/m, z0.b, z2.b",
-        "uzp1 z2.b, z0.b, z0.b",
+        "uzp1 z3.b, z0.b, z0.b",
         "uzp2 z1.b, z0.b, z0.b",
-        "splice z2.d, p6, z2.d, z1.d",
-        "addp v2.16b, v2.16b, v2.16b",
-        "addp v2.8b, v2.8b, v2.8b",
-        "mov w4, v2.s[0]"
+        "splice z3.d, p6, z3.d, z1.d",
+        "addp v2.16b, v3.16b, v3.16b",
+        "addp v3.8b, v2.8b, v2.8b",
+        "mov w20, v3.s[0]",
+        "mov x4, x20"
       ]
     },
     "vpsubusb xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xd8 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "uqsub v16.16b, v17.16b, v18.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "uqsub v4.16b, v2.16b, v3.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsubusb ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xd8 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "uqsub z16.b, z17.b, z18.b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "uqsub z4.b, z2.b, z3.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsubusw xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xd9 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "uqsub v16.8h, v17.8h, v18.8h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "uqsub v4.8h, v2.8h, v3.8h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsubusw ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xd9 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "uqsub z16.h, z17.h, z18.h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "uqsub z4.h, z2.h, z3.h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpminub xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xda 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "umin v16.16b, v17.16b, v18.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "umin v4.16b, v2.16b, v3.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpminub ymm0, ymm1, ymm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Aliasing source and destination",
         "Map 1 0b01 0xda 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "umin z16.b, p7/m, z16.b, z17.b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z16.d",
+        "movprfx z4, z2",
+        "umin z4.b, p7/m, z4.b, z3.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpminub ymm0, ymm0, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Aliasing source and destination",
         "Map 1 0b01 0xda 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "umin z16.b, p7/m, z16.b, z18.b"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "umin z4.b, p7/m, z4.b, z3.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpminub ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b01 0xda 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z16, z17",
-        "umin z16.b, p7/m, z16.b, z18.b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "umin z4.b, p7/m, z4.b, z3.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpand xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xdb 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "and v16.16b, v17.16b, v18.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "and v4.16b, v2.16b, v3.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpand ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xdb 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "and z16.d, z17.d, z18.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "and z4.d, z2.d, z3.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpaddusb xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xdc 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "uqadd v16.16b, v17.16b, v18.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "uqadd v4.16b, v2.16b, v3.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpaddusb ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xdc 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "uqadd z16.b, z17.b, z18.b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "uqadd z4.b, z2.b, z3.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpaddusw xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xdd 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "uqadd v16.8h, v17.8h, v18.8h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "uqadd v4.8h, v2.8h, v3.8h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpaddusw ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xdd 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "uqadd z16.h, z17.h, z18.h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "uqadd z4.h, z2.h, z3.h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmaxub xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Aliasing source and destination",
         "Map 1 0b01 0xdd 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "umax v16.16b, v17.16b, v18.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "umax v4.16b, v2.16b, v3.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmaxub ymm0, ymm0, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Aliasing source and destination",
         "Map 1 0b01 0xde 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "umax z16.b, p7/m, z16.b, z18.b"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "umax z4.b, p7/m, z4.b, z3.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmaxub ymm0, ymm1, ymm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Aliasing source and destination",
         "Map 1 0b01 0xde 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "umax z16.b, p7/m, z16.b, z17.b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z16.d",
+        "movprfx z4, z2",
+        "umax z4.b, p7/m, z4.b, z3.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmaxub ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b01 0xde 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z16, z17",
-        "umax z16.b, p7/m, z16.b, z18.b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "umax z4.b, p7/m, z4.b, z3.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpandn xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xdf 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "bic v16.16b, v18.16b, v17.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "bic v4.16b, v3.16b, v2.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpandn ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xdf 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "bic z16.d, z18.d, z17.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "bic z4.d, z3.d, z2.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpavgb xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xe0 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "urhadd v16.16b, v17.16b, v18.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "urhadd v4.16b, v2.16b, v3.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpavgb ymm0, ymm1, ymm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Aliasing source and destination",
         "Map 1 0b01 0xe0 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "urhadd z16.b, p7/m, z16.b, z17.b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z16.d",
+        "movprfx z4, z2",
+        "urhadd z4.b, p7/m, z4.b, z3.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpavgb ymm0, ymm0, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Aliasing source and destination",
         "Map 1 0b01 0xe0 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "urhadd z16.b, p7/m, z16.b, z18.b"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "urhadd z4.b, p7/m, z4.b, z3.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpavgb ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b01 0xe0 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z16, z17",
-        "urhadd z16.b, p7/m, z16.b, z18.b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "urhadd z4.b, p7/m, z4.b, z3.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsraw xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b01 0xe1 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z0.d, d18",
-        "movprfx z2, z17",
-        "asr z2.h, p6/m, z2.h, z0.d",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z0.d, d3",
+        "movprfx z4, z2",
+        "asr z4.h, p6/m, z4.h, z0.d",
+        "mov v2.16b, v4.16b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpsraw ymm0, ymm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b01 0xe1 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z0.d, d18",
-        "movprfx z16, z17",
-        "asr z16.h, p7/m, z16.h, z0.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z0.d, d3",
+        "movprfx z4, z2",
+        "asr z4.h, p7/m, z4.h, z0.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsrad xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b01 0xe2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z0.d, d18",
-        "movprfx z2, z17",
-        "asr z2.s, p6/m, z2.s, z0.d",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z0.d, d3",
+        "movprfx z4, z2",
+        "asr z4.s, p6/m, z4.s, z0.d",
+        "mov v2.16b, v4.16b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpsrad ymm0, ymm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b01 0xe2 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z0.d, d18",
-        "movprfx z16, z17",
-        "asr z16.s, p7/m, z16.s, z0.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z0.d, d3",
+        "movprfx z4, z2",
+        "asr z4.s, p7/m, z4.s, z0.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpavgw xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xe3 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "urhadd v16.8h, v17.8h, v18.8h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "urhadd v4.8h, v2.8h, v3.8h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpavgw ymm0, ymm1, ymm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Aliasing source and destination",
         "Map 1 0b01 0xe3 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "urhadd z16.h, p7/m, z16.h, z17.h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z16.d",
+        "movprfx z4, z2",
+        "urhadd z4.h, p7/m, z4.h, z3.h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpavgw ymm0, ymm0, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Aliasing source and destination",
         "Map 1 0b01 0xe3 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "urhadd z16.h, p7/m, z16.h, z18.h"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "urhadd z4.h, p7/m, z4.h, z3.h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpavgw ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b01 0xe3 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z16, z17",
-        "urhadd z16.h, p7/m, z16.h, z18.h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "urhadd z4.h, p7/m, z4.h, z3.h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmulhuw xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b01 0xe4 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z2, z17",
-        "umulh z2.h, p6/m, z2.h, z18.h",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "umulh z4.h, p6/m, z4.h, z3.h",
+        "mov v2.16b, v4.16b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpmulhuw ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xe4 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "umulh z16.h, z17.h, z18.h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "umulh z4.h, z2.h, z3.h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmulhw xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b01 0xe5 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z2, z17",
-        "smulh z2.h, p6/m, z2.h, z18.h",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "smulh z4.h, p6/m, z4.h, z3.h",
+        "mov v2.16b, v4.16b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpmulhw ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xe5 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "smulh z16.h, z17.h, z18.h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "smulh z4.h, z2.h, z3.h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vcvttpd2dq xmm0, xmm1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b01 0xe6 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcvtn v2.2s, v17.2d",
-        "fcvtzs v2.4s, v2.4s",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z17.d",
+        "fcvtn v3.2s, v2.2d",
+        "fcvtzs v2.4s, v3.4s",
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vcvttpd2dq xmm0, ymm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b01 0xe6 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcvtnt z2.s, p7/m, z17.d",
-        "uzp2 z2.s, z2.s, z2.s",
-        "fcvtzs z2.s, p7/m, z2.s",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z17.d",
+        "fcvtnt z3.s, p7/m, z2.d",
+        "uzp2 z3.s, z3.s, z3.s",
+        "fcvtzs z2.s, p7/m, z3.s",
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vcvtdq2pd xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b10 0xe6 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "sxtl v2.2d, v17.2s",
-        "scvtf v16.2d, v2.2d"
+        "mov z2.d, p7/m, z17.d",
+        "sxtl v3.2d, v2.2s",
+        "scvtf v2.2d, v3.2d",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vcvtdq2pd ymm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b10 0xe6 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "sunpklo z2.d, z17.s",
-        "scvtf z16.d, p7/m, z2.d"
+        "mov z2.d, p7/m, z17.d",
+        "sunpklo z3.d, z2.s",
+        "scvtf z2.d, p7/m, z3.d",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vcvtpd2dq xmm0, xmm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b11 0xe6 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcvtn v2.2s, v17.2d",
-        "frinti v2.4s, v2.4s",
+        "mov z2.d, p7/m, z17.d",
+        "fcvtn v3.2s, v2.2d",
+        "frinti v2.4s, v3.4s",
         "fcvtzs v2.4s, v2.4s",
-        "mov v16.16b, v2.16b"
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vcvtpd2dq xmm0, ymm1": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b11 0xe6 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "fcvtnt z2.s, p7/m, z17.d",
-        "uzp2 z2.s, z2.s, z2.s",
-        "frinti z2.s, p7/m, z2.s",
+        "mov z2.d, p7/m, z17.d",
+        "fcvtnt z3.s, p7/m, z2.d",
+        "uzp2 z3.s, z3.s, z3.s",
+        "frinti z2.s, p7/m, z3.s",
         "fcvtzs z2.s, p7/m, z2.s",
-        "mov v16.16b, v2.16b"
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vmovntdq [rax], xmm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b01 0xe7 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "str q16, [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "str q2, [x20]"
       ]
     },
     "vmovntdq [rax], ymm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b01 0xe7 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "st1b {z16.b}, p7, [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "st1b {z2.b}, p7, [x20]"
       ]
     },
     "vpsubsb xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xe8 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "sqsub v16.16b, v17.16b, v18.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "sqsub v4.16b, v2.16b, v3.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsubsb ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xe8 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "sqsub z16.b, z17.b, z18.b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "sqsub z4.b, z2.b, z3.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsubsw xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xe9 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "sqsub v16.8h, v17.8h, v18.8h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "sqsub v4.8h, v2.8h, v3.8h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsubsw ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xe9 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "sqsub z16.h, z17.h, z18.h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "sqsub z4.h, z2.h, z3.h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpminsw xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xea 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "smin v16.8h, v17.8h, v18.8h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "smin v4.8h, v2.8h, v3.8h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpminsw ymm0, ymm1, ymm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Aliasing source and destination",
         "Map 1 0b01 0xea 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "smin z16.h, p7/m, z16.h, z17.h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z16.d",
+        "movprfx z4, z2",
+        "smin z4.h, p7/m, z4.h, z3.h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpminsw ymm0, ymm0, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Aliasing source and destination",
         "Map 1 0b01 0xea 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "smin z16.h, p7/m, z16.h, z18.h"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "smin z4.h, p7/m, z4.h, z3.h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpminsw ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b01 0xea 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z16, z17",
-        "smin z16.h, p7/m, z16.h, z18.h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "smin z4.h, p7/m, z4.h, z3.h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpor xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xeb 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "orr v16.16b, v17.16b, v18.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "orr v4.16b, v2.16b, v3.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpor ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xeb 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "orr z16.d, z17.d, z18.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "orr z4.d, z2.d, z3.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpaddsb xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xec 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "sqadd v16.16b, v17.16b, v18.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "sqadd v4.16b, v2.16b, v3.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpaddsb ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xec 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "sqadd z16.b, z17.b, z18.b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "sqadd z4.b, z2.b, z3.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpaddsw xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xed 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "sqadd v16.8h, v17.8h, v18.8h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "sqadd v4.8h, v2.8h, v3.8h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpaddsw ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xed 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "sqadd z16.h, z17.h, z18.h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "sqadd z4.h, z2.h, z3.h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmaxsw xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xee 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "smax v16.8h, v17.8h, v18.8h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "smax v4.8h, v2.8h, v3.8h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmaxsw ymm0, ymm1, ymm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Aliasing source and destination",
         "Map 1 0b01 0xee 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "smax z16.h, p7/m, z16.h, z17.h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z16.d",
+        "movprfx z4, z2",
+        "smax z4.h, p7/m, z4.h, z3.h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmaxsw ymm0, ymm0, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Aliasing source and destination",
         "Map 1 0b01 0xee 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "smax z16.h, p7/m, z16.h, z18.h"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "smax z4.h, p7/m, z4.h, z3.h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmaxsw ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b01 0xee 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z16, z17",
-        "smax z16.h, p7/m, z16.h, z18.h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "smax z4.h, p7/m, z4.h, z3.h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpxor xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xef 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "eor v16.16b, v17.16b, v18.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "eor v4.16b, v2.16b, v3.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpxor ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xef 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "eor z16.d, z17.d, z18.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "eor z4.d, z2.d, z3.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vlddqu xmm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b11 0xf0 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ldr q16, [x4]"
+        "mov x20, x4",
+        "ldr q2, [x20]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vlddqu ymm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 1 0b11 0xf0 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "ld1b {z16.b}, p7/z, [x4]"
+        "mov x20, x4",
+        "ld1b {z2.b}, p7/z, [x20]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpsllw xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b01 0xf1 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z0.d, d18",
-        "movprfx z2, z17",
-        "lsl z2.h, p6/m, z2.h, z0.d",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z0.d, d3",
+        "movprfx z4, z2",
+        "lsl z4.h, p6/m, z4.h, z0.d",
+        "mov v2.16b, v4.16b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpsllw ymm0, ymm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b01 0xf1 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z0.d, d18",
-        "movprfx z16, z17",
-        "lsl z16.h, p7/m, z16.h, z0.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z0.d, d3",
+        "movprfx z4, z2",
+        "lsl z4.h, p7/m, z4.h, z0.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpslld xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b01 0xf2 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z0.d, d18",
-        "movprfx z2, z17",
-        "lsl z2.s, p6/m, z2.s, z0.d",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z0.d, d3",
+        "movprfx z4, z2",
+        "lsl z4.s, p6/m, z4.s, z0.d",
+        "mov v2.16b, v4.16b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpslld ymm0, ymm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b01 0xf2 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z0.d, d18",
-        "movprfx z16, z17",
-        "lsl z16.s, p7/m, z16.s, z0.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z0.d, d3",
+        "movprfx z4, z2",
+        "lsl z4.s, p7/m, z4.s, z0.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsllq xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b01 0xf3 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z0.d, d18",
-        "movprfx z2, z17",
-        "lsl z2.d, p6/m, z2.d, z0.d",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z0.d, d3",
+        "movprfx z4, z2",
+        "lsl z4.d, p6/m, z4.d, z0.d",
+        "mov v2.16b, v4.16b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpsllq ymm0, ymm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b01 0xf3 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z0.d, d18",
-        "movprfx z16, z17",
-        "lsl z16.d, p7/m, z16.d, z0.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z0.d, d3",
+        "movprfx z4, z2",
+        "lsl z4.d, p7/m, z4.d, z0.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmuludq xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b01 0xf4 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "uzp1 v2.4s, v17.4s, v17.4s",
-        "uzp1 v3.4s, v18.4s, v18.4s",
-        "umull v16.2d, v2.2s, v3.2s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "uzp1 v4.4s, v2.4s, v2.4s",
+        "uzp1 v2.4s, v3.4s, v3.4s",
+        "umull v3.2d, v4.2s, v2.2s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpmuludq ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 1 0b01 0xf4 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "uzp1 z2.s, z17.s, z17.s",
-        "uzp1 z3.s, z18.s, z18.s",
-        "umullb z0.d, z2.s, z3.s",
-        "umullt z1.d, z2.s, z3.s",
-        "zip1 z16.d, z0.d, z1.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "uzp1 z4.s, z2.s, z2.s",
+        "uzp1 z2.s, z3.s, z3.s",
+        "umullb z0.d, z4.s, z2.s",
+        "umullt z1.d, z4.s, z2.s",
+        "zip1 z3.d, z0.d, z1.d",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpmaddwd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b01 0xf5 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "smull v2.4s, v17.4h, v18.4h",
-        "smull2 v3.4s, v17.8h, v18.8h",
-        "addp v16.4s, v2.4s, v3.4s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "smull v4.4s, v2.4h, v3.4h",
+        "smull2 v5.4s, v2.8h, v3.8h",
+        "addp v2.4s, v4.4s, v5.4s",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpmaddwd ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "Map 1 0b01 0xf5 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "smullb z0.s, z17.h, z18.h",
-        "smullt z1.s, z17.h, z18.h",
-        "zip1 z2.s, z0.s, z1.s",
-        "smullb z0.s, z17.h, z18.h",
-        "smullt z1.s, z17.h, z18.h",
-        "zip2 z3.s, z0.s, z1.s",
-        "movprfx z0, z2",
-        "addp z0.s, p7/m, z0.s, z3.s",
-        "uzp1 z16.s, z0.s, z0.s",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "smullb z0.s, z2.h, z3.h",
+        "smullt z1.s, z2.h, z3.h",
+        "zip1 z4.s, z0.s, z1.s",
+        "smullb z0.s, z2.h, z3.h",
+        "smullt z1.s, z2.h, z3.h",
+        "zip2 z5.s, z0.s, z1.s",
+        "movprfx z0, z4",
+        "addp z0.s, p7/m, z0.s, z5.s",
+        "uzp1 z2.s, z0.s, z0.s",
         "uzp2 z1.s, z0.s, z0.s",
-        "splice z16.d, p6, z16.d, z1.d"
+        "splice z2.d, p6, z2.d, z1.d",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpsadbw xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 1 0b01 0xf6 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "uabdl v2.8h, v17.8b, v18.8b",
-        "uabdl2 v3.8h, v17.16b, v18.16b",
-        "addv h2, v2.8h",
-        "addv h3, v3.8h",
-        "zip1 v16.2d, v2.2d, v3.2d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "uabdl v4.8h, v2.8b, v3.8b",
+        "uabdl2 v5.8h, v2.16b, v3.16b",
+        "addv h2, v4.8h",
+        "addv h3, v5.8h",
+        "zip1 v4.2d, v2.2d, v3.2d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsadbw ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 37,
+      "ExpectedInstructionCount": 41,
       "Comment": [
         "Map 1 0b01 0xf6 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "uabdlb z0.h, z17.b, z18.b",
-        "uabdlt z1.h, z17.b, z18.b",
-        "zip1 z2.h, z0.h, z1.h",
-        "uabdlb z0.h, z17.b, z18.b",
-        "uabdlt z1.h, z17.b, z18.b",
-        "zip2 z3.h, z0.h, z1.h",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "uabdlb z0.h, z2.b, z3.b",
+        "uabdlt z1.h, z2.b, z3.b",
+        "zip1 z4.h, z0.h, z1.h",
+        "uabdlb z0.h, z2.b, z3.b",
+        "uabdlt z1.h, z2.b, z3.b",
+        "zip2 z5.h, z0.h, z1.h",
+        "addv h2, v4.8h",
+        "addv h3, v5.8h",
+        "zip1 z6.d, z2.d, z3.d",
+        "mov z2.q, z4.q[1]",
+        "mov z3.q, z5.q[1]",
         "addv h4, v2.8h",
-        "addv h5, v3.8h",
-        "zip1 z4.d, z4.d, z5.d",
-        "mov z2.q, z2.q[1]",
-        "mov z3.q, z3.q[1]",
-        "addv h2, v2.8h",
-        "addv h3, v3.8h",
-        "mov z1.d, d3",
+        "addv h2, v3.8h",
+        "mov z1.d, d2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z2.d, p0/m, z1.d",
+        "mov z3.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.q, q2",
-        "mov z2.d, z4.d",
+        "mov z1.q, q3",
+        "mov z2.d, z6.d",
         "not p0.b, p7/z, p6.b",
         "mov z2.b, p0/m, z1.b",
         "mov z1.d, z2.d[1]",
@@ -5347,150 +6606,197 @@
         "mov z3.d, p0/m, z1.d",
         "msr nzcv, x0",
         "mov z1.d, z2.d[2]",
-        "mov z16.d, z3.d",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z4.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vmaskmovdqu xmm0, xmm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 1 0b01 0xf7 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "cmlt v2.16b, v17.16b, #0",
-        "ldr q3, [x11]",
-        "bsl v2.16b, v16.16b, v3.16b",
-        "str q2, [x11]"
+        "mov z2.d, p7/m, z17.d",
+        "cmlt v3.16b, v2.16b, #0",
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x11",
+        "ldr q4, [x20]",
+        "mov v5.16b, v3.16b",
+        "bsl v5.16b, v2.16b, v4.16b",
+        "str q5, [x20]"
       ]
     },
     "vpsubb xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xf8 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "sub v16.16b, v17.16b, v18.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "sub v4.16b, v2.16b, v3.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsubb ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xf8 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "sub z16.b, z17.b, z18.b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "sub z4.b, z2.b, z3.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsubw xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xf9 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "sub v16.8h, v17.8h, v18.8h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "sub v4.8h, v2.8h, v3.8h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsubw ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xf9 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "sub z16.h, z17.h, z18.h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "sub z4.h, z2.h, z3.h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsubd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xfa 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "sub v16.4s, v17.4s, v18.4s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "sub v4.4s, v2.4s, v3.4s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsubd ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xfa 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "sub z16.s, z17.s, z18.s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "sub z4.s, z2.s, z3.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsubq xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xfb 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "sub v16.2d, v17.2d, v18.2d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "sub v4.2d, v2.2d, v3.2d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsubq ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xfb 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "sub z16.d, z17.d, z18.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "sub z4.d, z2.d, z3.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpaddb xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xfc 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "add v16.16b, v17.16b, v18.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "add v4.16b, v2.16b, v3.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpaddb ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xfc 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "add z16.b, z17.b, z18.b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "add z4.b, z2.b, z3.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpaddw xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xfd 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "add v16.8h, v17.8h, v18.8h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "add v4.8h, v2.8h, v3.8h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpaddw ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xfd 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "add z16.h, z17.h, z18.h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "add z4.h, z2.h, z3.h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpaddd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xfe 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "add v16.4s, v17.4s, v18.4s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "add v4.4s, v2.4s, v3.4s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpaddd ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 1 0b01 0xfe 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "add z16.s, z17.s, z18.s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "add z4.s, z2.s, z3.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/VEX_map1_FCMA.json b/unittests/InstructionCountCI/VEX_map1_FCMA.json
index b4aca96368..18a5695d06 100644
--- a/unittests/InstructionCountCI/VEX_map1_FCMA.json
+++ b/unittests/InstructionCountCI/VEX_map1_FCMA.json
@@ -10,94 +10,120 @@
   },
   "Instructions": {
     "vaddsubpd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b01 0xd0 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ext v2.16b, v18.16b, v18.16b, #8",
-        "fcadd v16.2d, v17.2d, v2.2d, #90"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "ext v4.16b, v3.16b, v3.16b, #8",
+        "fcadd v3.2d, v2.2d, v4.2d, #90",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vaddsubpd ymm0, ymm0, ymm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Aliasing source and destination",
         "Map 1 0b01 0xd0 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z2, z18",
-        "ext z2.b, z2.b, z18.b, #8",
-        "fcadd z16.d, p7/m, z16.d, z2.d, #90"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z3",
+        "ext z4.b, z4.b, z3.b, #8",
+        "movprfx z3, z2",
+        "fcadd z3.d, p7/m, z3.d, z4.d, #90",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vaddsubpd ymm0, ymm1, ymm0": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Aliasing source and destination",
         "Map 1 0b01 0xd0 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z2, z16",
-        "ext z2.b, z2.b, z16.b, #8",
-        "movprfx z16, z17",
-        "fcadd z16.d, p7/m, z16.d, z2.d, #90"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z16.d",
+        "movprfx z4, z3",
+        "ext z4.b, z4.b, z3.b, #8",
+        "movprfx z3, z2",
+        "fcadd z3.d, p7/m, z3.d, z4.d, #90",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vaddsubpd ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 1 0b01 0xd0 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z2, z18",
-        "ext z2.b, z2.b, z18.b, #8",
-        "movprfx z16, z17",
-        "fcadd z16.d, p7/m, z16.d, z2.d, #90"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z3",
+        "ext z4.b, z4.b, z3.b, #8",
+        "movprfx z3, z2",
+        "fcadd z3.d, p7/m, z3.d, z4.d, #90",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vaddsubps xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 1 0b11 0xd0 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "rev64 v2.4s, v18.4s",
-        "fcadd v16.4s, v17.4s, v2.4s, #90"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "rev64 v4.4s, v3.4s",
+        "fcadd v3.4s, v2.4s, v4.4s, #90",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vaddsubps ymm0, ymm1, ymm0": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Aliasing source and destination",
         "Map 1 0b11 0xd0 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "revw z2.d, p7/m, z16.d",
-        "movprfx z16, z17",
-        "fcadd z16.s, p7/m, z16.s, z2.s, #90"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z16.d",
+        "revw z4.d, p7/m, z3.d",
+        "movprfx z3, z2",
+        "fcadd z3.s, p7/m, z3.s, z4.s, #90",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vaddsubps ymm0, ymm0, ymm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Aliasing source and destination",
         "Map 1 0b11 0xd0 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "revw z2.d, p7/m, z18.d",
-        "fcadd z16.s, p7/m, z16.s, z2.s, #90"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z18.d",
+        "revw z4.d, p7/m, z3.d",
+        "movprfx z3, z2",
+        "fcadd z3.s, p7/m, z3.s, z4.s, #90",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vaddsubps ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 1 0b11 0xd0 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "revw z2.d, p7/m, z18.d",
-        "movprfx z16, z17",
-        "fcadd z16.s, p7/m, z16.s, z2.s, #90"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "revw z4.d, p7/m, z3.d",
+        "movprfx z3, z2",
+        "fcadd z3.s, p7/m, z3.s, z4.s, #90",
+        "mov z16.d, p7/m, z3.d"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/VEX_map2.json b/unittests/InstructionCountCI/VEX_map2.json
index b2ccc16d47..7cf224d27b 100644
--- a/unittests/InstructionCountCI/VEX_map2.json
+++ b/unittests/InstructionCountCI/VEX_map2.json
@@ -12,127 +12,150 @@
   },
   "Instructions": {
     "vpshufb xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 2 0b01 0x00 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.16b, #0x8f",
-        "and v2.16b, v18.16b, v2.16b",
-        "tbl v16.16b, {v17.16b}, v2.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.16b, #0x8f",
+        "and v5.16b, v3.16b, v4.16b",
+        "tbl v3.16b, {v2.16b}, v5.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpshufb ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "Map 2 0b01 0x00 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z2.b, #-113",
-        "and z2.d, z18.d, z2.d",
-        "tbl v3.16b, {v17.16b}, v2.16b",
-        "mov z1.q, z17.q[1]",
-        "mov z4.d, z17.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z4.b, #-113",
+        "and z5.d, z3.d, z4.d",
+        "tbl v3.16b, {v2.16b}, v5.16b",
+        "mov z1.q, z2.q[1]",
+        "mov z4.d, z2.d",
         "mov z4.b, p6/m, z1.b",
-        "tbl v2.16b, {v4.16b}, v2.16b",
+        "tbl v2.16b, {v4.16b}, v5.16b",
         "mov z1.q, q2",
-        "mov z16.d, z3.d",
+        "mov z4.d, z3.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z4.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vphaddw xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 2 0b01 0x01 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "addp v16.8h, v17.8h, v18.8h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "addp v4.8h, v2.8h, v3.8h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vphaddw ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 19,
+      "ExpectedInstructionCount": 22,
       "Comment": [
         "Map 2 0b01 0x01 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z0, z17",
-        "addp z0.h, p7/m, z0.h, z18.h",
-        "uzp1 z2.h, z0.h, z0.h",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z0, z2",
+        "addp z0.h, p7/m, z0.h, z3.h",
+        "uzp1 z4.h, z0.h, z0.h",
         "uzp2 z1.h, z0.h, z0.h",
-        "splice z2.d, p6, z2.d, z1.d",
-        "mov z1.d, z2.d[2]",
-        "mov z3.d, z2.d",
+        "splice z4.d, p6, z4.d, z1.d",
+        "mov z1.d, z4.d[2]",
+        "mov z2.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z3.d, p0/m, z1.d",
+        "mov z2.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z2.d[1]",
-        "mov z16.d, z3.d",
+        "mov z1.d, z4.d[1]",
+        "mov z3.d, z2.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z3.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vphaddd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 2 0b01 0x02 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "addp v16.4s, v17.4s, v18.4s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "addp v4.4s, v2.4s, v3.4s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vphaddd ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 19,
+      "ExpectedInstructionCount": 22,
       "Comment": [
         "Map 2 0b01 0x02 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z0, z17",
-        "addp z0.s, p7/m, z0.s, z18.s",
-        "uzp1 z2.s, z0.s, z0.s",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z0, z2",
+        "addp z0.s, p7/m, z0.s, z3.s",
+        "uzp1 z4.s, z0.s, z0.s",
         "uzp2 z1.s, z0.s, z0.s",
-        "splice z2.d, p6, z2.d, z1.d",
-        "mov z1.d, z2.d[2]",
-        "mov z3.d, z2.d",
+        "splice z4.d, p6, z4.d, z1.d",
+        "mov z1.d, z4.d[2]",
+        "mov z2.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z3.d, p0/m, z1.d",
+        "mov z2.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z2.d[1]",
-        "mov z16.d, z3.d",
+        "mov z1.d, z4.d[1]",
+        "mov z3.d, z2.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z3.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vphaddsw xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 2 0b01 0x03 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "uzp1 v2.8h, v17.8h, v18.8h",
-        "uzp2 v3.8h, v17.8h, v18.8h",
-        "sqadd v16.8h, v2.8h, v3.8h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "uzp1 v4.8h, v2.8h, v3.8h",
+        "uzp2 v5.8h, v2.8h, v3.8h",
+        "sqadd v2.8h, v4.8h, v5.8h",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vphaddsw ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "Map 2 0b01 0x03 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "uzp1 z2.h, z17.h, z18.h",
-        "uzp2 z3.h, z17.h, z18.h",
-        "sqadd z2.h, z2.h, z3.h",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "uzp1 z4.h, z2.h, z3.h",
+        "uzp2 z5.h, z2.h, z3.h",
+        "sqadd z2.h, z4.h, z5.h",
         "mov z1.d, z2.d[2]",
         "mov z3.d, z2.d",
         "mrs x0, nzcv",
@@ -141,68 +164,80 @@
         "mov z3.d, p0/m, z1.d",
         "msr nzcv, x0",
         "mov z1.d, z2.d[1]",
-        "mov z16.d, z3.d",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z4.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmaddubsw xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 12,
       "Comment": [
         "Map 2 0b01 0x04 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "uxtl v2.8h, v17.8b",
-        "sxtl v3.8h, v18.8b",
-        "mul v2.8h, v2.8h, v3.8h",
-        "uxtl2 v3.8h, v17.16b",
-        "sxtl2 v4.8h, v18.16b",
-        "mul v3.8h, v3.8h, v4.8h",
-        "uzp1 v4.8h, v2.8h, v3.8h",
-        "uzp2 v2.8h, v2.8h, v3.8h",
-        "sqadd v16.8h, v4.8h, v2.8h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "uxtl v4.8h, v2.8b",
+        "sxtl v5.8h, v3.8b",
+        "mul v6.8h, v4.8h, v5.8h",
+        "uxtl2 v4.8h, v2.16b",
+        "sxtl2 v2.8h, v3.16b",
+        "mul v3.8h, v4.8h, v2.8h",
+        "uzp1 v2.8h, v6.8h, v3.8h",
+        "uzp2 v4.8h, v6.8h, v3.8h",
+        "sqadd v3.8h, v2.8h, v4.8h",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpmaddubsw ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 12,
       "Comment": [
         "Map 2 0b01 0x04 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "uunpklo z2.h, z17.b",
-        "sunpklo z3.h, z18.b",
-        "mul z2.h, z2.h, z3.h",
-        "uunpkhi z3.h, z17.b",
-        "sunpkhi z4.h, z18.b",
-        "mul z3.h, z3.h, z4.h",
-        "uzp1 z4.h, z2.h, z3.h",
-        "uzp2 z2.h, z2.h, z3.h",
-        "sqadd z16.h, z4.h, z2.h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "uunpklo z4.h, z2.b",
+        "sunpklo z5.h, z3.b",
+        "mul z6.h, z4.h, z5.h",
+        "uunpkhi z4.h, z2.b",
+        "sunpkhi z2.h, z3.b",
+        "mul z3.h, z4.h, z2.h",
+        "uzp1 z2.h, z6.h, z3.h",
+        "uzp2 z4.h, z6.h, z3.h",
+        "sqadd z3.h, z2.h, z4.h",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vphsubw xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 2 0b01 0x05 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "uzp1 v2.8h, v17.8h, v18.8h",
-        "uzp2 v3.8h, v17.8h, v18.8h",
-        "sub v16.8h, v2.8h, v3.8h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "uzp1 v4.8h, v2.8h, v3.8h",
+        "uzp2 v5.8h, v2.8h, v3.8h",
+        "sub v2.8h, v4.8h, v5.8h",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vphsubw ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "Map 2 0b01 0x05 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "uzp1 z2.h, z17.h, z18.h",
-        "uzp2 z3.h, z17.h, z18.h",
-        "sub z2.h, z2.h, z3.h",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "uzp1 z4.h, z2.h, z3.h",
+        "uzp2 z5.h, z2.h, z3.h",
+        "sub z2.h, z4.h, z5.h",
         "mov z1.d, z2.d[2]",
         "mov z3.d, z2.d",
         "mrs x0, nzcv",
@@ -211,34 +246,40 @@
         "mov z3.d, p0/m, z1.d",
         "msr nzcv, x0",
         "mov z1.d, z2.d[1]",
-        "mov z16.d, z3.d",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z4.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vphsubd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 2 0b01 0x06 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "uzp1 v2.4s, v17.4s, v18.4s",
-        "uzp2 v3.4s, v17.4s, v18.4s",
-        "sub v16.4s, v2.4s, v3.4s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "uzp1 v4.4s, v2.4s, v3.4s",
+        "uzp2 v5.4s, v2.4s, v3.4s",
+        "sub v2.4s, v4.4s, v5.4s",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vphsubd ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "Map 2 0b01 0x06 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "uzp1 z2.s, z17.s, z18.s",
-        "uzp2 z3.s, z17.s, z18.s",
-        "sub z2.s, z2.s, z3.s",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "uzp1 z4.s, z2.s, z3.s",
+        "uzp2 z5.s, z2.s, z3.s",
+        "sub z2.s, z4.s, z5.s",
         "mov z1.d, z2.d[2]",
         "mov z3.d, z2.d",
         "mrs x0, nzcv",
@@ -247,34 +288,40 @@
         "mov z3.d, p0/m, z1.d",
         "msr nzcv, x0",
         "mov z1.d, z2.d[1]",
-        "mov z16.d, z3.d",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z4.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vphsubsw xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 2 0b01 0x07 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "uzp1 v2.8h, v17.8h, v18.8h",
-        "uzp2 v3.8h, v17.8h, v18.8h",
-        "sqsub v16.8h, v2.8h, v3.8h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "uzp1 v4.8h, v2.8h, v3.8h",
+        "uzp2 v5.8h, v2.8h, v3.8h",
+        "sqsub v2.8h, v4.8h, v5.8h",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vphsubsw ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "Map 2 0b01 0x07 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "uzp1 z2.h, z17.h, z18.h",
-        "uzp2 z3.h, z17.h, z18.h",
-        "sqsub z2.h, z2.h, z3.h",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "uzp1 z4.h, z2.h, z3.h",
+        "uzp2 z5.h, z2.h, z3.h",
+        "sqsub z2.h, z4.h, z5.h",
         "mov z1.d, z2.d[2]",
         "mov z3.d, z2.d",
         "mrs x0, nzcv",
@@ -283,323 +330,385 @@
         "mov z3.d, p0/m, z1.d",
         "msr nzcv, x0",
         "mov z1.d, z2.d[1]",
-        "mov z16.d, z3.d",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z4.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsignb xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 2 0b01 0x08 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "sqshl v2.16b, v18.16b, #7",
-        "srshr v2.16b, v2.16b, #7",
-        "mul v16.16b, v17.16b, v2.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "sqshl v4.16b, v3.16b, #7",
+        "srshr v3.16b, v4.16b, #7",
+        "mul v4.16b, v2.16b, v3.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsignb ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 2 0b01 0x08 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z2, z18",
-        "sqshl z2.b, p7/m, z2.b, #7",
-        "srshr z2.b, p7/m, z2.b, #7",
-        "mul z16.b, z17.b, z2.b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z3",
+        "sqshl z4.b, p7/m, z4.b, #7",
+        "movprfx z3, z4",
+        "srshr z3.b, p7/m, z3.b, #7",
+        "mul z4.b, z2.b, z3.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsignw xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 2 0b01 0x09 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "sqshl v2.8h, v18.8h, #15",
-        "srshr v2.8h, v2.8h, #15",
-        "mul v16.8h, v17.8h, v2.8h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "sqshl v4.8h, v3.8h, #15",
+        "srshr v3.8h, v4.8h, #15",
+        "mul v4.8h, v2.8h, v3.8h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsignw ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 2 0b01 0x09 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z2, z18",
-        "sqshl z2.h, p7/m, z2.h, #15",
-        "srshr z2.h, p7/m, z2.h, #15",
-        "mul z16.h, z17.h, z2.h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z3",
+        "sqshl z4.h, p7/m, z4.h, #15",
+        "movprfx z3, z4",
+        "srshr z3.h, p7/m, z3.h, #15",
+        "mul z4.h, z2.h, z3.h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsignd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 2 0b01 0x0a 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "sqshl v2.4s, v18.4s, #31",
-        "srshr v2.4s, v2.4s, #31",
-        "mul v16.4s, v17.4s, v2.4s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "sqshl v4.4s, v3.4s, #31",
+        "srshr v3.4s, v4.4s, #31",
+        "mul v4.4s, v2.4s, v3.4s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsignd ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 2 0b01 0x0a 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z2, z18",
-        "sqshl z2.s, p7/m, z2.s, #31",
-        "srshr z2.s, p7/m, z2.s, #31",
-        "mul z16.s, z17.s, z2.s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z3",
+        "sqshl z4.s, p7/m, z4.s, #31",
+        "movprfx z3, z4",
+        "srshr z3.s, p7/m, z3.s, #31",
+        "mul z4.s, z2.s, z3.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmulhrsw xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 11,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "Map 2 0b01 0x0b 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "smull v2.4s, v17.4h, v18.4h",
-        "smull2 v3.4s, v17.8h, v18.8h",
-        "sshr v2.4s, v2.4s, #14",
-        "sshr v3.4s, v3.4s, #14",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "smull v4.4s, v2.4h, v3.4h",
+        "smull2 v5.4s, v2.8h, v3.8h",
+        "sshr v2.4s, v4.4s, #14",
+        "sshr v3.4s, v5.4s, #14",
         "movi v4.4s, #0x1, lsl #0",
-        "add v2.4s, v2.4s, v4.4s",
-        "add v3.4s, v3.4s, v4.4s",
-        "shrn v2.4h, v2.4s, #1",
-        "mov v0.16b, v2.16b",
-        "shrn2 v0.8h, v3.4s, #1",
-        "mov v16.16b, v0.16b"
+        "add v5.4s, v2.4s, v4.4s",
+        "add v2.4s, v3.4s, v4.4s",
+        "shrn v3.4h, v5.4s, #1",
+        "mov v0.16b, v3.16b",
+        "shrn2 v0.8h, v2.4s, #1",
+        "mov v4.16b, v0.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmulhrsw ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 22,
       "Comment": [
         "Map 2 0b01 0x0b 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "smullb z0.s, z17.h, z18.h",
-        "smullt z1.s, z17.h, z18.h",
-        "zip1 z2.s, z0.s, z1.s",
-        "smullb z0.s, z17.h, z18.h",
-        "smullt z1.s, z17.h, z18.h",
-        "zip2 z3.s, z0.s, z1.s",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "smullb z0.s, z2.h, z3.h",
+        "smullt z1.s, z2.h, z3.h",
+        "zip1 z4.s, z0.s, z1.s",
+        "smullb z0.s, z2.h, z3.h",
+        "smullt z1.s, z2.h, z3.h",
+        "zip2 z5.s, z0.s, z1.s",
+        "movprfx z2, z4",
         "asr z2.s, p7/m, z2.s, #14",
+        "movprfx z3, z5",
         "asr z3.s, p7/m, z3.s, #14",
         "mov z4.s, #1",
-        "add z2.s, z2.s, z4.s",
-        "add z3.s, z3.s, z4.s",
-        "shrnb z2.h, z2.s, #1",
-        "uzp1 z2.h, z2.h, z2.h",
-        "shrnb z1.h, z3.s, #1",
+        "add z5.s, z2.s, z4.s",
+        "add z2.s, z3.s, z4.s",
+        "shrnb z3.h, z5.s, #1",
+        "uzp1 z3.h, z3.h, z3.h",
+        "shrnb z1.h, z2.s, #1",
         "uzp1 z1.h, z1.h, z1.h",
-        "movprfx z16, z2",
-        "splice z16.h, p6, z16.h, z1.h"
+        "movprfx z4, z3",
+        "splice z4.h, p6, z4.h, z1.h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpermilps xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "Map 2 0b01 0x0c 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.4s, #0x3, lsl #0",
-        "and v2.16b, v18.16b, v2.16b",
-        "trn1 v2.16b, v2.16b, v2.16b",
-        "trn1 v2.8h, v2.8h, v2.8h",
-        "shl v2.16b, v2.16b, #2",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.4s, #0x3, lsl #0",
+        "and v5.16b, v3.16b, v4.16b",
+        "trn1 v3.16b, v5.16b, v5.16b",
+        "trn1 v4.8h, v3.8h, v3.8h",
+        "shl v3.16b, v4.16b, #2",
         "mov w20, #0x100",
         "movk w20, #0x302, lsl #16",
-        "dup v3.4s, w20",
-        "add v2.16b, v3.16b, v2.16b",
-        "tbl v16.16b, {v17.16b}, v2.16b"
+        "dup v4.4s, w20",
+        "add v5.16b, v4.16b, v3.16b",
+        "tbl v3.16b, {v2.16b}, v5.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermilps ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 16,
+      "ExpectedInstructionCount": 21,
       "Comment": [
         "Map 2 0b01 0x0c 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z2.s, #3",
-        "and z2.d, z18.d, z2.d",
-        "trn1 z2.b, z2.b, z2.b",
-        "trn1 z2.h, z2.h, z2.h",
-        "lsl z2.b, p7/m, z2.b, #2",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z4.s, #3",
+        "and z5.d, z3.d, z4.d",
+        "trn1 z3.b, z5.b, z5.b",
+        "trn1 z4.h, z3.h, z3.h",
+        "movprfx z3, z4",
+        "lsl z3.b, p7/m, z3.b, #2",
         "mov w20, #0x100",
         "movk w20, #0x302, lsl #16",
-        "mov z3.s, w20",
-        "movi v4.2d, #0x0",
-        "mov z5.b, #16",
-        "mov z1.q, q5",
+        "mov z4.s, w20",
+        "movi v5.2d, #0x0",
+        "mov z6.b, #16",
+        "mov z1.q, q6",
+        "mov z7.d, z5.d",
         "not p0.b, p7/z, p6.b",
-        "mov z4.b, p0/m, z1.b",
-        "add z3.b, z3.b, z4.b",
-        "add z2.b, z3.b, z2.b",
-        "tbl z16.b, {z17.b}, z2.b"
+        "mov z7.b, p0/m, z1.b",
+        "add z5.b, z4.b, z7.b",
+        "add z4.b, z5.b, z3.b",
+        "tbl z3.b, {z2.b}, z4.b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermilpd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 15,
+      "ExpectedInstructionCount": 18,
       "Comment": [
         "Map 2 0b01 0x0d 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ushr v2.2d, v18.2d, #1",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "ushr v4.2d, v3.2d, #1",
         "mov w0, #0x1",
         "dup v3.2d, x0",
-        "and v2.16b, v2.16b, v3.16b",
-        "trn1 v2.16b, v2.16b, v2.16b",
-        "trn1 v2.8h, v2.8h, v2.8h",
-        "trn1 v2.4s, v2.4s, v2.4s",
-        "shl v2.16b, v2.16b, #3",
+        "and v5.16b, v4.16b, v3.16b",
+        "trn1 v3.16b, v5.16b, v5.16b",
+        "trn1 v4.8h, v3.8h, v3.8h",
+        "trn1 v3.4s, v4.4s, v4.4s",
+        "shl v4.16b, v3.16b, #3",
         "mov x20, #0x100",
         "movk x20, #0x302, lsl #16",
         "movk x20, #0x504, lsl #32",
         "movk x20, #0x706, lsl #48",
         "dup v3.2d, x20",
-        "add v2.16b, v3.16b, v2.16b",
-        "tbl v16.16b, {v17.16b}, v2.16b"
+        "add v5.16b, v3.16b, v4.16b",
+        "tbl v3.16b, {v2.16b}, v5.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermilpd ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 21,
+      "ExpectedInstructionCount": 26,
       "Comment": [
         "Map 2 0b01 0x0d 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z2, z18",
-        "lsr z2.d, p7/m, z2.d, #1",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z3",
+        "lsr z4.d, p7/m, z4.d, #1",
         "mov z3.d, #1",
-        "and z2.d, z2.d, z3.d",
-        "trn1 z2.b, z2.b, z2.b",
-        "trn1 z2.h, z2.h, z2.h",
-        "trn1 z2.s, z2.s, z2.s",
-        "lsl z2.b, p7/m, z2.b, #3",
+        "and z5.d, z4.d, z3.d",
+        "trn1 z3.b, z5.b, z5.b",
+        "trn1 z4.h, z3.h, z3.h",
+        "trn1 z3.s, z4.s, z4.s",
+        "movprfx z4, z3",
+        "lsl z4.b, p7/m, z4.b, #3",
         "mov x20, #0x100",
         "movk x20, #0x302, lsl #16",
         "movk x20, #0x504, lsl #32",
         "movk x20, #0x706, lsl #48",
         "mov z3.d, x20",
-        "movi v4.2d, #0x0",
-        "mov z5.b, #16",
-        "mov z1.q, q5",
+        "movi v5.2d, #0x0",
+        "mov z6.b, #16",
+        "mov z1.q, q6",
+        "mov z7.d, z5.d",
         "not p0.b, p7/z, p6.b",
-        "mov z4.b, p0/m, z1.b",
-        "add z3.b, z3.b, z4.b",
-        "add z2.b, z3.b, z2.b",
-        "tbl z16.b, {z17.b}, z2.b"
+        "mov z7.b, p0/m, z1.b",
+        "add z5.b, z3.b, z7.b",
+        "add z3.b, z5.b, z4.b",
+        "tbl z4.b, {z2.b}, z3.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vtestps xmm0, xmm1": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 22,
       "Comment": [
         "Map 2 0b01 0x0e 128-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
         "mov w20, #0x80000000",
-        "dup v2.4s, w20",
-        "and v3.16b, v17.16b, v16.16b",
-        "bic v4.16b, v17.16b, v16.16b",
-        "and v3.16b, v3.16b, v2.16b",
-        "and v2.16b, v4.16b, v2.16b",
-        "umaxv h3, v3.8h",
-        "umaxv h2, v2.8h",
-        "umov w20, v3.h[0]",
+        "dup v4.4s, w20",
+        "and v5.16b, v3.16b, v2.16b",
+        "bic v6.16b, v3.16b, v2.16b",
+        "and v2.16b, v5.16b, v4.16b",
+        "and v3.16b, v6.16b, v4.16b",
+        "umaxv h4, v2.8h",
+        "umaxv h2, v3.8h",
+        "umov w20, v4.h[0]",
         "umov w21, v2.h[0]",
-        "mov w27, #0x0",
-        "mov w26, #0x1",
+        "mov w22, #0x0",
+        "mov w23, #0x1",
         "cmp x21, #0x0 (0)",
-        "cset x21, eq",
+        "cset x24, eq",
         "tst w20, w20",
         "mrs x20, nzcv",
-        "orr w20, w20, w21, lsl #29",
-        "msr nzcv, x20"
+        "orr w21, w20, w24, lsl #29",
+        "mov x26, x23",
+        "mov x27, x22",
+        "msr nzcv, x21"
       ]
     },
     "vtestps ymm0, ymm1": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 22,
       "Comment": [
         "Map 2 0b01 0x0e 256-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
         "mov w20, #0x80000000",
-        "mov z2.s, w20",
-        "and z3.d, z17.d, z16.d",
-        "bic z4.d, z17.d, z16.d",
-        "and z3.d, z3.d, z2.d",
-        "and z2.d, z4.d, z2.d",
-        "umaxv h3, p7, z3.h",
-        "umaxv h2, p7, z2.h",
-        "umov w20, v3.h[0]",
+        "mov z4.s, w20",
+        "and z5.d, z3.d, z2.d",
+        "bic z6.d, z3.d, z2.d",
+        "and z2.d, z5.d, z4.d",
+        "and z3.d, z6.d, z4.d",
+        "umaxv h4, p7, z2.h",
+        "umaxv h2, p7, z3.h",
+        "umov w20, v4.h[0]",
         "umov w21, v2.h[0]",
-        "mov w27, #0x0",
-        "mov w26, #0x1",
+        "mov w22, #0x0",
+        "mov w23, #0x1",
         "cmp x21, #0x0 (0)",
-        "cset x21, eq",
+        "cset x24, eq",
         "tst w20, w20",
         "mrs x20, nzcv",
-        "orr w20, w20, w21, lsl #29",
-        "msr nzcv, x20"
+        "orr w21, w20, w24, lsl #29",
+        "mov x26, x23",
+        "mov x27, x22",
+        "msr nzcv, x21"
       ]
     },
     "vtestpd xmm0, xmm1": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 22,
       "Comment": [
         "Map 2 0b01 0x0f 128-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
         "mov x20, #0x8000000000000000",
-        "dup v2.2d, x20",
-        "and v3.16b, v17.16b, v16.16b",
-        "bic v4.16b, v17.16b, v16.16b",
-        "and v3.16b, v3.16b, v2.16b",
-        "and v2.16b, v4.16b, v2.16b",
-        "umaxv h3, v3.8h",
-        "umaxv h2, v2.8h",
-        "umov w20, v3.h[0]",
+        "dup v4.2d, x20",
+        "and v5.16b, v3.16b, v2.16b",
+        "bic v6.16b, v3.16b, v2.16b",
+        "and v2.16b, v5.16b, v4.16b",
+        "and v3.16b, v6.16b, v4.16b",
+        "umaxv h4, v2.8h",
+        "umaxv h2, v3.8h",
+        "umov w20, v4.h[0]",
         "umov w21, v2.h[0]",
-        "mov w27, #0x0",
-        "mov w26, #0x1",
+        "mov w22, #0x0",
+        "mov w23, #0x1",
         "cmp x21, #0x0 (0)",
-        "cset x21, eq",
+        "cset x24, eq",
         "tst w20, w20",
         "mrs x20, nzcv",
-        "orr w20, w20, w21, lsl #29",
-        "msr nzcv, x20"
+        "orr w21, w20, w24, lsl #29",
+        "mov x26, x23",
+        "mov x27, x22",
+        "msr nzcv, x21"
       ]
     },
     "vtestpd ymm0, ymm1": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 22,
       "Comment": [
         "Map 2 0b01 0x0f 256-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
         "mov x20, #0x8000000000000000",
-        "mov z2.d, x20",
-        "and z3.d, z17.d, z16.d",
-        "bic z4.d, z17.d, z16.d",
-        "and z3.d, z3.d, z2.d",
-        "and z2.d, z4.d, z2.d",
-        "umaxv h3, p7, z3.h",
-        "umaxv h2, p7, z2.h",
-        "umov w20, v3.h[0]",
+        "mov z4.d, x20",
+        "and z5.d, z3.d, z2.d",
+        "bic z6.d, z3.d, z2.d",
+        "and z2.d, z5.d, z4.d",
+        "and z3.d, z6.d, z4.d",
+        "umaxv h4, p7, z2.h",
+        "umaxv h2, p7, z3.h",
+        "umov w20, v4.h[0]",
         "umov w21, v2.h[0]",
-        "mov w27, #0x0",
-        "mov w26, #0x1",
+        "mov w22, #0x0",
+        "mov w23, #0x1",
         "cmp x21, #0x0 (0)",
-        "cset x21, eq",
+        "cset x24, eq",
         "tst w20, w20",
         "mrs x20, nzcv",
-        "orr w20, w20, w21, lsl #29",
-        "msr nzcv, x20"
+        "orr w21, w20, w24, lsl #29",
+        "mov x26, x23",
+        "mov x27, x22",
+        "msr nzcv, x21"
       ]
     },
     "vcvtph2ps xmm0, xmm1": {
@@ -617,1155 +726,1455 @@
       ]
     },
     "vpermps ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "Map 2 0b01 0x16 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z2.s, #7",
-        "and z2.d, z17.d, z2.d",
-        "trn1 z2.b, z2.b, z2.b",
-        "trn1 z2.h, z2.h, z2.h",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z4.s, #7",
+        "and z5.d, z2.d, z4.d",
+        "trn1 z2.b, z5.b, z5.b",
+        "trn1 z4.h, z2.h, z2.h",
+        "movprfx z2, z4",
         "lsl z2.b, p7/m, z2.b, #2",
         "mov w20, #0x100",
         "movk w20, #0x302, lsl #16",
-        "mov z3.s, w20",
-        "add z2.b, z2.b, z3.b",
-        "tbl z16.b, {z18.b}, z2.b"
+        "mov z4.s, w20",
+        "add z5.b, z2.b, z4.b",
+        "tbl z2.b, {z3.b}, z5.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vptest xmm0, xmm1": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 18,
       "Comment": [
         "Map 2 0b01 0x16 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "and v2.16b, v16.16b, v17.16b",
-        "bic v3.16b, v17.16b, v16.16b",
-        "umaxv h2, v2.8h",
-        "umaxv h3, v3.8h",
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "and v4.16b, v2.16b, v3.16b",
+        "bic v5.16b, v3.16b, v2.16b",
+        "umaxv h2, v4.8h",
+        "umaxv h3, v5.8h",
         "umov w20, v2.h[0]",
         "umov w21, v3.h[0]",
-        "mov w27, #0x0",
-        "mov w26, #0x1",
+        "mov w22, #0x0",
+        "mov w23, #0x1",
         "cmp x21, #0x0 (0)",
-        "cset x21, eq",
+        "cset x24, eq",
         "tst w20, w20",
         "mrs x20, nzcv",
-        "orr w20, w20, w21, lsl #29",
-        "msr nzcv, x20"
+        "orr w21, w20, w24, lsl #29",
+        "mov x26, x23",
+        "mov x27, x22",
+        "msr nzcv, x21"
       ]
     },
     "vptest ymm0, ymm1": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 18,
       "Comment": [
         "Map 2 0b01 0x16 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "and z2.d, z16.d, z17.d",
-        "bic z3.d, z17.d, z16.d",
-        "umaxv h2, p7, z2.h",
-        "umaxv h3, p7, z3.h",
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "and z4.d, z2.d, z3.d",
+        "bic z5.d, z3.d, z2.d",
+        "umaxv h2, p7, z4.h",
+        "umaxv h3, p7, z5.h",
         "umov w20, v2.h[0]",
         "umov w21, v3.h[0]",
-        "mov w27, #0x0",
-        "mov w26, #0x1",
+        "mov w22, #0x0",
+        "mov w23, #0x1",
         "cmp x21, #0x0 (0)",
-        "cset x21, eq",
+        "cset x24, eq",
         "tst w20, w20",
         "mrs x20, nzcv",
-        "orr w20, w20, w21, lsl #29",
-        "msr nzcv, x20"
+        "orr w21, w20, w24, lsl #29",
+        "mov x26, x23",
+        "mov x27, x22",
+        "msr nzcv, x21"
       ]
     },
     "vbroadcastss xmm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x18 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ld1r {v16.4s}, [x4]"
+        "mov x20, x4",
+        "ld1r {v2.4s}, [x20]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vbroadcastss ymm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x18 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "ld1rw {z16.s}, p7/z, [x4]"
+        "mov x20, x4",
+        "ld1rw {z2.s}, p7/z, [x20]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vbroadcastsd ymm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x19 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "ld1rd {z16.d}, p7/z, [x4]"
+        "mov x20, x4",
+        "ld1rd {z2.d}, p7/z, [x20]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vbroadcastf128 ymm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x1a 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "ld1rqb {z16.b}, p7/z, [x4]"
+        "mov x20, x4",
+        "ld1rqb {z2.b}, p7/z, [x20]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpabsb xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x1c 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "abs v16.16b, v17.16b"
+        "mov z2.d, p7/m, z17.d",
+        "abs v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpabsb ymm0, ymm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x1c 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "abs z16.b, p7/m, z17.b"
+        "mov z2.d, p7/m, z17.d",
+        "abs z3.b, p7/m, z2.b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpabsw xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x1d 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "abs v16.8h, v17.8h"
+        "mov z2.d, p7/m, z17.d",
+        "abs v3.8h, v2.8h",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpabsw ymm0, ymm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x1d 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "abs z16.h, p7/m, z17.h"
+        "mov z2.d, p7/m, z17.d",
+        "abs z3.h, p7/m, z2.h",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpabsd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x1e 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "abs v16.4s, v17.4s"
+        "mov z2.d, p7/m, z17.d",
+        "abs v3.4s, v2.4s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpabsd ymm0, ymm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x1e 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "abs z16.s, p7/m, z17.s"
+        "mov z2.d, p7/m, z17.d",
+        "abs z3.s, p7/m, z2.s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpmovsxbw xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x20 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "sxtl v16.8h, v17.8b"
+        "mov z2.d, p7/m, z17.d",
+        "sxtl v3.8h, v2.8b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpmovsxbw ymm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x20 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "sunpklo z16.h, z17.b"
+        "mov z2.d, p7/m, z17.d",
+        "sunpklo z3.h, z2.b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpmovsxbd xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 2 0b01 0x21 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "sxtl v2.8h, v17.8b",
-        "sxtl v16.4s, v2.4h"
+        "mov z2.d, p7/m, z17.d",
+        "sxtl v3.8h, v2.8b",
+        "sxtl v2.4s, v3.4h",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpmovsxbd ymm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 2 0b01 0x21 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "sunpklo z2.h, z17.b",
-        "sunpklo z16.s, z2.h"
+        "mov z2.d, p7/m, z17.d",
+        "sunpklo z3.h, z2.b",
+        "sunpklo z2.s, z3.h",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpmovsxbq xmm0, xmm1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 2 0b01 0x22 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "sxtl v2.8h, v17.8b",
-        "sxtl v2.4s, v2.4h",
-        "sxtl v16.2d, v2.2s"
+        "mov z2.d, p7/m, z17.d",
+        "sxtl v3.8h, v2.8b",
+        "sxtl v2.4s, v3.4h",
+        "sxtl v3.2d, v2.2s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpmovsxbq ymm0, xmm1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 2 0b01 0x22 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "sunpklo z2.h, z17.b",
-        "sunpklo z2.s, z2.h",
-        "sunpklo z16.d, z2.s"
+        "mov z2.d, p7/m, z17.d",
+        "sunpklo z3.h, z2.b",
+        "sunpklo z2.s, z3.h",
+        "sunpklo z3.d, z2.s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpmovsxwd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x23 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "sxtl v16.4s, v17.4h"
+        "mov z2.d, p7/m, z17.d",
+        "sxtl v3.4s, v2.4h",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpmovsxwd ymm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x23 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "sunpklo z16.s, z17.h"
+        "mov z2.d, p7/m, z17.d",
+        "sunpklo z3.s, z2.h",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpmovsxwq xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 2 0b01 0x24 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "sxtl v2.4s, v17.4h",
-        "sxtl v16.2d, v2.2s"
+        "mov z2.d, p7/m, z17.d",
+        "sxtl v3.4s, v2.4h",
+        "sxtl v2.2d, v3.2s",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpmovsxwq ymm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 2 0b01 0x24 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "sunpklo z2.s, z17.h",
-        "sunpklo z16.d, z2.s"
+        "mov z2.d, p7/m, z17.d",
+        "sunpklo z3.s, z2.h",
+        "sunpklo z2.d, z3.s",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpmovsxdq xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x25 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "sxtl v16.2d, v17.2s"
+        "mov z2.d, p7/m, z17.d",
+        "sxtl v3.2d, v2.2s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpmovsxdq ymm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x25 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "sunpklo z16.d, z17.s"
+        "mov z2.d, p7/m, z17.d",
+        "sunpklo z3.d, z2.s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpmuldq xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 2 0b01 0x28 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "uzp1 v2.4s, v17.4s, v17.4s",
-        "uzp1 v3.4s, v18.4s, v18.4s",
-        "smull v16.2d, v2.2s, v3.2s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "uzp1 v4.4s, v2.4s, v2.4s",
+        "uzp1 v2.4s, v3.4s, v3.4s",
+        "smull v3.2d, v4.2s, v2.2s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpmuldq ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 2 0b01 0x28 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "uzp1 z2.s, z17.s, z17.s",
-        "uzp1 z3.s, z18.s, z18.s",
-        "smullb z0.d, z2.s, z3.s",
-        "smullt z1.d, z2.s, z3.s",
-        "zip1 z16.d, z0.d, z1.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "uzp1 z4.s, z2.s, z2.s",
+        "uzp1 z2.s, z3.s, z3.s",
+        "smullb z0.d, z4.s, z2.s",
+        "smullt z1.d, z4.s, z2.s",
+        "zip1 z3.d, z0.d, z1.d",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpcmpeqq xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 2 0b01 0x29 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "cmeq v16.2d, v17.2d, v18.2d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "cmeq v4.2d, v2.2d, v3.2d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpcmpeqq ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map 2 0b01 0x29 256-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
         "mrs x0, nzcv",
-        "cmpeq p0.d, p7/z, z17.d, z18.d",
-        "not z0.d, p0/m, z17.d",
-        "movprfx z16.d, p0/z, z17.d",
-        "orr z16.d, p0/m, z16.d, z0.d",
-        "msr nzcv, x0"
+        "cmpeq p0.d, p7/z, z2.d, z3.d",
+        "not z0.d, p0/m, z2.d",
+        "movprfx z4.d, p0/z, z2.d",
+        "orr z4.d, p0/m, z4.d, z0.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vmovntdqa xmm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x2a 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ldr q16, [x4]"
+        "mov x20, x4",
+        "ldr q2, [x20]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmovntdqa ymm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x2a 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "ld1b {z16.b}, p7/z, [x4]"
+        "mov x20, x4",
+        "ld1b {z2.b}, p7/z, [x20]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpackusdw xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 2 0b01 0x2b 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "sqxtun v16.4h, v17.4s",
-        "sqxtun2 v16.8h, v18.4s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "sqxtun v4.4h, v2.4s",
+        "sqxtun2 v4.8h, v3.4s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpackusdw ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 19,
+      "ExpectedInstructionCount": 22,
       "Comment": [
         "Map 2 0b01 0x2b 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "sqxtunb z1.h, z18.s",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "sqxtunb z1.h, z3.s",
         "uzp1 z1.h, z1.h, z1.h",
-        "sqxtunb z2.h, z17.s",
-        "uzp1 z2.h, z2.h, z2.h",
-        "splice z2.h, p6, z2.h, z1.h",
-        "mov z1.d, z2.d[1]",
-        "mov z3.d, z2.d",
+        "sqxtunb z4.h, z2.s",
+        "uzp1 z4.h, z4.h, z4.h",
+        "splice z4.h, p6, z4.h, z1.h",
+        "mov z1.d, z4.d[1]",
+        "mov z2.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z3.d, p0/m, z1.d",
+        "mov z2.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z2.d[2]",
-        "mov z16.d, z3.d",
+        "mov z1.d, z4.d[2]",
+        "mov z3.d, z2.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z3.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vmaskmovps xmm0, xmm1, [rax]": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 2 0b01 0x2c 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mrs x20, nzcv",
-        "cmplt p0.s, p6/z, z17.s, #0",
-        "ld1w {z2.s}, p0/z, [x4]",
-        "mov v16.16b, v2.16b",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mrs x21, nzcv",
+        "cmplt p0.s, p6/z, z2.s, #0",
+        "ld1w {z3.s}, p0/z, [x20]",
+        "mov v2.16b, v3.16b",
+        "mov z16.d, p7/m, z2.d",
+        "msr nzcv, x21"
       ]
     },
     "vmaskmovps ymm0, ymm1, [rax]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b01 0x2c 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mrs x20, nzcv",
-        "cmplt p0.s, p7/z, z17.s, #0",
-        "ld1w {z16.s}, p0/z, [x4]",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mrs x21, nzcv",
+        "cmplt p0.s, p7/z, z2.s, #0",
+        "ld1w {z3.s}, p0/z, [x20]",
+        "mov z16.d, p7/m, z3.d",
+        "msr nzcv, x21"
       ]
     },
     "vmaskmovpd xmm0, xmm1, [rax]": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 2 0b01 0x2d 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mrs x20, nzcv",
-        "cmplt p0.d, p6/z, z17.d, #0",
-        "ld1d {z2.d}, p0/z, [x4]",
-        "mov v16.16b, v2.16b",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mrs x21, nzcv",
+        "cmplt p0.d, p6/z, z2.d, #0",
+        "ld1d {z3.d}, p0/z, [x20]",
+        "mov v2.16b, v3.16b",
+        "mov z16.d, p7/m, z2.d",
+        "msr nzcv, x21"
       ]
     },
     "vmaskmovpd ymm0, ymm1, [rax]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b01 0x2d 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mrs x20, nzcv",
-        "cmplt p0.d, p7/z, z17.d, #0",
-        "ld1d {z16.d}, p0/z, [x4]",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mrs x21, nzcv",
+        "cmplt p0.d, p7/z, z2.d, #0",
+        "ld1d {z3.d}, p0/z, [x20]",
+        "mov z16.d, p7/m, z3.d",
+        "msr nzcv, x21"
       ]
     },
     "vmaskmovps [rax], xmm0, xmm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b01 0x2e 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mrs x20, nzcv",
-        "cmplt p0.s, p6/z, z16.s, #0",
-        "st1w {z17.s}, p0, [x4]",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mrs x21, nzcv",
+        "cmplt p0.s, p6/z, z2.s, #0",
+        "st1w {z3.s}, p0, [x20]",
+        "msr nzcv, x21"
       ]
     },
     "vmaskmovps [rax], ymm0, ymm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b01 0x2e 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mrs x20, nzcv",
-        "cmplt p0.s, p7/z, z16.s, #0",
-        "st1w {z17.s}, p0, [x4]",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mrs x21, nzcv",
+        "cmplt p0.s, p7/z, z2.s, #0",
+        "st1w {z3.s}, p0, [x20]",
+        "msr nzcv, x21"
       ]
     },
     "vmaskmovpd [rax], xmm0, xmm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b01 0x2f 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mrs x20, nzcv",
-        "cmplt p0.d, p6/z, z16.d, #0",
-        "st1d {z17.d}, p0, [x4]",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mrs x21, nzcv",
+        "cmplt p0.d, p6/z, z2.d, #0",
+        "st1d {z3.d}, p0, [x20]",
+        "msr nzcv, x21"
       ]
     },
     "vmaskmovpd [rax], ymm0, ymm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b01 0x2f 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mrs x20, nzcv",
-        "cmplt p0.d, p7/z, z16.d, #0",
-        "st1d {z17.d}, p0, [x4]",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mrs x21, nzcv",
+        "cmplt p0.d, p7/z, z2.d, #0",
+        "st1d {z3.d}, p0, [x20]",
+        "msr nzcv, x21"
       ]
     },
     "vpmovzxbw xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x30 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "uxtl v16.8h, v17.8b"
+        "mov z2.d, p7/m, z17.d",
+        "uxtl v3.8h, v2.8b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpmovzxbw ymm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x30 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "uunpklo z16.h, z17.b"
+        "mov z2.d, p7/m, z17.d",
+        "uunpklo z3.h, z2.b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpmovzxbd xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 2 0b01 0x31 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "uxtl v2.8h, v17.8b",
-        "uxtl v16.4s, v2.4h"
+        "mov z2.d, p7/m, z17.d",
+        "uxtl v3.8h, v2.8b",
+        "uxtl v2.4s, v3.4h",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpmovzxbd ymm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 2 0b01 0x31 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "uunpklo z2.h, z17.b",
-        "uunpklo z16.s, z2.h"
+        "mov z2.d, p7/m, z17.d",
+        "uunpklo z3.h, z2.b",
+        "uunpklo z2.s, z3.h",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpmovzxbq xmm0, xmm1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 2 0b01 0x32 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "uxtl v2.8h, v17.8b",
-        "uxtl v2.4s, v2.4h",
-        "uxtl v16.2d, v2.2s"
+        "mov z2.d, p7/m, z17.d",
+        "uxtl v3.8h, v2.8b",
+        "uxtl v2.4s, v3.4h",
+        "uxtl v3.2d, v2.2s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpmovzxbq ymm0, xmm1": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 2 0b01 0x32 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "uunpklo z2.h, z17.b",
-        "uunpklo z2.s, z2.h",
-        "uunpklo z16.d, z2.s"
+        "mov z2.d, p7/m, z17.d",
+        "uunpklo z3.h, z2.b",
+        "uunpklo z2.s, z3.h",
+        "uunpklo z3.d, z2.s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpmovzxwd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x33 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "uxtl v16.4s, v17.4h"
+        "mov z2.d, p7/m, z17.d",
+        "uxtl v3.4s, v2.4h",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpmovzxwd ymm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x33 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "uunpklo z16.s, z17.h"
+        "mov z2.d, p7/m, z17.d",
+        "uunpklo z3.s, z2.h",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpmovzxwq xmm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 2 0b01 0x34 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "uxtl v2.4s, v17.4h",
-        "uxtl v16.2d, v2.2s"
+        "mov z2.d, p7/m, z17.d",
+        "uxtl v3.4s, v2.4h",
+        "uxtl v2.2d, v3.2s",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpmovzxwq ymm0, xmm1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 2 0b01 0x34 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "uunpklo z2.s, z17.h",
-        "uunpklo z16.d, z2.s"
+        "mov z2.d, p7/m, z17.d",
+        "uunpklo z3.s, z2.h",
+        "uunpklo z2.d, z3.s",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpmovzxdq xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x35 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "uxtl v16.2d, v17.2s"
+        "mov z2.d, p7/m, z17.d",
+        "uxtl v3.2d, v2.2s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpmovzxdq ymm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x35 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "uunpklo z16.d, z17.s"
+        "mov z2.d, p7/m, z17.d",
+        "uunpklo z3.d, z2.s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermd ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "Map 2 0b01 0x36 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z2.s, #7",
-        "and z2.d, z17.d, z2.d",
-        "trn1 z2.b, z2.b, z2.b",
-        "trn1 z2.h, z2.h, z2.h",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z4.s, #7",
+        "and z5.d, z2.d, z4.d",
+        "trn1 z2.b, z5.b, z5.b",
+        "trn1 z4.h, z2.h, z2.h",
+        "movprfx z2, z4",
         "lsl z2.b, p7/m, z2.b, #2",
         "mov w20, #0x100",
         "movk w20, #0x302, lsl #16",
-        "mov z3.s, w20",
-        "add z2.b, z2.b, z3.b",
-        "tbl z16.b, {z18.b}, z2.b"
+        "mov z4.s, w20",
+        "add z5.b, z2.b, z4.b",
+        "tbl z2.b, {z3.b}, z5.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpcmpgtq xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 2 0b01 0x37 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "cmgt v16.2d, v17.2d, v18.2d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "cmgt v4.2d, v2.2d, v3.2d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpcmpgtq ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map 2 0b01 0x37 256-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
         "mrs x0, nzcv",
-        "cmpgt p0.d, p7/z, z17.d, z18.d",
-        "not z0.d, p0/m, z17.d",
-        "movprfx z16.d, p0/z, z17.d",
-        "orr z16.d, p0/m, z16.d, z0.d",
-        "msr nzcv, x0"
+        "cmpgt p0.d, p7/z, z2.d, z3.d",
+        "not z0.d, p0/m, z2.d",
+        "movprfx z4.d, p0/z, z2.d",
+        "orr z4.d, p0/m, z4.d, z0.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpminsb xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 2 0b01 0x38 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "smin v16.16b, v17.16b, v18.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "smin v4.16b, v2.16b, v3.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpminsb ymm0, ymm1, ymm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Aliasing source and destination",
         "Map 2 0b01 0x38 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "smin z16.b, p7/m, z16.b, z17.b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z16.d",
+        "movprfx z4, z2",
+        "smin z4.b, p7/m, z4.b, z3.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpminsb ymm0, ymm0, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Aliasing source and destination",
         "Map 2 0b01 0x38 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "smin z16.b, p7/m, z16.b, z18.b"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "smin z4.b, p7/m, z4.b, z3.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpminsb ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 2 0b01 0x38 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z16, z17",
-        "smin z16.b, p7/m, z16.b, z18.b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "smin z4.b, p7/m, z4.b, z3.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpminsd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 2 0b01 0x39 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "smin v16.4s, v17.4s, v18.4s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "smin v4.4s, v2.4s, v3.4s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpminsd ymm0, ymm1, ymm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Aliasing source and destination",
         "Map 2 0b01 0x39 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "smin z16.s, p7/m, z16.s, z17.s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z16.d",
+        "movprfx z4, z2",
+        "smin z4.s, p7/m, z4.s, z3.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpminsd ymm0, ymm0, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Aliasing source and destination",
         "Map 2 0b01 0x39 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "smin z16.s, p7/m, z16.s, z18.s"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "smin z4.s, p7/m, z4.s, z3.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpminsd ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 2 0b01 0x39 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z16, z17",
-        "smin z16.s, p7/m, z16.s, z18.s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "smin z4.s, p7/m, z4.s, z3.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpminuw xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 2 0b01 0x3a 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "umin v16.8h, v17.8h, v18.8h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "umin v4.8h, v2.8h, v3.8h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpminuw ymm0, ymm1, ymm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Aliasing source and destination",
         "Map 2 0b01 0x3a 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "umin z16.h, p7/m, z16.h, z17.h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z16.d",
+        "movprfx z4, z2",
+        "umin z4.h, p7/m, z4.h, z3.h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpminuw ymm0, ymm0, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Aliasing source and destination",
         "Map 2 0b01 0x3a 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "umin z16.h, p7/m, z16.h, z18.h"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "umin z4.h, p7/m, z4.h, z3.h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpminuw ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 2 0b01 0x3a 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z16, z17",
-        "umin z16.h, p7/m, z16.h, z18.h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "umin z4.h, p7/m, z4.h, z3.h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpminud xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 2 0b01 0x3b 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "umin v16.4s, v17.4s, v18.4s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "umin v4.4s, v2.4s, v3.4s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpminud ymm0, ymm1, ymm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Aliasing source and destination",
         "Map 2 0b01 0x3b 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "umin z16.s, p7/m, z16.s, z17.s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z16.d",
+        "movprfx z4, z2",
+        "umin z4.s, p7/m, z4.s, z3.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpminud ymm0, ymm0, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Aliasing source and destination",
         "Map 2 0b01 0x3b 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "umin z16.s, p7/m, z16.s, z18.s"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "umin z4.s, p7/m, z4.s, z3.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpminud ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 2 0b01 0x3b 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z16, z17",
-        "umin z16.s, p7/m, z16.s, z18.s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "umin z4.s, p7/m, z4.s, z3.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmaxsb xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 2 0b01 0x3c 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "smax v16.16b, v17.16b, v18.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "smax v4.16b, v2.16b, v3.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmaxsb ymm0, ymm0, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Aliasing source and destination",
         "Map 2 0b01 0x3c 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "smax z16.b, p7/m, z16.b, z18.b"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "smax z4.b, p7/m, z4.b, z3.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmaxsb ymm0, ymm1, ymm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 2 0b01 0x3c 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "smax z16.b, p7/m, z16.b, z17.b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z16.d",
+        "movprfx z4, z2",
+        "smax z4.b, p7/m, z4.b, z3.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmaxsb ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 2 0b01 0x3c 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z16, z17",
-        "smax z16.b, p7/m, z16.b, z18.b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "smax z4.b, p7/m, z4.b, z3.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmaxsd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 2 0b01 0x3d 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "smax v16.4s, v17.4s, v18.4s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "smax v4.4s, v2.4s, v3.4s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmaxsd ymm0, ymm1, ymm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Aliasing source and destination",
         "Map 2 0b01 0x3d 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "smax z16.s, p7/m, z16.s, z17.s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z16.d",
+        "movprfx z4, z2",
+        "smax z4.s, p7/m, z4.s, z3.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmaxsd ymm0, ymm0, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Aliasing source and destination",
         "Map 2 0b01 0x3d 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "smax z16.s, p7/m, z16.s, z18.s"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "smax z4.s, p7/m, z4.s, z3.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmaxsd ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 2 0b01 0x3d 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z16, z17",
-        "smax z16.s, p7/m, z16.s, z18.s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "smax z4.s, p7/m, z4.s, z3.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmaxuw xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 2 0b01 0x3e 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "umax v16.8h, v17.8h, v18.8h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "umax v4.8h, v2.8h, v3.8h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmaxuw ymm0, ymm1, ymm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Aliasing source and destination",
         "Map 2 0b01 0x3e 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "umax z16.h, p7/m, z16.h, z17.h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z16.d",
+        "movprfx z4, z2",
+        "umax z4.h, p7/m, z4.h, z3.h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmaxuw ymm0, ymm0, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Aliasing source and destination",
         "Map 2 0b01 0x3e 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "umax z16.h, p7/m, z16.h, z18.h"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "umax z4.h, p7/m, z4.h, z3.h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmaxuw ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 2 0b01 0x3e 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z16, z17",
-        "umax z16.h, p7/m, z16.h, z18.h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "umax z4.h, p7/m, z4.h, z3.h",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmaxud xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 2 0b01 0x3f 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "umax v16.4s, v17.4s, v18.4s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "umax v4.4s, v2.4s, v3.4s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmaxud ymm0, ymm0, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Aliasing source and destination",
         "Map 2 0b01 0x3f 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "umax z16.s, p7/m, z16.s, z18.s"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "umax z4.s, p7/m, z4.s, z3.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmaxud ymm0, ymm1, ymm0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 2 0b01 0x3f 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "umax z16.s, p7/m, z16.s, z17.s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z16.d",
+        "movprfx z4, z2",
+        "umax z4.s, p7/m, z4.s, z3.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmaxud ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 2 0b01 0x3f 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z16, z17",
-        "umax z16.s, p7/m, z16.s, z18.s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movprfx z4, z2",
+        "umax z4.s, p7/m, z4.s, z3.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmulld xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 2 0b01 0x40 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mul v16.4s, v17.4s, v18.4s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mul v4.4s, v2.4s, v3.4s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpmulld ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 2 0b01 0x40 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mul z16.s, z17.s, z18.s"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mul z4.s, z2.s, z3.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vphminposuw xmm0, xmm1": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 2 0b01 0x41 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "ldr q2, [x28, #2048]",
-        "zip1 v3.8h, v2.8h, v17.8h",
-        "zip2 v2.8h, v2.8h, v17.8h",
-        "umin v2.4s, v3.4s, v2.4s",
-        "uminv s2, v2.4s",
-        "rev32 v16.8h, v2.8h"
+        "mov z2.d, p7/m, z17.d",
+        "ldr q3, [x28, #2048]",
+        "zip1 v4.8h, v3.8h, v2.8h",
+        "zip2 v5.8h, v3.8h, v2.8h",
+        "umin v2.4s, v4.4s, v5.4s",
+        "uminv s3, v2.4s",
+        "rev32 v2.8h, v3.8h",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpsrlvd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b01 0x45 128-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
         "movi v0.4s, #0x20, lsl #0",
-        "umin v0.4s, v0.4s, v18.4s",
+        "umin v0.4s, v0.4s, v3.4s",
         "neg v0.4s, v0.4s",
-        "ushl v16.4s, v17.4s, v0.4s"
+        "ushl v4.4s, v2.4s, v0.4s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsrlvd ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b01 0x45 256-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
         "mov z1.s, #32",
-        "umin z1.s, p7/m, z1.s, z18.s",
-        "movprfx z16, z17",
-        "lsr z16.s, p7/m, z16.s, z1.s"
+        "umin z1.s, p7/m, z1.s, z3.s",
+        "movprfx z4, z2",
+        "lsr z4.s, p7/m, z4.s, z1.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsrlvq xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map 2 0b01 0x45 128-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
         "mov w0, #0x40",
         "dup v0.2d, x0",
-        "cmhi v1.2d, v18.2d, v0.2d",
-        "bif v0.16b, v18.16b, v1.16b",
+        "cmhi v1.2d, v3.2d, v0.2d",
+        "bif v0.16b, v3.16b, v1.16b",
         "neg v0.2d, v0.2d",
-        "ushl v16.2d, v17.2d, v0.2d"
+        "ushl v4.2d, v2.2d, v0.2d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsrlvq ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b01 0x45 256-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
         "mov z1.d, #64",
-        "umin z1.d, p7/m, z1.d, z18.d",
-        "movprfx z16, z17",
-        "lsr z16.d, p7/m, z16.d, z1.d"
+        "umin z1.d, p7/m, z1.d, z3.d",
+        "movprfx z4, z2",
+        "lsr z4.d, p7/m, z4.d, z1.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsravd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b01 0x46 128-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
         "movi v0.4s, #0x1f, lsl #0",
-        "umin v0.4s, v0.4s, v18.4s",
+        "umin v0.4s, v0.4s, v3.4s",
         "neg v0.4s, v0.4s",
-        "sshl v16.4s, v17.4s, v0.4s"
+        "sshl v4.4s, v2.4s, v0.4s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsravd ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b01 0x46 256-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
         "mov z0.s, #31",
-        "umin z0.s, p7/m, z0.s, z18.s",
-        "movprfx z16, z17",
-        "asr z16.s, p7/m, z16.s, z0.s"
+        "umin z0.s, p7/m, z0.s, z3.s",
+        "movprfx z4, z2",
+        "asr z4.s, p7/m, z4.s, z0.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsllvd xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 2 0b01 0x47 128-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
         "movi v0.4s, #0x20, lsl #0",
-        "umin v0.4s, v0.4s, v18.4s",
-        "ushl v16.4s, v17.4s, v0.4s"
+        "umin v0.4s, v0.4s, v3.4s",
+        "ushl v4.4s, v2.4s, v0.4s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsllvd ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b01 0x47 256-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
         "mov z1.s, #32",
-        "umin z1.s, p7/m, z1.s, z18.s",
-        "movprfx z16, z17",
-        "lsl z16.s, p7/m, z16.s, z1.s"
+        "umin z1.s, p7/m, z1.s, z3.s",
+        "movprfx z4, z2",
+        "lsl z4.s, p7/m, z4.s, z1.s",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsllvq xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 2 0b01 0x47 128-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
         "mov w0, #0x40",
         "dup v0.2d, x0",
-        "cmhi v1.2d, v18.2d, v0.2d",
-        "bif v0.16b, v18.16b, v1.16b",
-        "ushl v16.2d, v17.2d, v0.2d"
+        "cmhi v1.2d, v3.2d, v0.2d",
+        "bif v0.16b, v3.16b, v1.16b",
+        "ushl v4.2d, v2.2d, v0.2d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsllvq ymm0, ymm1, ymm2": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b01 0x47 256-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
         "mov z1.d, #64",
-        "umin z1.d, p7/m, z1.d, z18.d",
-        "movprfx z16, z17",
-        "lsl z16.d, p7/m, z16.d, z1.d"
+        "umin z1.d, p7/m, z1.d, z3.d",
+        "movprfx z4, z2",
+        "lsl z4.d, p7/m, z4.d, z1.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpbroadcastd xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x58 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "dup v16.4s, v17.s[0]"
+        "mov z2.d, p7/m, z17.d",
+        "dup v3.4s, v2.s[0]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpbroadcastd xmm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x58 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ld1r {v16.4s}, [x4]"
+        "mov x20, x4",
+        "ld1r {v2.4s}, [x20]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpbroadcastd ymm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x58 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z16.s, s17"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.s, s2",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpbroadcastd ymm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x58 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "ld1rw {z16.s}, p7/z, [x4]"
+        "mov x20, x4",
+        "ld1rw {z2.s}, p7/z, [x20]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpbroadcastq xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x59 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "dup v16.2d, v17.d[0]"
+        "mov z2.d, p7/m, z17.d",
+        "dup v3.2d, v2.d[0]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpbroadcastq xmm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x59 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ld1r {v16.2d}, [x4]"
+        "mov x20, x4",
+        "ld1r {v2.2d}, [x20]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpbroadcastq ymm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x59 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z16.d, d17"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, d2",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpbroadcastq ymm0, [rax]": {
@@ -1773,45 +2182,55 @@
       "Comment": [
         "Map 2 0b01 0x59 256-bit"
       ],
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ld1rd {z16.d}, p7/z, [x4]"
+        "mov x20, x4",
+        "ld1rd {z2.d}, p7/z, [x20]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vbroadcasti128 ymm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x5a 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "ld1rqb {z16.b}, p7/z, [x4]"
+        "mov x20, x4",
+        "ld1rqb {z2.b}, p7/z, [x20]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpbroadcastb xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x78 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "dup v16.16b, v17.b[0]"
+        "mov z2.d, p7/m, z17.d",
+        "dup v3.16b, v2.b[0]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpbroadcastb xmm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x78 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ld1r {v16.16b}, [x4]"
+        "mov x20, x4",
+        "ld1r {v2.16b}, [x20]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpbroadcastb ymm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x78 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z16.b, b17"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.b, b2",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpbroadcastb ymm0, [rax]": {
@@ -1819,36 +2238,44 @@
       "Comment": [
         "Map 2 0b01 0x78 256-bit"
       ],
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ld1rb {z16.b}, p7/z, [x4]"
+        "mov x20, x4",
+        "ld1rb {z2.b}, p7/z, [x20]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpbroadcastw xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x79 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "dup v16.8h, v17.h[0]"
+        "mov z2.d, p7/m, z17.d",
+        "dup v3.8h, v2.h[0]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpbroadcastw xmm0, [rax]": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x79 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ld1r {v16.8h}, [x4]"
+        "mov x20, x4",
+        "ld1r {v2.8h}, [x20]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpbroadcastw ymm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0x79 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z16.h, h17"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.h, h2",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpbroadcastw ymm0, [rax]": {
@@ -1856,107 +2283,133 @@
       "Comment": [
         "Map 2 0b01 0x79 256-bit"
       ],
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "ExpectedArm64ASM": [
-        "ld1rh {z16.h}, p7/z, [x4]"
+        "mov x20, x4",
+        "ld1rh {z2.h}, p7/z, [x20]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpmaskmovd xmm0, xmm1, [rax]": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 2 0b01 0x8c 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mrs x20, nzcv",
-        "cmplt p0.s, p6/z, z17.s, #0",
-        "ld1w {z2.s}, p0/z, [x4]",
-        "mov v16.16b, v2.16b",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mrs x21, nzcv",
+        "cmplt p0.s, p6/z, z2.s, #0",
+        "ld1w {z3.s}, p0/z, [x20]",
+        "mov v2.16b, v3.16b",
+        "mov z16.d, p7/m, z2.d",
+        "msr nzcv, x21"
       ]
     },
     "vpmaskmovd ymm0, ymm1, [rax]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b01 0x8c 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mrs x20, nzcv",
-        "cmplt p0.s, p7/z, z17.s, #0",
-        "ld1w {z16.s}, p0/z, [x4]",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mrs x21, nzcv",
+        "cmplt p0.s, p7/z, z2.s, #0",
+        "ld1w {z3.s}, p0/z, [x20]",
+        "mov z16.d, p7/m, z3.d",
+        "msr nzcv, x21"
       ]
     },
     "vpmaskmovq xmm0, xmm1, [rax]": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 2 0b01 0x8c 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mrs x20, nzcv",
-        "cmplt p0.d, p6/z, z17.d, #0",
-        "ld1d {z2.d}, p0/z, [x4]",
-        "mov v16.16b, v2.16b",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mrs x21, nzcv",
+        "cmplt p0.d, p6/z, z2.d, #0",
+        "ld1d {z3.d}, p0/z, [x20]",
+        "mov v2.16b, v3.16b",
+        "mov z16.d, p7/m, z2.d",
+        "msr nzcv, x21"
       ]
     },
     "vpmaskmovq ymm0, ymm1, [rax]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b01 0x8c 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mrs x20, nzcv",
-        "cmplt p0.d, p7/z, z17.d, #0",
-        "ld1d {z16.d}, p0/z, [x4]",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mrs x21, nzcv",
+        "cmplt p0.d, p7/z, z2.d, #0",
+        "ld1d {z3.d}, p0/z, [x20]",
+        "mov z16.d, p7/m, z3.d",
+        "msr nzcv, x21"
       ]
     },
     "vpmaskmovd [rax], xmm0, xmm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b01 0x8e 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mrs x20, nzcv",
-        "cmplt p0.s, p6/z, z16.s, #0",
-        "st1w {z17.s}, p0, [x4]",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mrs x21, nzcv",
+        "cmplt p0.s, p6/z, z2.s, #0",
+        "st1w {z3.s}, p0, [x20]",
+        "msr nzcv, x21"
       ]
     },
     "vpmaskmovd [rax], ymm0, ymm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b01 0x8e 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mrs x20, nzcv",
-        "cmplt p0.s, p7/z, z16.s, #0",
-        "st1w {z17.s}, p0, [x4]",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mrs x21, nzcv",
+        "cmplt p0.s, p7/z, z2.s, #0",
+        "st1w {z3.s}, p0, [x20]",
+        "msr nzcv, x21"
       ]
     },
     "vpmaskmovq [rax], xmm0, xmm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b01 0x8e 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mrs x20, nzcv",
-        "cmplt p0.d, p6/z, z16.d, #0",
-        "st1d {z17.d}, p0, [x4]",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mrs x21, nzcv",
+        "cmplt p0.d, p6/z, z2.d, #0",
+        "st1d {z3.d}, p0, [x20]",
+        "msr nzcv, x21"
       ]
     },
     "vpmaskmovq [rax], ymm0, ymm1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b01 0x8e 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mrs x20, nzcv",
-        "cmplt p0.d, p7/z, z16.d, #0",
-        "st1d {z17.d}, p0, [x4]",
-        "msr nzcv, x20"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mrs x21, nzcv",
+        "cmplt p0.d, p7/z, z2.d, #0",
+        "st1d {z3.d}, p0, [x20]",
+        "msr nzcv, x21"
       ]
     },
     "vpgatherdd xmm0, [xmm1*1 + rax], xmm2": {
@@ -3080,25 +3533,30 @@
       ]
     },
     "vaesimc xmm0, xmm1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 2 0b01 0xdb 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "unimplemented (Unimplemented)"
+        "mov z2.d, p7/m, z17.d",
+        "unimplemented (Unimplemented)",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vaesenc xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 2 0b01 0xdc 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov v0.16b, v17.16b",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov v0.16b, v2.16b",
         "unimplemented (Unimplemented)",
         "unimplemented (Unimplemented)",
-        "eor v16.16b, v0.16b, v18.16b"
+        "eor v5.16b, v0.16b, v3.16b",
+        "mov z16.d, p7/m, z5.d"
       ]
     },
     "vaesenc ymm0, ymm1, ymm2": {
@@ -3109,15 +3567,18 @@
       ]
     },
     "vaesenclast xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b01 0xdd 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov v0.16b, v17.16b",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov v0.16b, v2.16b",
         "unimplemented (Unimplemented)",
-        "eor v16.16b, v0.16b, v18.16b"
+        "eor v5.16b, v0.16b, v3.16b",
+        "mov z16.d, p7/m, z5.d"
       ]
     },
     "vaesenclast ymm0, ymm1, ymm2": {
@@ -3128,16 +3589,19 @@
       ]
     },
     "vaesdec xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 2 0b01 0xde 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov v0.16b, v17.16b",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov v0.16b, v2.16b",
         "unimplemented (Unimplemented)",
         "unimplemented (Unimplemented)",
-        "eor v16.16b, v0.16b, v18.16b"
+        "eor v5.16b, v0.16b, v3.16b",
+        "mov z16.d, p7/m, z5.d"
       ]
     },
     "vaesdec ymm0, ymm1, ymm2": {
@@ -3148,15 +3612,18 @@
       ]
     },
     "vaesdeclast xmm0, xmm1, xmm2": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b01 0xdf 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov v0.16b, v17.16b",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov v0.16b, v2.16b",
         "unimplemented (Unimplemented)",
-        "eor v16.16b, v0.16b, v18.16b"
+        "eor v5.16b, v0.16b, v3.16b",
+        "mov z16.d, p7/m, z5.d"
       ]
     },
     "vaesdeclast ymm0, ymm1, ymm2": {
@@ -3167,374 +3634,462 @@
       ]
     },
     "andn eax, ebx, ecx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 2 0b00 0xf2 32-bit"
       ],
       "ExpectedArm64ASM": [
-        "bic w4, w5, w7",
-        "mov x26, x4",
-        "tst w4, w4"
+        "mov x20, x7",
+        "mov x21, x5",
+        "bic w22, w21, w20",
+        "mov x4, x22",
+        "mov x26, x22",
+        "tst w22, w22"
       ]
     },
     "andn rax, rbx, rcx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 2 0b00 0xf2 64-bit"
       ],
       "ExpectedArm64ASM": [
-        "bic x4, x5, x7",
-        "mov x26, x4",
-        "tst x4, x4"
+        "mov x20, x7",
+        "mov x21, x5",
+        "bic x22, x21, x20",
+        "mov x4, x22",
+        "mov x26, x22",
+        "tst x22, x22"
       ]
     },
     "bzhi eax, ebx, ecx": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "Map 2 0b00 0xf5 32-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov w20, #0xffffffff",
-        "lsl w20, w20, w5",
-        "bic w20, w7, w20",
-        "tst x5, #0xe0",
-        "csel w4, w7, w20, ne",
+        "mov x20, x7",
+        "mov x21, x5",
+        "mov w22, #0xffffffff",
+        "lsl w23, w22, w21",
+        "bic w22, w20, w23",
+        "tst x21, #0xe0",
+        "csel w21, w20, w22, ne",
+        "mov x4, x21",
         "cset w20, ne",
-        "tst w4, w4",
+        "tst w21, w21",
         "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "msr nzcv, x20"
+        "orr w22, w21, w20, lsl #29",
+        "msr nzcv, x22"
       ]
     },
     "bzhi rax, rbx, rcx": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "Map 2 0b00 0xf5 64-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov x20, #0xffffffffffffffff",
-        "lsl x20, x20, x5",
-        "bic x20, x7, x20",
-        "tst x5, #0xc0",
-        "csel x4, x7, x20, ne",
+        "mov x20, x7",
+        "mov x21, x5",
+        "mov x22, #0xffffffffffffffff",
+        "lsl x23, x22, x21",
+        "bic x22, x20, x23",
+        "tst x21, #0xc0",
+        "csel x21, x20, x22, ne",
+        "mov x4, x21",
         "cset w20, ne",
-        "tst x4, x4",
+        "tst x21, x21",
         "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "msr nzcv, x20"
+        "orr w22, w21, w20, lsl #29",
+        "msr nzcv, x22"
       ]
     },
     "pext eax, ebx, ecx": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "Map 2 0b10 0xf5 32-bit"
       ],
       "ExpectedArm64ASM": [
-        "cbz w5, #+0x2c",
-        "mov w0, w5",
-        "mov w2, w7",
-        "mov w4, wzr",
+        "mov x20, x7",
+        "mov x21, x5",
+        "cbz w21, #+0x2c",
+        "mov w0, w21",
+        "mov w2, w20",
+        "mov w22, wzr",
         "cbz w0, #+0x20",
         "clz w1, w0",
         "lsl w2, w2, w1",
         "lsl w0, w0, w1",
-        "extr w4, w4, w2, #31",
+        "extr w22, w22, w2, #31",
         "bfc w0, #31, #1",
         "b #-0x18",
-        "mov w4, wzr"
+        "mov w22, wzr",
+        "mov x4, x22"
       ]
     },
     "pext rax, rbx, rcx": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "Map 2 0b10 0xf5 64-bit"
       ],
       "ExpectedArm64ASM": [
-        "cbz x5, #+0x2c",
-        "mov x0, x5",
-        "mov x2, x7",
-        "mov x4, xzr",
+        "mov x20, x7",
+        "mov x21, x5",
+        "cbz x21, #+0x2c",
+        "mov x0, x21",
+        "mov x2, x20",
+        "mov x22, xzr",
         "cbz x0, #+0x20",
         "clz x1, x0",
         "lsl x2, x2, x1",
         "lsl x0, x0, x1",
-        "extr x4, x4, x2, #63",
+        "extr x22, x22, x2, #63",
         "bfc x0, #63, #1",
         "b #-0x18",
-        "mov x4, xzr"
+        "mov x22, xzr",
+        "mov x4, x22"
       ]
     },
     "pdep eax, ebx, ecx": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 17,
       "Comment": [
         "Map 2 0b11 0xf5 32-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov x0, x7",
-        "mov x1, x5",
-        "mov w4, #0x0",
-        "cbz w5, #+0x2c",
+        "mov x20, x7",
+        "mov x21, x5",
+        "mov x0, x20",
+        "mov x1, x21",
+        "mov w22, #0x0",
+        "cbz w21, #+0x2c",
         "neg w2, w1",
         "and w2, w2, w1",
         "sbfx w3, w0, #0, #1",
         "eor w1, w1, w2",
         "and w2, w3, w2",
         "neg w3, w1",
-        "orr w4, w4, w2",
+        "orr w22, w22, w2",
         "lsr w0, w0, #1",
         "and w2, w1, w3",
-        "cbnz w2, #-0x1c"
+        "cbnz w2, #-0x1c",
+        "mov x4, x22"
       ]
     },
     "pdep rax, rbx, rcx": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 17,
       "Comment": [
         "Map 2 0b11 0xf5 64-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov x0, x7",
-        "mov x1, x5",
-        "mov x4, #0x0",
-        "cbz x5, #+0x2c",
+        "mov x20, x7",
+        "mov x21, x5",
+        "mov x0, x20",
+        "mov x1, x21",
+        "mov x22, #0x0",
+        "cbz x21, #+0x2c",
         "neg x2, x1",
         "and x2, x2, x1",
         "sbfx x3, x0, #0, #1",
         "eor x1, x1, x2",
         "and x2, x3, x2",
         "neg x3, x1",
-        "orr x4, x4, x2",
+        "orr x22, x22, x2",
         "lsr x0, x0, #1",
         "and x2, x1, x3",
-        "cbnz x2, #-0x1c"
+        "cbnz x2, #-0x1c",
+        "mov x4, x22"
       ]
     },
     "mulx eax, ebx, ecx": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map 2 0b11 0xf6 32-bit"
       ],
       "ExpectedArm64ASM": [
-        "mul w7, w5, w6",
-        "ubfx x0, x5, #0, #32",
-        "ubfx x1, x6, #0, #32",
-        "mul x4, x0, x1",
-        "lsr x4, x4, #32"
+        "mov x20, x5",
+        "mov x21, x6",
+        "mul w22, w20, w21",
+        "ubfx x0, x20, #0, #32",
+        "ubfx x1, x21, #0, #32",
+        "mul x23, x0, x1",
+        "lsr x23, x23, #32",
+        "mov x7, x22",
+        "mov x4, x23"
       ]
     },
     "mulx eax, eax, ebx": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Same two destinations should only compute high part",
         "Map 2 0b11 0xf6 32-bit"
       ],
       "ExpectedArm64ASM": [
-        "ubfx x0, x7, #0, #32",
-        "ubfx x1, x6, #0, #32",
-        "mul x4, x0, x1",
-        "lsr x4, x4, #32"
+        "mov x20, x7",
+        "mov x21, x6",
+        "ubfx x0, x20, #0, #32",
+        "ubfx x1, x21, #0, #32",
+        "mul x22, x0, x1",
+        "lsr x22, x22, #32",
+        "mov x4, x22"
       ]
     },
     "mulx eax, ebx, [ecx]": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "Map 2 0b11 0xf6 32-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov w20, w5",
-        "ldr w20, [x20]",
-        "mul w7, w20, w6",
+        "mov x20, x5",
+        "mov w21, w20",
+        "ldr w20, [x21]",
+        "mov x21, x6",
+        "mul w22, w20, w21",
         "ubfx x0, x20, #0, #32",
-        "ubfx x1, x6, #0, #32",
-        "mul x4, x0, x1",
-        "lsr x4, x4, #32"
+        "ubfx x1, x21, #0, #32",
+        "mul x23, x0, x1",
+        "lsr x23, x23, #32",
+        "mov x7, x22",
+        "mov x4, x23"
       ]
     },
     "mulx rax, rbx, rcx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 2 0b11 0xf6 64-bit"
       ],
       "ExpectedArm64ASM": [
-        "mul x7, x5, x6",
-        "umulh x4, x5, x6"
+        "mov x20, x5",
+        "mov x21, x6",
+        "mul x22, x20, x21",
+        "umulh x23, x20, x21",
+        "mov x7, x22",
+        "mov x4, x23"
       ]
     },
     "mulx rax, rax, rbx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Same two destinations should only compute high part",
         "Map 2 0b11 0xf6 64-bit"
       ],
       "ExpectedArm64ASM": [
-        "umulh x4, x7, x6"
+        "mov x20, x7",
+        "mov x21, x6",
+        "umulh x22, x20, x21",
+        "mov x4, x22"
       ]
     },
     "mulx rax, rbx, [rcx]": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 2 0b11 0xf6 64-bit"
       ],
       "ExpectedArm64ASM": [
-        "ldr x20, [x5]",
-        "mul x7, x20, x6",
-        "umulh x4, x20, x6"
+        "mov x20, x5",
+        "ldr x21, [x20]",
+        "mov x20, x6",
+        "mul x22, x21, x20",
+        "umulh x23, x21, x20",
+        "mov x7, x22",
+        "mov x4, x23"
       ]
     },
     "bextr eax, ebx, ecx": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "Map 2 0b00 0xf7 32-bit"
       ],
       "ExpectedArm64ASM": [
-        "uxtb w20, w5",
-        "lsr w21, w7, w20",
-        "mov w22, #0x0",
+        "mov x20, x7",
+        "mov x21, x5",
+        "uxtb w22, w21",
+        "lsr w23, w20, w22",
+        "mov w20, #0x0",
+        "cmp w22, #0x1f (31)",
+        "csel w24, w23, w20, ls",
+        "ubfx w20, w21, #8, #8",
+        "mov x21, #0xffffffffffffffff",
+        "lsl w22, w21, w20",
+        "bic w21, w24, w22",
         "cmp w20, #0x1f (31)",
-        "csel w20, w21, w22, ls",
-        "ubfx w21, w5, #8, #8",
-        "mov x22, #0xffffffffffffffff",
-        "lsl w22, w22, w21",
-        "bic w22, w20, w22",
-        "cmp w21, #0x1f (31)",
-        "csel w4, w22, w20, ls",
-        "tst w4, w4"
+        "csel w22, w21, w24, ls",
+        "mov x4, x22",
+        "tst w22, w22"
       ]
     },
     "bextr rax, rbx, rcx": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "Map 2 0b00 0xf7 64-bit"
       ],
       "ExpectedArm64ASM": [
-        "uxtb x20, w5",
-        "lsr x21, x7, x20",
-        "mov w22, #0x0",
+        "mov x20, x7",
+        "mov x21, x5",
+        "uxtb x22, w21",
+        "lsr x23, x20, x22",
+        "mov w20, #0x0",
+        "cmp x22, #0x3f (63)",
+        "csel x24, x23, x20, ls",
+        "ubfx x20, x21, #8, #8",
+        "mov x21, #0xffffffffffffffff",
+        "lsl x22, x21, x20",
+        "bic x21, x24, x22",
         "cmp x20, #0x3f (63)",
-        "csel x20, x21, x22, ls",
-        "ubfx x21, x5, #8, #8",
-        "mov x22, #0xffffffffffffffff",
-        "lsl x22, x22, x21",
-        "bic x22, x20, x22",
-        "cmp x21, #0x3f (63)",
-        "csel x4, x22, x20, ls",
-        "tst x4, x4"
+        "csel x22, x21, x24, ls",
+        "mov x4, x22",
+        "tst x22, x22"
       ]
     },
     "shlx eax, ebx, ecx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 2 0b01 0xf7 32-bit"
       ],
       "ExpectedArm64ASM": [
-        "lsl w4, w7, w5"
+        "mov x20, x7",
+        "mov x21, x5",
+        "lsl w22, w20, w21",
+        "mov x4, x22"
       ]
     },
     "shlx eax, [ebx], ecx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 2 0b01 0xf7 32-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov w20, w7",
-        "ldr w20, [x20]",
-        "lsl w4, w20, w5"
+        "mov x20, x7",
+        "mov w21, w20",
+        "ldr w20, [x21]",
+        "mov x21, x5",
+        "lsl w22, w20, w21",
+        "mov x4, x22"
       ]
     },
     "shlx rax, rbx, rcx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 2 0b01 0xf7 64-bit"
       ],
       "ExpectedArm64ASM": [
-        "lsl x4, x7, x5"
+        "mov x20, x7",
+        "mov x21, x5",
+        "lsl x22, x20, x21",
+        "mov x4, x22"
       ]
     },
     "shlx rax, [rbx], rcx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 2 0b01 0xf7 64-bit"
       ],
       "ExpectedArm64ASM": [
-        "ldr x20, [x7]",
-        "lsl x4, x20, x5"
+        "mov x20, x7",
+        "ldr x21, [x20]",
+        "mov x20, x5",
+        "lsl x22, x21, x20",
+        "mov x4, x22"
       ]
     },
     "sarx eax, ebx, ecx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 2 0b10 0xf7 32-bit"
       ],
       "ExpectedArm64ASM": [
-        "asr w4, w7, w5"
+        "mov x20, x7",
+        "mov x21, x5",
+        "asr w22, w20, w21",
+        "mov x4, x22"
       ]
     },
     "sarx eax, [ebx], ecx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 2 0b10 0xf7 32-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov w20, w7",
-        "ldr w20, [x20]",
-        "asr w4, w20, w5"
+        "mov x20, x7",
+        "mov w21, w20",
+        "ldr w20, [x21]",
+        "mov x21, x5",
+        "asr w22, w20, w21",
+        "mov x4, x22"
       ]
     },
     "sarx rax, rbx, rcx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 2 0b10 0xf7 64-bit"
       ],
       "ExpectedArm64ASM": [
-        "asr x4, x7, x5"
+        "mov x20, x7",
+        "mov x21, x5",
+        "asr x22, x20, x21",
+        "mov x4, x22"
       ]
     },
     "sarx rax, [rbx], rcx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 2 0b10 0xf7 64-bit"
       ],
       "ExpectedArm64ASM": [
-        "ldr x20, [x7]",
-        "asr x4, x20, x5"
+        "mov x20, x7",
+        "ldr x21, [x20]",
+        "mov x20, x5",
+        "asr x22, x21, x20",
+        "mov x4, x22"
       ]
     },
     "shrx eax, ebx, ecx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 2 0b11 0xf7 32-bit"
       ],
       "ExpectedArm64ASM": [
-        "lsr w4, w7, w5"
+        "mov x20, x7",
+        "mov x21, x5",
+        "lsr w22, w20, w21",
+        "mov x4, x22"
       ]
     },
     "shrx eax, [ebx], ecx": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 2 0b11 0xf7 32-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov w20, w7",
-        "ldr w20, [x20]",
-        "lsr w4, w20, w5"
+        "mov x20, x7",
+        "mov w21, w20",
+        "ldr w20, [x21]",
+        "mov x21, x5",
+        "lsr w22, w20, w21",
+        "mov x4, x22"
       ]
     },
     "shrx rax, rbx, rcx": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 2 0b11 0xf7 64-bit"
       ],
       "ExpectedArm64ASM": [
-        "lsr x4, x7, x5"
+        "mov x20, x7",
+        "mov x21, x5",
+        "lsr x22, x20, x21",
+        "mov x4, x22"
       ]
     },
     "shrx rax, [rbx], rcx": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 2 0b11 0xf7 64-bit"
       ],
       "ExpectedArm64ASM": [
-        "ldr x20, [x7]",
-        "lsr x4, x20, x5"
+        "mov x20, x7",
+        "ldr x21, [x20]",
+        "mov x20, x5",
+        "lsr x22, x21, x20",
+        "mov x4, x22"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/VEX_map3.json b/unittests/InstructionCountCI/VEX_map3.json
index 2a58bbef8b..ca573c1450 100644
--- a/unittests/InstructionCountCI/VEX_map3.json
+++ b/unittests/InstructionCountCI/VEX_map3.json
@@ -11,289 +11,407 @@
   },
   "Instructions": {
     "vpermq ymm0, ymm1, 00000000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b01 0x00 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z16.d, d17"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, d2",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermq ymm0, ymm1, 01010101b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b01 0x00 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z16.d, z17.d[1]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, z2.d[1]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermq ymm0, ymm1, 10101010b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b01 0x00 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z16.d, z17.d[2]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, z2.d[2]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermq ymm0, ymm1, 11111111b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b01 0x00 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z16.d, z17.d[3]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, z2.d[3]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermpd ymm0, ymm1, 00000000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b01 0x01 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z16.d, d17"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, d2",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermpd ymm0, ymm1, 01010101b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b01 0x01 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z16.d, z17.d[1]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, z2.d[1]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermpd ymm0, ymm1, 10101010b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b01 0x01 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z16.d, z17.d[2]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, z2.d[2]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermpd ymm0, ymm1, 11111111b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b01 0x01 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z16.d, z17.d[3]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, z2.d[3]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpblendd xmm0, xmm1, 0000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b01 0x02 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v16.16b"
+        "mov z2.d, p7/m, z16.d",
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpblendd xmm0, xmm1, 0001b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "Map 3 0b01 0x02 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov v2.s[0], v17.s[0]",
-        "mov v2.s[1], v16.s[1]",
-        "mov v2.s[2], v16.s[2]",
-        "mov v2.s[3], v16.s[3]",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "movi v4.2d, #0x0",
+        "mov v5.16b, v4.16b",
+        "mov v5.s[0], v3.s[0]",
+        "mov v3.16b, v5.16b",
+        "mov v3.s[1], v2.s[1]",
+        "mov v4.16b, v3.16b",
+        "mov v4.s[2], v2.s[2]",
+        "mov v3.16b, v4.16b",
+        "mov v3.s[3], v2.s[3]",
+        "mov v2.16b, v3.16b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpblendd xmm0, xmm1, 0010b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "Map 3 0b01 0x02 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov v2.s[0], v16.s[0]",
-        "mov v2.s[1], v17.s[1]",
-        "mov v2.s[2], v16.s[2]",
-        "mov v2.s[3], v16.s[3]",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "movi v4.2d, #0x0",
+        "mov v5.16b, v4.16b",
+        "mov v5.s[0], v2.s[0]",
+        "mov v4.16b, v5.16b",
+        "mov v4.s[1], v3.s[1]",
+        "mov v3.16b, v4.16b",
+        "mov v3.s[2], v2.s[2]",
+        "mov v4.16b, v3.16b",
+        "mov v4.s[3], v2.s[3]",
+        "mov v2.16b, v4.16b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpblendd xmm0, xmm1, 0011b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "Map 3 0b01 0x02 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov v2.s[0], v17.s[0]",
-        "mov v2.s[1], v17.s[1]",
-        "mov v2.s[2], v16.s[2]",
-        "mov v2.s[3], v16.s[3]",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "movi v4.2d, #0x0",
+        "mov v5.16b, v4.16b",
+        "mov v5.s[0], v3.s[0]",
+        "mov v4.16b, v5.16b",
+        "mov v4.s[1], v3.s[1]",
+        "mov v3.16b, v4.16b",
+        "mov v3.s[2], v2.s[2]",
+        "mov v4.16b, v3.16b",
+        "mov v4.s[3], v2.s[3]",
+        "mov v2.16b, v4.16b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpblendd xmm0, xmm1, 0100b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "Map 3 0b01 0x02 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov v2.s[0], v16.s[0]",
-        "mov v2.s[1], v16.s[1]",
-        "mov v2.s[2], v17.s[2]",
-        "mov v2.s[3], v16.s[3]",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "movi v4.2d, #0x0",
+        "mov v5.16b, v4.16b",
+        "mov v5.s[0], v2.s[0]",
+        "mov v4.16b, v5.16b",
+        "mov v4.s[1], v2.s[1]",
+        "mov v5.16b, v4.16b",
+        "mov v5.s[2], v3.s[2]",
+        "mov v3.16b, v5.16b",
+        "mov v3.s[3], v2.s[3]",
+        "mov v2.16b, v3.16b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpblendd xmm0, xmm1, 0101b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "Map 3 0b01 0x02 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov v2.s[0], v17.s[0]",
-        "mov v2.s[1], v16.s[1]",
-        "mov v2.s[2], v17.s[2]",
-        "mov v2.s[3], v16.s[3]",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "movi v4.2d, #0x0",
+        "mov v5.16b, v4.16b",
+        "mov v5.s[0], v3.s[0]",
+        "mov v4.16b, v5.16b",
+        "mov v4.s[1], v2.s[1]",
+        "mov v5.16b, v4.16b",
+        "mov v5.s[2], v3.s[2]",
+        "mov v3.16b, v5.16b",
+        "mov v3.s[3], v2.s[3]",
+        "mov v2.16b, v3.16b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpblendd xmm0, xmm1, 0110b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "Map 3 0b01 0x02 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov v2.s[0], v16.s[0]",
-        "mov v2.s[1], v17.s[1]",
-        "mov v2.s[2], v17.s[2]",
-        "mov v2.s[3], v16.s[3]",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "movi v4.2d, #0x0",
+        "mov v5.16b, v4.16b",
+        "mov v5.s[0], v2.s[0]",
+        "mov v4.16b, v5.16b",
+        "mov v4.s[1], v3.s[1]",
+        "mov v5.16b, v4.16b",
+        "mov v5.s[2], v3.s[2]",
+        "mov v3.16b, v5.16b",
+        "mov v3.s[3], v2.s[3]",
+        "mov v2.16b, v3.16b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpblendd xmm0, xmm1, 0111b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "Map 3 0b01 0x02 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov v2.s[0], v17.s[0]",
-        "mov v2.s[1], v17.s[1]",
-        "mov v2.s[2], v17.s[2]",
-        "mov v2.s[3], v16.s[3]",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "movi v4.2d, #0x0",
+        "mov v5.16b, v4.16b",
+        "mov v5.s[0], v3.s[0]",
+        "mov v4.16b, v5.16b",
+        "mov v4.s[1], v3.s[1]",
+        "mov v5.16b, v4.16b",
+        "mov v5.s[2], v3.s[2]",
+        "mov v3.16b, v5.16b",
+        "mov v3.s[3], v2.s[3]",
+        "mov v2.16b, v3.16b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpblendd xmm0, xmm1, 1000b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "Map 3 0b01 0x02 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov v2.s[0], v16.s[0]",
-        "mov v2.s[1], v16.s[1]",
-        "mov v2.s[2], v16.s[2]",
-        "mov v2.s[3], v17.s[3]",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "movi v4.2d, #0x0",
+        "mov v5.16b, v4.16b",
+        "mov v5.s[0], v2.s[0]",
+        "mov v4.16b, v5.16b",
+        "mov v4.s[1], v2.s[1]",
+        "mov v5.16b, v4.16b",
+        "mov v5.s[2], v2.s[2]",
+        "mov v2.16b, v5.16b",
+        "mov v2.s[3], v3.s[3]",
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpblendd xmm0, xmm1, 1001b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "Map 3 0b01 0x02 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov v2.s[0], v17.s[0]",
-        "mov v2.s[1], v16.s[1]",
-        "mov v2.s[2], v16.s[2]",
-        "mov v2.s[3], v17.s[3]",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "movi v4.2d, #0x0",
+        "mov v5.16b, v4.16b",
+        "mov v5.s[0], v3.s[0]",
+        "mov v4.16b, v5.16b",
+        "mov v4.s[1], v2.s[1]",
+        "mov v5.16b, v4.16b",
+        "mov v5.s[2], v2.s[2]",
+        "mov v2.16b, v5.16b",
+        "mov v2.s[3], v3.s[3]",
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpblendd xmm0, xmm1, 1010b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "Map 3 0b01 0x02 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov v2.s[0], v16.s[0]",
-        "mov v2.s[1], v17.s[1]",
-        "mov v2.s[2], v16.s[2]",
-        "mov v2.s[3], v17.s[3]",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "movi v4.2d, #0x0",
+        "mov v5.16b, v4.16b",
+        "mov v5.s[0], v2.s[0]",
+        "mov v4.16b, v5.16b",
+        "mov v4.s[1], v3.s[1]",
+        "mov v5.16b, v4.16b",
+        "mov v5.s[2], v2.s[2]",
+        "mov v2.16b, v5.16b",
+        "mov v2.s[3], v3.s[3]",
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpblendd xmm0, xmm1, 1011b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "Map 3 0b01 0x02 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov v2.s[0], v17.s[0]",
-        "mov v2.s[1], v17.s[1]",
-        "mov v2.s[2], v16.s[2]",
-        "mov v2.s[3], v17.s[3]",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "movi v4.2d, #0x0",
+        "mov v5.16b, v4.16b",
+        "mov v5.s[0], v3.s[0]",
+        "mov v4.16b, v5.16b",
+        "mov v4.s[1], v3.s[1]",
+        "mov v5.16b, v4.16b",
+        "mov v5.s[2], v2.s[2]",
+        "mov v2.16b, v5.16b",
+        "mov v2.s[3], v3.s[3]",
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpblendd xmm0, xmm1, 1100b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "Map 3 0b01 0x02 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov v2.s[0], v16.s[0]",
-        "mov v2.s[1], v16.s[1]",
-        "mov v2.s[2], v17.s[2]",
-        "mov v2.s[3], v17.s[3]",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "movi v4.2d, #0x0",
+        "mov v5.16b, v4.16b",
+        "mov v5.s[0], v2.s[0]",
+        "mov v4.16b, v5.16b",
+        "mov v4.s[1], v2.s[1]",
+        "mov v2.16b, v4.16b",
+        "mov v2.s[2], v3.s[2]",
+        "mov v4.16b, v2.16b",
+        "mov v4.s[3], v3.s[3]",
+        "mov v2.16b, v4.16b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpblendd xmm0, xmm1, 1101b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "Map 3 0b01 0x02 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov v2.s[0], v17.s[0]",
-        "mov v2.s[1], v16.s[1]",
-        "mov v2.s[2], v17.s[2]",
-        "mov v2.s[3], v17.s[3]",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "movi v4.2d, #0x0",
+        "mov v5.16b, v4.16b",
+        "mov v5.s[0], v3.s[0]",
+        "mov v4.16b, v5.16b",
+        "mov v4.s[1], v2.s[1]",
+        "mov v2.16b, v4.16b",
+        "mov v2.s[2], v3.s[2]",
+        "mov v4.16b, v2.16b",
+        "mov v4.s[3], v3.s[3]",
+        "mov v2.16b, v4.16b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpblendd xmm0, xmm1, 1110b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "Map 3 0b01 0x02 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov v2.s[0], v16.s[0]",
-        "mov v2.s[1], v17.s[1]",
-        "mov v2.s[2], v17.s[2]",
-        "mov v2.s[3], v17.s[3]",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "movi v4.2d, #0x0",
+        "mov v5.16b, v4.16b",
+        "mov v5.s[0], v2.s[0]",
+        "mov v2.16b, v5.16b",
+        "mov v2.s[1], v3.s[1]",
+        "mov v4.16b, v2.16b",
+        "mov v4.s[2], v3.s[2]",
+        "mov v2.16b, v4.16b",
+        "mov v2.s[3], v3.s[3]",
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpblendd xmm0, xmm1, 1111b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b01 0x02 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpblendd ymm0, ymm1, 00000000b": {
@@ -307,2757 +425,3274 @@
       ]
     },
     "vpblendd ymm0, ymm1, 01010101b": {
-      "ExpectedInstructionCount": 50,
+      "ExpectedInstructionCount": 60,
       "Comment": [
         "Map 3 0b01 0x02 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.s, s17",
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "movi v4.2d, #0x0",
+        "mov z1.s, s3",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-4",
-        "mov z2.s, p0/m, z1.s",
+        "mov z5.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z16.s[1]",
+        "mov z1.s, z2.s[1]",
+        "mov z4.d, z5.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-3",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[2]",
+        "mov z1.s, z3.s[2]",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-2",
-        "mov z2.s, p0/m, z1.s",
+        "mov z5.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z16.s[3]",
+        "mov z1.s, z2.s[3]",
+        "mov z4.d, z5.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-1",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[4]",
+        "mov z1.s, z3.s[4]",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #0",
-        "mov z2.s, p0/m, z1.s",
+        "mov z5.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z16.s[5]",
+        "mov z1.s, z2.s[5]",
+        "mov z4.d, z5.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #1",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[6]",
+        "mov z1.s, z3.s[6]",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #2",
-        "mov z2.s, p0/m, z1.s",
+        "mov z5.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z16.s[7]",
-        "mov z16.d, z2.d",
+        "mov z1.s, z2.s[7]",
+        "mov z3.d, z5.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #3",
-        "mov z16.s, p0/m, z1.s",
-        "msr nzcv, x0"
+        "mov z3.s, p0/m, z1.s",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpblendd ymm0, ymm1, 10101010b": {
-      "ExpectedInstructionCount": 50,
+      "ExpectedInstructionCount": 60,
       "Comment": [
         "Map 3 0b01 0x02 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.s, s16",
+        "mov z2.d, p7/m, z16.d",
+        "mov z3.d, p7/m, z17.d",
+        "movi v4.2d, #0x0",
+        "mov z1.s, s2",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-4",
-        "mov z2.s, p0/m, z1.s",
+        "mov z5.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[1]",
+        "mov z1.s, z3.s[1]",
+        "mov z4.d, z5.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-3",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z16.s[2]",
+        "mov z1.s, z2.s[2]",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-2",
-        "mov z2.s, p0/m, z1.s",
+        "mov z5.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[3]",
+        "mov z1.s, z3.s[3]",
+        "mov z4.d, z5.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-1",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z16.s[4]",
+        "mov z1.s, z2.s[4]",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #0",
-        "mov z2.s, p0/m, z1.s",
+        "mov z5.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[5]",
+        "mov z1.s, z3.s[5]",
+        "mov z4.d, z5.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #1",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z16.s[6]",
+        "mov z1.s, z2.s[6]",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #2",
-        "mov z2.s, p0/m, z1.s",
+        "mov z5.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[7]",
-        "mov z16.d, z2.d",
+        "mov z1.s, z3.s[7]",
+        "mov z2.d, z5.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #3",
-        "mov z16.s, p0/m, z1.s",
-        "msr nzcv, x0"
+        "mov z2.s, p0/m, z1.s",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpblendd ymm0, ymm1, 11111111b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map 3 0b01 0x02 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z16.d, p7/m, z17.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpermilps xmm0, xmm1, 00000000b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "Map 3 0b01 0x03 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov v2.s[0], v17.s[0]",
-        "mov v2.s[1], v17.s[0]",
-        "mov v2.s[2], v17.s[0]",
-        "mov v16.16b, v2.16b",
-        "mov v16.s[3], v17.s[0]"
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov v4.16b, v3.16b",
+        "mov v4.s[0], v2.s[0]",
+        "mov v3.16b, v4.16b",
+        "mov v3.s[1], v2.s[0]",
+        "mov v4.16b, v3.16b",
+        "mov v4.s[2], v2.s[0]",
+        "mov v3.16b, v4.16b",
+        "mov v3.s[3], v2.s[0]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermilps xmm0, xmm1, 01010101b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "Map 3 0b01 0x03 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov v2.s[0], v17.s[1]",
-        "mov v2.s[1], v17.s[1]",
-        "mov v2.s[2], v17.s[1]",
-        "mov v16.16b, v2.16b",
-        "mov v16.s[3], v17.s[1]"
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov v4.16b, v3.16b",
+        "mov v4.s[0], v2.s[1]",
+        "mov v3.16b, v4.16b",
+        "mov v3.s[1], v2.s[1]",
+        "mov v4.16b, v3.16b",
+        "mov v4.s[2], v2.s[1]",
+        "mov v3.16b, v4.16b",
+        "mov v3.s[3], v2.s[1]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermilps xmm0, xmm1, 10101010b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "Map 3 0b01 0x03 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov v2.s[0], v17.s[2]",
-        "mov v2.s[1], v17.s[2]",
-        "mov v2.s[2], v17.s[2]",
-        "mov v16.16b, v2.16b",
-        "mov v16.s[3], v17.s[2]"
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov v4.16b, v3.16b",
+        "mov v4.s[0], v2.s[2]",
+        "mov v3.16b, v4.16b",
+        "mov v3.s[1], v2.s[2]",
+        "mov v4.16b, v3.16b",
+        "mov v4.s[2], v2.s[2]",
+        "mov v3.16b, v4.16b",
+        "mov v3.s[3], v2.s[2]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermilps xmm0, xmm1, 11111111b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "Map 3 0b01 0x03 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov v2.s[0], v17.s[3]",
-        "mov v2.s[1], v17.s[3]",
-        "mov v2.s[2], v17.s[3]",
-        "mov v16.16b, v2.16b",
-        "mov v16.s[3], v17.s[3]"
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov v4.16b, v3.16b",
+        "mov v4.s[0], v2.s[3]",
+        "mov v3.16b, v4.16b",
+        "mov v3.s[1], v2.s[3]",
+        "mov v4.16b, v3.16b",
+        "mov v4.s[2], v2.s[3]",
+        "mov v3.16b, v4.16b",
+        "mov v3.s[3], v2.s[3]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermilps ymm0, ymm1, 00000000b": {
-      "ExpectedInstructionCount": 50,
+      "ExpectedInstructionCount": 59,
       "Comment": [
         "Map 3 0b01 0x03 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.s, s17",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.s, s2",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-4",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s17",
+        "mov z1.s, s2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-3",
-        "mov z2.s, p0/m, z1.s",
+        "mov z3.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s17",
+        "mov z1.s, s2",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-2",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s17",
+        "mov z1.s, s2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-1",
-        "mov z2.s, p0/m, z1.s",
+        "mov z3.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[4]",
+        "mov z1.s, z2.s[4]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #0",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[4]",
+        "mov z1.s, z2.s[4]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #1",
-        "mov z2.s, p0/m, z1.s",
+        "mov z3.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[4]",
+        "mov z1.s, z2.s[4]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #2",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[4]",
-        "mov z16.d, z2.d",
+        "mov z1.s, z2.s[4]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #3",
-        "mov z16.s, p0/m, z1.s",
-        "msr nzcv, x0"
+        "mov z3.s, p0/m, z1.s",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermilps ymm0, ymm1, 01010101b": {
-      "ExpectedInstructionCount": 50,
+      "ExpectedInstructionCount": 59,
       "Comment": [
         "Map 3 0b01 0x03 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.s, z17.s[1]",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.s, z2.s[1]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-4",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[1]",
+        "mov z1.s, z2.s[1]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-3",
-        "mov z2.s, p0/m, z1.s",
+        "mov z3.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[1]",
+        "mov z1.s, z2.s[1]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-2",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[1]",
+        "mov z1.s, z2.s[1]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-1",
-        "mov z2.s, p0/m, z1.s",
+        "mov z3.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[5]",
+        "mov z1.s, z2.s[5]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #0",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[5]",
+        "mov z1.s, z2.s[5]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #1",
-        "mov z2.s, p0/m, z1.s",
+        "mov z3.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[5]",
+        "mov z1.s, z2.s[5]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #2",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[5]",
-        "mov z16.d, z2.d",
+        "mov z1.s, z2.s[5]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #3",
-        "mov z16.s, p0/m, z1.s",
-        "msr nzcv, x0"
+        "mov z3.s, p0/m, z1.s",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermilps ymm0, ymm1, 10101010b": {
-      "ExpectedInstructionCount": 50,
+      "ExpectedInstructionCount": 59,
       "Comment": [
         "Map 3 0b01 0x03 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.s, z17.s[2]",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.s, z2.s[2]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-4",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[2]",
+        "mov z1.s, z2.s[2]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-3",
-        "mov z2.s, p0/m, z1.s",
+        "mov z3.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[2]",
+        "mov z1.s, z2.s[2]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-2",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[2]",
+        "mov z1.s, z2.s[2]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-1",
-        "mov z2.s, p0/m, z1.s",
+        "mov z3.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[6]",
+        "mov z1.s, z2.s[6]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #0",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[6]",
+        "mov z1.s, z2.s[6]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #1",
-        "mov z2.s, p0/m, z1.s",
+        "mov z3.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[6]",
+        "mov z1.s, z2.s[6]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #2",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[6]",
-        "mov z16.d, z2.d",
+        "mov z1.s, z2.s[6]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #3",
-        "mov z16.s, p0/m, z1.s",
-        "msr nzcv, x0"
+        "mov z3.s, p0/m, z1.s",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermilps ymm0, ymm1, 11111111b": {
-      "ExpectedInstructionCount": 50,
+      "ExpectedInstructionCount": 59,
       "Comment": [
         "Map 3 0b01 0x03 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.s, z17.s[3]",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.s, z2.s[3]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-4",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[3]",
+        "mov z1.s, z2.s[3]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-3",
-        "mov z2.s, p0/m, z1.s",
+        "mov z3.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[3]",
+        "mov z1.s, z2.s[3]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-2",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[3]",
+        "mov z1.s, z2.s[3]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-1",
-        "mov z2.s, p0/m, z1.s",
+        "mov z3.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[7]",
+        "mov z1.s, z2.s[7]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #0",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[7]",
+        "mov z1.s, z2.s[7]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #1",
-        "mov z2.s, p0/m, z1.s",
+        "mov z3.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[7]",
+        "mov z1.s, z2.s[7]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #2",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[7]",
-        "mov z16.d, z2.d",
+        "mov z1.s, z2.s[7]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #3",
-        "mov z16.s, p0/m, z1.s",
-        "msr nzcv, x0"
+        "mov z3.s, p0/m, z1.s",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermilpd xmm0, xmm1, 00b": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 3 0b01 0x05 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov v2.d[0], v17.d[0]",
-        "mov v16.16b, v2.16b",
-        "mov v16.d[1], v17.d[0]"
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov v4.16b, v3.16b",
+        "mov v4.d[0], v2.d[0]",
+        "mov v3.16b, v4.16b",
+        "mov v3.d[1], v2.d[0]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermilpd xmm0, xmm1, 01b": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 3 0b01 0x05 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov v2.d[0], v17.d[1]",
-        "mov v16.16b, v2.16b",
-        "mov v16.d[1], v17.d[0]"
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov v4.16b, v3.16b",
+        "mov v4.d[0], v2.d[1]",
+        "mov v3.16b, v4.16b",
+        "mov v3.d[1], v2.d[0]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermilpd xmm0, xmm1, 10b": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 3 0b01 0x05 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov v2.d[0], v17.d[0]",
-        "mov v16.16b, v2.16b",
-        "mov v16.d[1], v17.d[1]"
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov v4.16b, v3.16b",
+        "mov v4.d[0], v2.d[0]",
+        "mov v3.16b, v4.16b",
+        "mov v3.d[1], v2.d[1]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermilpd xmm0, xmm1, 11b": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 3 0b01 0x05 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov v2.d[0], v17.d[1]",
-        "mov v16.16b, v2.16b",
-        "mov v16.d[1], v17.d[1]"
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov v4.16b, v3.16b",
+        "mov v4.d[0], v2.d[1]",
+        "mov v3.16b, v4.16b",
+        "mov v3.d[1], v2.d[1]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermilpd ymm0, ymm1, 0000b": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 31,
       "Comment": [
         "Map 3 0b01 0x05 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.d, d17",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.d, d2",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-2",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, d17",
+        "mov z1.d, d2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z2.d, p0/m, z1.d",
+        "mov z3.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[2]",
+        "mov z1.d, z2.d[2]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[2]",
-        "mov z16.d, z2.d",
+        "mov z1.d, z2.d[2]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z3.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermilpd ymm0, ymm1, 0001b": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 31,
       "Comment": [
         "Map 3 0b01 0x05 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.d, z17.d[1]",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.d, z2.d[1]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-2",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, d17",
+        "mov z1.d, d2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z2.d, p0/m, z1.d",
+        "mov z3.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[2]",
+        "mov z1.d, z2.d[2]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[2]",
-        "mov z16.d, z2.d",
+        "mov z1.d, z2.d[2]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z3.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermilpd ymm0, ymm1, 0010b": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 31,
       "Comment": [
         "Map 3 0b01 0x05 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.d, d17",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.d, d2",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-2",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[1]",
+        "mov z1.d, z2.d[1]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z2.d, p0/m, z1.d",
+        "mov z3.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[2]",
+        "mov z1.d, z2.d[2]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[2]",
-        "mov z16.d, z2.d",
+        "mov z1.d, z2.d[2]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z3.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermilpd ymm0, ymm1, 0011b": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 31,
       "Comment": [
         "Map 3 0b01 0x05 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.d, z17.d[1]",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.d, z2.d[1]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-2",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[1]",
+        "mov z1.d, z2.d[1]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z2.d, p0/m, z1.d",
+        "mov z3.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[2]",
+        "mov z1.d, z2.d[2]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[2]",
-        "mov z16.d, z2.d",
+        "mov z1.d, z2.d[2]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z3.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermilpd ymm0, ymm1, 0100b": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 31,
       "Comment": [
         "Map 3 0b01 0x05 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.d, d17",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.d, d2",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-2",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, d17",
+        "mov z1.d, d2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z2.d, p0/m, z1.d",
+        "mov z3.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[3]",
+        "mov z1.d, z2.d[3]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[2]",
-        "mov z16.d, z2.d",
+        "mov z1.d, z2.d[2]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z3.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermilpd ymm0, ymm1, 0101b": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 31,
       "Comment": [
         "Map 3 0b01 0x05 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.d, z17.d[1]",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.d, z2.d[1]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-2",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, d17",
+        "mov z1.d, d2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z2.d, p0/m, z1.d",
+        "mov z3.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[3]",
+        "mov z1.d, z2.d[3]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[2]",
-        "mov z16.d, z2.d",
+        "mov z1.d, z2.d[2]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z3.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermilpd ymm0, ymm1, 0110b": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 31,
       "Comment": [
         "Map 3 0b01 0x05 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.d, d17",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.d, d2",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-2",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[1]",
+        "mov z1.d, z2.d[1]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z2.d, p0/m, z1.d",
+        "mov z3.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[3]",
+        "mov z1.d, z2.d[3]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[2]",
-        "mov z16.d, z2.d",
+        "mov z1.d, z2.d[2]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z3.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermilpd ymm0, ymm1, 0111b": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 31,
       "Comment": [
         "Map 3 0b01 0x05 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.d, z17.d[1]",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.d, z2.d[1]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-2",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[1]",
+        "mov z1.d, z2.d[1]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z2.d, p0/m, z1.d",
+        "mov z3.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[3]",
+        "mov z1.d, z2.d[3]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[2]",
-        "mov z16.d, z2.d",
+        "mov z1.d, z2.d[2]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z3.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermilpd ymm0, ymm1, 1000b": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 31,
       "Comment": [
         "Map 3 0b01 0x05 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.d, d17",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.d, d2",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-2",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, d17",
+        "mov z1.d, d2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z2.d, p0/m, z1.d",
+        "mov z3.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[2]",
+        "mov z1.d, z2.d[2]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[3]",
-        "mov z16.d, z2.d",
+        "mov z1.d, z2.d[3]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z3.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermilpd ymm0, ymm1, 1001b": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 31,
       "Comment": [
         "Map 3 0b01 0x05 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.d, z17.d[1]",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.d, z2.d[1]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-2",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, d17",
+        "mov z1.d, d2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z2.d, p0/m, z1.d",
+        "mov z3.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[2]",
+        "mov z1.d, z2.d[2]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[3]",
-        "mov z16.d, z2.d",
+        "mov z1.d, z2.d[3]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z3.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermilpd ymm0, ymm1, 1010b": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 31,
       "Comment": [
         "Map 3 0b01 0x05 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.d, d17",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.d, d2",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-2",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[1]",
+        "mov z1.d, z2.d[1]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z2.d, p0/m, z1.d",
+        "mov z3.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[2]",
+        "mov z1.d, z2.d[2]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[3]",
-        "mov z16.d, z2.d",
+        "mov z1.d, z2.d[3]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z3.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermilpd ymm0, ymm1, 1011b": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 31,
       "Comment": [
         "Map 3 0b01 0x05 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.d, z17.d[1]",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.d, z2.d[1]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-2",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[1]",
+        "mov z1.d, z2.d[1]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z2.d, p0/m, z1.d",
+        "mov z3.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[2]",
+        "mov z1.d, z2.d[2]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[3]",
-        "mov z16.d, z2.d",
+        "mov z1.d, z2.d[3]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z3.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermilpd ymm0, ymm1, 1100b": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 31,
       "Comment": [
         "Map 3 0b01 0x05 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.d, d17",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.d, d2",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-2",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, d17",
+        "mov z1.d, d2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z2.d, p0/m, z1.d",
+        "mov z3.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[3]",
+        "mov z1.d, z2.d[3]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[3]",
-        "mov z16.d, z2.d",
+        "mov z1.d, z2.d[3]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z3.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermilpd ymm0, ymm1, 1101b": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 31,
       "Comment": [
         "Map 3 0b01 0x05 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.d, z17.d[1]",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.d, z2.d[1]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-2",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, d17",
+        "mov z1.d, d2",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z2.d, p0/m, z1.d",
+        "mov z3.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[3]",
+        "mov z1.d, z2.d[3]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[3]",
-        "mov z16.d, z2.d",
+        "mov z1.d, z2.d[3]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z3.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermilpd ymm0, ymm1, 1110b": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 31,
       "Comment": [
         "Map 3 0b01 0x05 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.d, d17",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.d, d2",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-2",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[1]",
+        "mov z1.d, z2.d[1]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z2.d, p0/m, z1.d",
+        "mov z3.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[3]",
+        "mov z1.d, z2.d[3]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[3]",
-        "mov z16.d, z2.d",
+        "mov z1.d, z2.d[3]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z3.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpermilpd ymm0, ymm1, 1111b": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 31,
       "Comment": [
         "Map 3 0b01 0x05 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.d, z17.d[1]",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.d, z2.d[1]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-2",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[1]",
+        "mov z1.d, z2.d[1]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z2.d, p0/m, z1.d",
+        "mov z3.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[3]",
+        "mov z1.d, z2.d[3]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[3]",
-        "mov z16.d, z2.d",
+        "mov z1.d, z2.d[3]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z3.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vperm2f128 ymm0, ymm1, ymm2, 00000000b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map 3 0b01 0x06 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, q17",
-        "mov z2.b, p6/m, z1.b",
-        "mov z1.q, q17",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.q, q2",
+        "mov z4.d, z3.d",
+        "mov z4.b, p6/m, z1.b",
+        "mov z1.q, q2",
+        "mov z3.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z3.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vperm2f128 ymm0, ymm1, ymm2, 00000001b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map 3 0b01 0x06 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, z17.q[1]",
-        "mov z2.b, p6/m, z1.b",
-        "mov z1.q, q17",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.q, z2.q[1]",
+        "mov z4.d, z3.d",
+        "mov z4.b, p6/m, z1.b",
+        "mov z1.q, q2",
+        "mov z3.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z3.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vperm2f128 ymm0, ymm1, ymm2, 00000010b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "Map 3 0b01 0x06 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, q18",
-        "mov z2.b, p6/m, z1.b",
-        "mov z1.q, q17",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov z1.q, q3",
+        "mov z5.d, z4.d",
+        "mov z5.b, p6/m, z1.b",
+        "mov z1.q, q2",
+        "mov z3.d, z5.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z3.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vperm2f128 ymm0, ymm1, ymm2, 00000011b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "Map 3 0b01 0x06 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, z18.q[1]",
-        "mov z2.b, p6/m, z1.b",
-        "mov z1.q, q17",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov z1.q, z3.q[1]",
+        "mov z5.d, z4.d",
+        "mov z5.b, p6/m, z1.b",
+        "mov z1.q, q2",
+        "mov z3.d, z5.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z3.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vperm2f128 ymm0, ymm1, ymm2, 00010000b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map 3 0b01 0x06 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, q17",
-        "mov z2.b, p6/m, z1.b",
-        "mov z1.q, z17.q[1]",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.q, q2",
+        "mov z4.d, z3.d",
+        "mov z4.b, p6/m, z1.b",
+        "mov z1.q, z2.q[1]",
+        "mov z3.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z3.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vperm2f128 ymm0, ymm1, ymm2, 00010001b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map 3 0b01 0x06 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, z17.q[1]",
-        "mov z2.b, p6/m, z1.b",
-        "mov z1.q, z17.q[1]",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.q, z2.q[1]",
+        "mov z4.d, z3.d",
+        "mov z4.b, p6/m, z1.b",
+        "mov z1.q, z2.q[1]",
+        "mov z3.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z3.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vperm2f128 ymm0, ymm1, ymm2, 00010010b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "Map 3 0b01 0x06 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, q18",
-        "mov z2.b, p6/m, z1.b",
-        "mov z1.q, z17.q[1]",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov z1.q, q3",
+        "mov z5.d, z4.d",
+        "mov z5.b, p6/m, z1.b",
+        "mov z1.q, z2.q[1]",
+        "mov z3.d, z5.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z3.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vperm2f128 ymm0, ymm1, ymm2, 00010011b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "Map 3 0b01 0x06 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, z18.q[1]",
-        "mov z2.b, p6/m, z1.b",
-        "mov z1.q, z17.q[1]",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov z1.q, z3.q[1]",
+        "mov z5.d, z4.d",
+        "mov z5.b, p6/m, z1.b",
+        "mov z1.q, z2.q[1]",
+        "mov z3.d, z5.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z3.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vperm2f128 ymm0, ymm1, ymm2, 00100000b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "Map 3 0b01 0x06 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, q17",
-        "mov z2.b, p6/m, z1.b",
-        "mov z1.q, q18",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov z1.q, q2",
+        "mov z5.d, z4.d",
+        "mov z5.b, p6/m, z1.b",
+        "mov z1.q, q3",
+        "mov z2.d, z5.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z2.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vperm2f128 ymm0, ymm1, ymm2, 00100001b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "Map 3 0b01 0x06 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, z17.q[1]",
-        "mov z2.b, p6/m, z1.b",
-        "mov z1.q, q18",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov z1.q, z2.q[1]",
+        "mov z5.d, z4.d",
+        "mov z5.b, p6/m, z1.b",
+        "mov z1.q, q3",
+        "mov z2.d, z5.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z2.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vperm2f128 ymm0, ymm1, ymm2, 00100010b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map 3 0b01 0x06 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, q18",
-        "mov z2.b, p6/m, z1.b",
-        "mov z1.q, q18",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z18.d",
+        "movi v3.2d, #0x0",
+        "mov z1.q, q2",
+        "mov z4.d, z3.d",
+        "mov z4.b, p6/m, z1.b",
+        "mov z1.q, q2",
+        "mov z3.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z3.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vperm2f128 ymm0, ymm1, ymm2, 00100011b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map 3 0b01 0x06 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, z18.q[1]",
-        "mov z2.b, p6/m, z1.b",
-        "mov z1.q, q18",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z18.d",
+        "movi v3.2d, #0x0",
+        "mov z1.q, z2.q[1]",
+        "mov z4.d, z3.d",
+        "mov z4.b, p6/m, z1.b",
+        "mov z1.q, q2",
+        "mov z3.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z3.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vperm2f128 ymm0, ymm1, ymm2, 00110000b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "Map 3 0b01 0x06 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, q17",
-        "mov z2.b, p6/m, z1.b",
-        "mov z1.q, z18.q[1]",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov z1.q, q2",
+        "mov z5.d, z4.d",
+        "mov z5.b, p6/m, z1.b",
+        "mov z1.q, z3.q[1]",
+        "mov z2.d, z5.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z2.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vperm2f128 ymm0, ymm1, ymm2, 00110001b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "Map 3 0b01 0x06 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, z17.q[1]",
-        "mov z2.b, p6/m, z1.b",
-        "mov z1.q, z18.q[1]",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov z1.q, z2.q[1]",
+        "mov z5.d, z4.d",
+        "mov z5.b, p6/m, z1.b",
+        "mov z1.q, z3.q[1]",
+        "mov z2.d, z5.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z2.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vperm2f128 ymm0, ymm1, ymm2, 00110010b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map 3 0b01 0x06 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, q18",
-        "mov z2.b, p6/m, z1.b",
-        "mov z1.q, z18.q[1]",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z18.d",
+        "movi v3.2d, #0x0",
+        "mov z1.q, q2",
+        "mov z4.d, z3.d",
+        "mov z4.b, p6/m, z1.b",
+        "mov z1.q, z2.q[1]",
+        "mov z3.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z3.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vperm2f128 ymm0, ymm1, ymm2, 00110011b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map 3 0b01 0x06 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, z18.q[1]",
-        "mov z2.b, p6/m, z1.b",
-        "mov z1.q, z18.q[1]",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z18.d",
+        "movi v3.2d, #0x0",
+        "mov z1.q, z2.q[1]",
+        "mov z4.d, z3.d",
+        "mov z4.b, p6/m, z1.b",
+        "mov z1.q, z2.q[1]",
+        "mov z3.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z3.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vperm2f128 ymm0, ymm1, ymm2, 00001000b": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 3 0b01 0x06 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, q17",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.q, q2",
+        "mov z4.d, z3.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z4.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vperm2f128 ymm0, ymm1, ymm2, 00011000b": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 3 0b01 0x06 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, z17.q[1]",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.q, z2.q[1]",
+        "mov z4.d, z3.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z4.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vperm2f128 ymm0, ymm1, ymm2, 00101000b": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 3 0b01 0x06 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, q18",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z18.d",
+        "movi v3.2d, #0x0",
+        "mov z1.q, q2",
+        "mov z4.d, z3.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z4.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vperm2f128 ymm0, ymm1, ymm2, 00111000b": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 3 0b01 0x06 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, z18.q[1]",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z18.d",
+        "movi v3.2d, #0x0",
+        "mov z1.q, z2.q[1]",
+        "mov z4.d, z3.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z4.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vperm2f128 ymm0, ymm1, ymm2, 10001000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map 3 0b01 0x06 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "movi v2.2d, #0x0",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vperm2f128 ymm0, ymm1, ymm2, 10000000b": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 3 0b01 0x06 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, q17",
-        "mov z16.d, z2.d",
-        "mov z16.b, p6/m, z1.b"
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.q, q2",
+        "mov z4.d, z3.d",
+        "mov z4.b, p6/m, z1.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vperm2f128 ymm0, ymm1, ymm2, 10000001b": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 3 0b01 0x06 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, z17.q[1]",
-        "mov z16.d, z2.d",
-        "mov z16.b, p6/m, z1.b"
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.q, z2.q[1]",
+        "mov z4.d, z3.d",
+        "mov z4.b, p6/m, z1.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vperm2f128 ymm0, ymm1, ymm2, 10000010b": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 3 0b01 0x06 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, q18",
-        "mov z16.d, z2.d",
-        "mov z16.b, p6/m, z1.b"
+        "mov z2.d, p7/m, z18.d",
+        "movi v3.2d, #0x0",
+        "mov z1.q, q2",
+        "mov z4.d, z3.d",
+        "mov z4.b, p6/m, z1.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vperm2f128 ymm0, ymm1, ymm2, 10000011b": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 3 0b01 0x06 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, z18.q[1]",
-        "mov z16.d, z2.d",
-        "mov z16.b, p6/m, z1.b"
+        "mov z2.d, p7/m, z18.d",
+        "movi v3.2d, #0x0",
+        "mov z1.q, z2.q[1]",
+        "mov z4.d, z3.d",
+        "mov z4.b, p6/m, z1.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vroundps xmm0, xmm1, 00000000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "nearest rounding",
         "Map 3 0b01 0x08 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "frintn v16.4s, v17.4s"
+        "mov z2.d, p7/m, z17.d",
+        "frintn v3.4s, v2.4s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundps xmm0, xmm1, 00000001b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "-inf rounding",
         "Map 3 0b01 0x08 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "frintm v16.4s, v17.4s"
+        "mov z2.d, p7/m, z17.d",
+        "frintm v3.4s, v2.4s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundps xmm0, xmm1, 00000010b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "+inf rounding",
         "Map 3 0b01 0x08 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "frintp v16.4s, v17.4s"
+        "mov z2.d, p7/m, z17.d",
+        "frintp v3.4s, v2.4s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundps xmm0, xmm1, 00000011b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "truncate rounding",
         "Map 3 0b01 0x08 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "frintz v16.4s, v17.4s"
+        "mov z2.d, p7/m, z17.d",
+        "frintz v3.4s, v2.4s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundps xmm0, xmm1, 00000100b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "host mode rounding",
         "Map 3 0b01 0x08 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "frinti v16.4s, v17.4s"
+        "mov z2.d, p7/m, z17.d",
+        "frinti v3.4s, v2.4s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundps ymm0, ymm1, 00000000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "nearest rounding",
         "Map 3 0b01 0x08 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "frintn z16.s, p7/m, z17.s"
+        "mov z2.d, p7/m, z17.d",
+        "frintn z3.s, p7/m, z2.s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundps ymm0, ymm1, 00000001b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "-inf rounding",
         "Map 3 0b01 0x08 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "frintm z16.s, p7/m, z17.s"
+        "mov z2.d, p7/m, z17.d",
+        "frintm z3.s, p7/m, z2.s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundps ymm0, ymm1, 00000010b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "+inf rounding",
         "Map 3 0b01 0x08 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "frintp z16.s, p7/m, z17.s"
+        "mov z2.d, p7/m, z17.d",
+        "frintp z3.s, p7/m, z2.s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundps ymm0, ymm1, 00000011b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "truncate rounding",
         "Map 3 0b01 0x08 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "frintz z16.s, p7/m, z17.s"
+        "mov z2.d, p7/m, z17.d",
+        "frintz z3.s, p7/m, z2.s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundps ymm0, ymm1, 00000100b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "host mode rounding",
         "Map 3 0b01 0x08 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "frinti z16.s, p7/m, z17.s"
+        "mov z2.d, p7/m, z17.d",
+        "frinti z3.s, p7/m, z2.s",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundpd xmm0, xmm1, 00000000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "nearest rounding",
         "Map 3 0b01 0x09 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "frintn v16.2d, v17.2d"
+        "mov z2.d, p7/m, z17.d",
+        "frintn v3.2d, v2.2d",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundpd xmm0, xmm1, 00000001b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "-inf rounding",
         "Map 3 0b01 0x09 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "frintm v16.2d, v17.2d"
+        "mov z2.d, p7/m, z17.d",
+        "frintm v3.2d, v2.2d",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundpd xmm0, xmm1, 00000010b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "+inf rounding",
         "Map 3 0b01 0x09 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "frintp v16.2d, v17.2d"
+        "mov z2.d, p7/m, z17.d",
+        "frintp v3.2d, v2.2d",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundpd xmm0, xmm1, 00000011b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "truncate rounding",
         "Map 3 0b01 0x09 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "frintz v16.2d, v17.2d"
+        "mov z2.d, p7/m, z17.d",
+        "frintz v3.2d, v2.2d",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundpd xmm0, xmm1, 00000100b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "host mode rounding",
         "Map 3 0b01 0x09 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "frinti v16.2d, v17.2d"
+        "mov z2.d, p7/m, z17.d",
+        "frinti v3.2d, v2.2d",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundpd ymm0, ymm1, 00000000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "nearest rounding",
         "Map 3 0b01 0x09 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "frintn z16.d, p7/m, z17.d"
+        "mov z2.d, p7/m, z17.d",
+        "frintn z3.d, p7/m, z2.d",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundpd ymm0, ymm1, 00000001b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "-inf rounding",
         "Map 3 0b01 0x09 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "frintm z16.d, p7/m, z17.d"
+        "mov z2.d, p7/m, z17.d",
+        "frintm z3.d, p7/m, z2.d",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundpd ymm0, ymm1, 00000010b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "+inf rounding",
         "Map 3 0b01 0x09 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "frintp z16.d, p7/m, z17.d"
+        "mov z2.d, p7/m, z17.d",
+        "frintp z3.d, p7/m, z2.d",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundpd ymm0, ymm1, 00000011b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "truncate rounding",
         "Map 3 0b01 0x09 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "frintz z16.d, p7/m, z17.d"
+        "mov z2.d, p7/m, z17.d",
+        "frintz z3.d, p7/m, z2.d",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundpd ymm0, ymm1, 00000100b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "host mode rounding",
         "Map 3 0b01 0x09 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "frinti z16.d, p7/m, z17.d"
+        "mov z2.d, p7/m, z17.d",
+        "frinti z3.d, p7/m, z2.d",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundss xmm0, xmm1, 00000000b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "nearest rounding",
         "Map 3 0b01 0x0a 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v16.16b",
-        "frintn s0, s16",
-        "mov v16.s[0], v0.s[0]"
+        "mov z2.d, p7/m, z16.d",
+        "mov v3.16b, v2.16b",
+        "frintn s0, s2",
+        "mov v3.s[0], v0.s[0]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundss xmm0, xmm1, 00000001b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "-inf rounding",
         "Map 3 0b01 0x0a 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v16.16b",
-        "frintm s0, s16",
-        "mov v16.s[0], v0.s[0]"
+        "mov z2.d, p7/m, z16.d",
+        "mov v3.16b, v2.16b",
+        "frintm s0, s2",
+        "mov v3.s[0], v0.s[0]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundss xmm0, xmm1, 00000010b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "+inf rounding",
         "Map 3 0b01 0x0a 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v16.16b",
-        "frintp s0, s16",
-        "mov v16.s[0], v0.s[0]"
+        "mov z2.d, p7/m, z16.d",
+        "mov v3.16b, v2.16b",
+        "frintp s0, s2",
+        "mov v3.s[0], v0.s[0]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundss xmm0, xmm1, 00000011b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "truncate rounding",
         "Map 3 0b01 0x0a 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v16.16b",
-        "frintz s0, s16",
-        "mov v16.s[0], v0.s[0]"
+        "mov z2.d, p7/m, z16.d",
+        "mov v3.16b, v2.16b",
+        "frintz s0, s2",
+        "mov v3.s[0], v0.s[0]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundss xmm0, xmm1, 00000100b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "host mode rounding",
         "Map 3 0b01 0x0a 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v16.16b",
-        "frinti s0, s16",
-        "mov v16.s[0], v0.s[0]"
+        "mov z2.d, p7/m, z16.d",
+        "mov v3.16b, v2.16b",
+        "frinti s0, s2",
+        "mov v3.s[0], v0.s[0]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundsd xmm0, xmm1, 00000000b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "nearest rounding",
         "Map 3 0b01 0x0b 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v16.16b",
-        "frintn d0, d16",
-        "mov v16.d[0], v0.d[0]"
+        "mov z2.d, p7/m, z16.d",
+        "mov v3.16b, v2.16b",
+        "frintn d0, d2",
+        "mov v3.d[0], v0.d[0]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundsd xmm0, xmm1, 00000001b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "-inf rounding",
         "Map 3 0b01 0x0b 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v16.16b",
-        "frintm d0, d16",
-        "mov v16.d[0], v0.d[0]"
+        "mov z2.d, p7/m, z16.d",
+        "mov v3.16b, v2.16b",
+        "frintm d0, d2",
+        "mov v3.d[0], v0.d[0]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundsd xmm0, xmm1, 00000010b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "+inf rounding",
         "Map 3 0b01 0x0b 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v16.16b",
-        "frintp d0, d16",
-        "mov v16.d[0], v0.d[0]"
+        "mov z2.d, p7/m, z16.d",
+        "mov v3.16b, v2.16b",
+        "frintp d0, d2",
+        "mov v3.d[0], v0.d[0]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundsd xmm0, xmm1, 00000011b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "truncate rounding",
         "Map 3 0b01 0x0b 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v16.16b",
-        "frintz d0, d16",
-        "mov v16.d[0], v0.d[0]"
+        "mov z2.d, p7/m, z16.d",
+        "mov v3.16b, v2.16b",
+        "frintz d0, d2",
+        "mov v3.d[0], v0.d[0]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vroundsd xmm0, xmm1, 00000100b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "host mode rounding",
         "Map 3 0b01 0x0b 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v16.16b",
-        "frinti d0, d16",
-        "mov v16.d[0], v0.d[0]"
+        "mov z2.d, p7/m, z16.d",
+        "mov v3.16b, v2.16b",
+        "frinti d0, d2",
+        "mov v3.d[0], v0.d[0]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vblendps xmm0, xmm1, xmm2, 0000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b01 0x0c 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vblendps xmm0, xmm1, xmm2, 0001b": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "Map 3 0b01 0x0c 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov v2.s[0], v18.s[0]",
-        "mov v2.s[1], v17.s[1]",
-        "mov v2.s[2], v17.s[2]",
-        "mov v2.s[3], v17.s[3]",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov v5.16b, v4.16b",
+        "mov v5.s[0], v3.s[0]",
+        "mov v3.16b, v5.16b",
+        "mov v3.s[1], v2.s[1]",
+        "mov v4.16b, v3.16b",
+        "mov v4.s[2], v2.s[2]",
+        "mov v3.16b, v4.16b",
+        "mov v3.s[3], v2.s[3]",
+        "mov v2.16b, v3.16b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vblendps xmm0, xmm1, xmm2, 1111b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b01 0x0c 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v18.16b"
+        "mov z2.d, p7/m, z18.d",
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vblendps ymm0, ymm1, ymm2, 00000000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map 3 0b01 0x0c 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z16.d, p7/m, z17.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vblendps ymm0, ymm1, ymm2, 10000001b": {
-      "ExpectedInstructionCount": 50,
+      "ExpectedInstructionCount": 60,
       "Comment": [
         "Map 3 0b01 0x0c 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.s, s18",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov z1.s, s3",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-4",
-        "mov z2.s, p0/m, z1.s",
+        "mov z5.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[1]",
+        "mov z1.s, z2.s[1]",
+        "mov z4.d, z5.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-3",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[2]",
+        "mov z1.s, z2.s[2]",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-2",
-        "mov z2.s, p0/m, z1.s",
+        "mov z5.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[3]",
+        "mov z1.s, z2.s[3]",
+        "mov z4.d, z5.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-1",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[4]",
+        "mov z1.s, z2.s[4]",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #0",
-        "mov z2.s, p0/m, z1.s",
+        "mov z5.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[5]",
+        "mov z1.s, z2.s[5]",
+        "mov z4.d, z5.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #1",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z17.s[6]",
+        "mov z1.s, z2.s[6]",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #2",
-        "mov z2.s, p0/m, z1.s",
+        "mov z5.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, z18.s[7]",
-        "mov z16.d, z2.d",
+        "mov z1.s, z3.s[7]",
+        "mov z2.d, z5.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #3",
-        "mov z16.s, p0/m, z1.s",
-        "msr nzcv, x0"
+        "mov z2.s, p0/m, z1.s",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vblendps ymm0, ymm1, ymm2, 11111111b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map 3 0b01 0x0c 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z16.d, p7/m, z18.d"
+        "mov z2.d, p7/m, z18.d",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vblendpd xmm0, xmm1, xmm2, 00b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b01 0x0d 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vblendpd xmm0, xmm1, xmm2, 01b": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 3 0b01 0x0d 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov v2.d[0], v18.d[0]",
-        "mov v16.16b, v2.16b",
-        "mov v16.d[1], v17.d[1]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov v5.16b, v4.16b",
+        "mov v5.d[0], v3.d[0]",
+        "mov v3.16b, v5.16b",
+        "mov v3.d[1], v2.d[1]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vblendpd xmm0, xmm1, xmm2, 10b": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "Map 3 0b01 0x0d 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov v2.d[0], v17.d[0]",
-        "mov v16.16b, v2.16b",
-        "mov v16.d[1], v18.d[1]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov v5.16b, v4.16b",
+        "mov v5.d[0], v2.d[0]",
+        "mov v2.16b, v5.16b",
+        "mov v2.d[1], v3.d[1]",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vblendpd xmm0, xmm1, xmm2, 11b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b01 0x0d 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v18.16b"
+        "mov z2.d, p7/m, z18.d",
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vblendpd ymm0, ymm1, ymm2, 0000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map 3 0b01 0x0d 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z16.d, p7/m, z17.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vblendpd ymm0, ymm1, ymm2, 0001b": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 32,
       "Comment": [
         "Map 3 0b01 0x0d 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.d, d18",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov z1.d, d3",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-2",
-        "mov z2.d, p0/m, z1.d",
+        "mov z5.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[1]",
+        "mov z1.d, z2.d[1]",
+        "mov z3.d, z5.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z2.d, p0/m, z1.d",
+        "mov z3.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[2]",
+        "mov z1.d, z2.d[2]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[3]",
-        "mov z16.d, z2.d",
+        "mov z1.d, z2.d[3]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z3.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vblendpd ymm0, ymm1, ymm2, 0010b": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 32,
       "Comment": [
         "Map 3 0b01 0x0d 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.d, d17",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov z1.d, d2",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-2",
-        "mov z2.d, p0/m, z1.d",
+        "mov z5.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z18.d[1]",
+        "mov z1.d, z3.d[1]",
+        "mov z4.d, z5.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[2]",
+        "mov z1.d, z2.d[2]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z2.d, p0/m, z1.d",
+        "mov z3.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[3]",
-        "mov z16.d, z2.d",
+        "mov z1.d, z2.d[3]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z4.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vblendpd ymm0, ymm1, ymm2, 0011b": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 32,
       "Comment": [
         "Map 3 0b01 0x0d 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.d, d18",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov z1.d, d3",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-2",
-        "mov z2.d, p0/m, z1.d",
+        "mov z5.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z18.d[1]",
+        "mov z1.d, z3.d[1]",
+        "mov z4.d, z5.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[2]",
+        "mov z1.d, z2.d[2]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z2.d, p0/m, z1.d",
+        "mov z3.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[3]",
-        "mov z16.d, z2.d",
+        "mov z1.d, z2.d[3]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z4.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vblendpd ymm0, ymm1, ymm2, 0100b": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 32,
       "Comment": [
         "Map 3 0b01 0x0d 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.d, d17",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov z1.d, d2",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-2",
-        "mov z2.d, p0/m, z1.d",
+        "mov z5.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[1]",
+        "mov z1.d, z2.d[1]",
+        "mov z4.d, z5.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z18.d[2]",
+        "mov z1.d, z3.d[2]",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z2.d, p0/m, z1.d",
+        "mov z5.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[3]",
-        "mov z16.d, z2.d",
+        "mov z1.d, z2.d[3]",
+        "mov z3.d, z5.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z3.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vblendpd ymm0, ymm1, ymm2, 0101b": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 32,
       "Comment": [
         "Map 3 0b01 0x0d 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.d, d18",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov z1.d, d3",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-2",
-        "mov z2.d, p0/m, z1.d",
+        "mov z5.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[1]",
+        "mov z1.d, z2.d[1]",
+        "mov z4.d, z5.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z18.d[2]",
+        "mov z1.d, z3.d[2]",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z2.d, p0/m, z1.d",
+        "mov z5.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[3]",
-        "mov z16.d, z2.d",
+        "mov z1.d, z2.d[3]",
+        "mov z3.d, z5.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z3.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vblendpd ymm0, ymm1, ymm2, 0110b": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 32,
       "Comment": [
         "Map 3 0b01 0x0d 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.d, d17",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov z1.d, d2",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-2",
-        "mov z2.d, p0/m, z1.d",
+        "mov z5.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z18.d[1]",
+        "mov z1.d, z3.d[1]",
+        "mov z4.d, z5.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z18.d[2]",
+        "mov z1.d, z3.d[2]",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z2.d, p0/m, z1.d",
+        "mov z5.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[3]",
-        "mov z16.d, z2.d",
+        "mov z1.d, z2.d[3]",
+        "mov z3.d, z5.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z3.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vblendpd ymm0, ymm1, ymm2, 0111b": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 32,
       "Comment": [
         "Map 3 0b01 0x0d 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.d, d18",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov z1.d, d3",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-2",
-        "mov z2.d, p0/m, z1.d",
+        "mov z5.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z18.d[1]",
+        "mov z1.d, z3.d[1]",
+        "mov z4.d, z5.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z18.d[2]",
+        "mov z1.d, z3.d[2]",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z2.d, p0/m, z1.d",
+        "mov z5.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[3]",
-        "mov z16.d, z2.d",
+        "mov z1.d, z2.d[3]",
+        "mov z3.d, z5.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z3.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vblendpd ymm0, ymm1, ymm2, 1000b": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 32,
       "Comment": [
         "Map 3 0b01 0x0d 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.d, d17",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov z1.d, d2",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-2",
-        "mov z2.d, p0/m, z1.d",
+        "mov z5.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[1]",
+        "mov z1.d, z2.d[1]",
+        "mov z4.d, z5.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[2]",
+        "mov z1.d, z2.d[2]",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z2.d, p0/m, z1.d",
+        "mov z5.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z18.d[3]",
-        "mov z16.d, z2.d",
+        "mov z1.d, z3.d[3]",
+        "mov z2.d, z5.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z2.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vblendpd ymm0, ymm1, ymm2, 1001b": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 32,
       "Comment": [
         "Map 3 0b01 0x0d 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.d, d18",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov z1.d, d3",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-2",
-        "mov z2.d, p0/m, z1.d",
+        "mov z5.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[1]",
+        "mov z1.d, z2.d[1]",
+        "mov z4.d, z5.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[2]",
+        "mov z1.d, z2.d[2]",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z2.d, p0/m, z1.d",
+        "mov z5.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z18.d[3]",
-        "mov z16.d, z2.d",
+        "mov z1.d, z3.d[3]",
+        "mov z2.d, z5.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z2.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vblendpd ymm0, ymm1, ymm2, 1010b": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 32,
       "Comment": [
         "Map 3 0b01 0x0d 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.d, d17",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov z1.d, d2",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-2",
-        "mov z2.d, p0/m, z1.d",
+        "mov z5.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z18.d[1]",
+        "mov z1.d, z3.d[1]",
+        "mov z4.d, z5.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[2]",
+        "mov z1.d, z2.d[2]",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z2.d, p0/m, z1.d",
+        "mov z5.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z18.d[3]",
-        "mov z16.d, z2.d",
+        "mov z1.d, z3.d[3]",
+        "mov z2.d, z5.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z2.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vblendpd ymm0, ymm1, ymm2, 1011b": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 32,
       "Comment": [
         "Map 3 0b01 0x0d 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.d, d18",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov z1.d, d3",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-2",
-        "mov z2.d, p0/m, z1.d",
+        "mov z5.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z18.d[1]",
+        "mov z1.d, z3.d[1]",
+        "mov z4.d, z5.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[2]",
+        "mov z1.d, z2.d[2]",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z2.d, p0/m, z1.d",
+        "mov z5.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z18.d[3]",
-        "mov z16.d, z2.d",
+        "mov z1.d, z3.d[3]",
+        "mov z2.d, z5.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z2.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vblendpd ymm0, ymm1, ymm2, 1100b": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 32,
       "Comment": [
         "Map 3 0b01 0x0d 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.d, d17",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov z1.d, d2",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-2",
-        "mov z2.d, p0/m, z1.d",
+        "mov z5.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[1]",
+        "mov z1.d, z2.d[1]",
+        "mov z4.d, z5.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z18.d[2]",
+        "mov z1.d, z3.d[2]",
+        "mov z2.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
         "mov z2.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z18.d[3]",
-        "mov z16.d, z2.d",
+        "mov z1.d, z3.d[3]",
+        "mov z4.d, z2.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z4.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vblendpd ymm0, ymm1, ymm2, 1101b": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 32,
       "Comment": [
         "Map 3 0b01 0x0d 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.d, d18",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov z1.d, d3",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-2",
-        "mov z2.d, p0/m, z1.d",
+        "mov z5.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z17.d[1]",
+        "mov z1.d, z2.d[1]",
+        "mov z4.d, z5.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z18.d[2]",
+        "mov z1.d, z3.d[2]",
+        "mov z2.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
         "mov z2.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z18.d[3]",
-        "mov z16.d, z2.d",
+        "mov z1.d, z3.d[3]",
+        "mov z4.d, z2.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z4.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vblendpd ymm0, ymm1, ymm2, 1110b": {
-      "ExpectedInstructionCount": 26,
+      "ExpectedInstructionCount": 32,
       "Comment": [
         "Map 3 0b01 0x0d 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.d, d17",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov z1.d, d2",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-2",
-        "mov z2.d, p0/m, z1.d",
+        "mov z5.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z18.d[1]",
+        "mov z1.d, z3.d[1]",
+        "mov z2.d, z5.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #-1",
         "mov z2.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z18.d[2]",
+        "mov z1.d, z3.d[2]",
+        "mov z4.d, z2.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #0",
-        "mov z2.d, p0/m, z1.d",
+        "mov z4.d, p0/m, z1.d",
         "msr nzcv, x0",
-        "mov z1.d, z18.d[3]",
-        "mov z16.d, z2.d",
+        "mov z1.d, z3.d[3]",
+        "mov z2.d, z4.d",
         "mrs x0, nzcv",
         "index z0.d, #-2, #1",
         "cmpeq p0.d, p7/z, z0.d, #1",
-        "mov z16.d, p0/m, z1.d",
-        "msr nzcv, x0"
+        "mov z2.d, p0/m, z1.d",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vblendpd ymm0, ymm1, ymm2, 1111b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map 3 0b01 0x0d 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z16.d, p7/m, z18.d"
+        "mov z2.d, p7/m, z18.d",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpblendw xmm0, xmm1, xmm2, 00000000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b01 0x0e 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpblendw xmm0, xmm1, xmm2, 00000001b": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 21,
       "Comment": [
         "Map 3 0b01 0x0e 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov v2.h[0], v18.h[0]",
-        "mov v2.h[1], v17.h[1]",
-        "mov v2.h[2], v17.h[2]",
-        "mov v2.h[3], v17.h[3]",
-        "mov v2.h[4], v17.h[4]",
-        "mov v2.h[5], v17.h[5]",
-        "mov v2.h[6], v17.h[6]",
-        "mov v2.h[7], v17.h[7]",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov v5.16b, v4.16b",
+        "mov v5.h[0], v3.h[0]",
+        "mov v3.16b, v5.16b",
+        "mov v3.h[1], v2.h[1]",
+        "mov v4.16b, v3.16b",
+        "mov v4.h[2], v2.h[2]",
+        "mov v3.16b, v4.16b",
+        "mov v3.h[3], v2.h[3]",
+        "mov v4.16b, v3.16b",
+        "mov v4.h[4], v2.h[4]",
+        "mov v3.16b, v4.16b",
+        "mov v3.h[5], v2.h[5]",
+        "mov v4.16b, v3.16b",
+        "mov v4.h[6], v2.h[6]",
+        "mov v3.16b, v4.16b",
+        "mov v3.h[7], v2.h[7]",
+        "mov v2.16b, v3.16b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpblendw xmm0, xmm1, xmm2, 11111111b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b01 0x0e 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v18.16b"
+        "mov z2.d, p7/m, z18.d",
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpblendw ymm0, ymm1, ymm2, 00000000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map 3 0b01 0x0e 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z16.d, p7/m, z17.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpblendw ymm0, ymm1, ymm2, 00000001b": {
-      "ExpectedInstructionCount": 98,
+      "ExpectedInstructionCount": 116,
       "Comment": [
         "Map 3 0b01 0x0e 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.h, h18",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov z1.h, h3",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-8",
-        "mov z2.h, p0/m, z1.h",
+        "mov z5.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[1]",
+        "mov z1.h, z2.h[1]",
+        "mov z4.d, z5.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-7",
-        "mov z2.h, p0/m, z1.h",
+        "mov z4.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[2]",
+        "mov z1.h, z2.h[2]",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-6",
-        "mov z2.h, p0/m, z1.h",
+        "mov z5.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[3]",
+        "mov z1.h, z2.h[3]",
+        "mov z4.d, z5.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-5",
-        "mov z2.h, p0/m, z1.h",
+        "mov z4.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[4]",
+        "mov z1.h, z2.h[4]",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-4",
-        "mov z2.h, p0/m, z1.h",
+        "mov z5.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[5]",
+        "mov z1.h, z2.h[5]",
+        "mov z4.d, z5.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-3",
-        "mov z2.h, p0/m, z1.h",
+        "mov z4.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[6]",
+        "mov z1.h, z2.h[6]",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-2",
-        "mov z2.h, p0/m, z1.h",
+        "mov z5.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[7]",
+        "mov z1.h, z2.h[7]",
+        "mov z4.d, z5.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #-1",
-        "mov z2.h, p0/m, z1.h",
+        "mov z4.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z18.h[8]",
+        "mov z1.h, z3.h[8]",
+        "mov z5.d, z4.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #0",
-        "mov z2.h, p0/m, z1.h",
+        "mov z5.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[9]",
+        "mov z1.h, z2.h[9]",
+        "mov z3.d, z5.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #1",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[10]",
+        "mov z1.h, z2.h[10]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #2",
-        "mov z2.h, p0/m, z1.h",
+        "mov z4.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[11]",
+        "mov z1.h, z2.h[11]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #3",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[12]",
+        "mov z1.h, z2.h[12]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #4",
-        "mov z2.h, p0/m, z1.h",
+        "mov z4.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[13]",
+        "mov z1.h, z2.h[13]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #5",
-        "mov z2.h, p0/m, z1.h",
+        "mov z3.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[14]",
+        "mov z1.h, z2.h[14]",
+        "mov z4.d, z3.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #6",
-        "mov z2.h, p0/m, z1.h",
+        "mov z4.h, p0/m, z1.h",
         "msr nzcv, x0",
-        "mov z1.h, z17.h[15]",
-        "mov z16.d, z2.d",
+        "mov z1.h, z2.h[15]",
+        "mov z3.d, z4.d",
         "mrs x0, nzcv",
         "index z0.h, #-8, #1",
         "cmpeq p0.h, p7/z, z0.h, #7",
-        "mov z16.h, p0/m, z1.h",
-        "msr nzcv, x0"
+        "mov z3.h, p0/m, z1.h",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpblendw ymm0, ymm1, ymm2, 11111111b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map 3 0b01 0x0e 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z16.d, p7/m, z18.d"
+        "mov z2.d, p7/m, z18.d",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpalignr xmm0, xmm1, xmm2, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b01 0x0f 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v18.16b"
+        "mov z2.d, p7/m, z18.d",
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpalignr xmm0, xmm1, xmm2, 1": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 3 0b01 0x0f 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ext v16.16b, v18.16b, v17.16b, #1"
+        "mov z2.d, p7/m, z18.d",
+        "mov z3.d, p7/m, z17.d",
+        "ext v4.16b, v2.16b, v3.16b, #1",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpalignr xmm0, xmm1, xmm2, 15": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 3 0b01 0x0f 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ext v16.16b, v18.16b, v17.16b, #15"
+        "mov z2.d, p7/m, z18.d",
+        "mov z3.d, p7/m, z17.d",
+        "ext v4.16b, v2.16b, v3.16b, #15",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpalignr xmm0, xmm1, xmm2, 16": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 3 0b01 0x0f 128-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z18.d",
+        "mov z3.d, p7/m, z17.d",
         "movi v0.2d, #0x0",
-        "ext v16.16b, v17.16b, v0.16b, #0"
+        "ext v4.16b, v3.16b, v0.16b, #0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpalignr ymm0, ymm1, ymm2, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map 3 0b01 0x0f 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z16.d, p7/m, z18.d"
+        "mov z2.d, p7/m, z18.d",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpalignr ymm0, ymm1, ymm2, 1": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "Map 3 0b01 0x0f 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "ext v2.16b, v18.16b, v17.16b, #1",
-        "mov z1.q, z17.q[1]",
-        "mov z3.d, z17.d",
+        "mov z2.d, p7/m, z18.d",
+        "mov z3.d, p7/m, z17.d",
+        "ext v4.16b, v2.16b, v3.16b, #1",
+        "mov z1.q, z3.q[1]",
+        "mov z5.d, z3.d",
+        "mov z5.b, p6/m, z1.b",
+        "mov z1.q, z2.q[1]",
+        "mov z3.d, z2.d",
         "mov z3.b, p6/m, z1.b",
-        "mov z1.q, z18.q[1]",
-        "mov z4.d, z18.d",
-        "mov z4.b, p6/m, z1.b",
-        "ext v3.16b, v4.16b, v3.16b, #1",
-        "mov z1.q, q3",
-        "mov z16.d, z2.d",
+        "ext v2.16b, v3.16b, v5.16b, #1",
+        "mov z1.q, q2",
+        "mov z3.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z3.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpalignr ymm0, ymm1, ymm2, 15": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "Map 3 0b01 0x0f 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "ext v2.16b, v18.16b, v17.16b, #15",
-        "mov z1.q, z17.q[1]",
-        "mov z3.d, z17.d",
+        "mov z2.d, p7/m, z18.d",
+        "mov z3.d, p7/m, z17.d",
+        "ext v4.16b, v2.16b, v3.16b, #15",
+        "mov z1.q, z3.q[1]",
+        "mov z5.d, z3.d",
+        "mov z5.b, p6/m, z1.b",
+        "mov z1.q, z2.q[1]",
+        "mov z3.d, z2.d",
         "mov z3.b, p6/m, z1.b",
-        "mov z1.q, z18.q[1]",
-        "mov z4.d, z18.d",
-        "mov z4.b, p6/m, z1.b",
-        "ext v3.16b, v4.16b, v3.16b, #15",
-        "mov z1.q, q3",
-        "mov z16.d, z2.d",
+        "ext v2.16b, v3.16b, v5.16b, #15",
+        "mov z1.q, q2",
+        "mov z3.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z3.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpalignr ymm0, ymm1, ymm2, 16": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 17,
       "Comment": [
         "Map 3 0b01 0x0f 256-bit"
       ],
       "ExpectedArm64ASM": [
+        "mov z2.d, p7/m, z18.d",
+        "mov z3.d, p7/m, z17.d",
         "movi v0.2d, #0x0",
-        "ext v2.16b, v17.16b, v0.16b, #0",
-        "mov z1.q, z17.q[1]",
-        "mov z3.d, z17.d",
+        "ext v4.16b, v3.16b, v0.16b, #0",
+        "mov z1.q, z3.q[1]",
+        "mov z5.d, z3.d",
+        "mov z5.b, p6/m, z1.b",
+        "mov z1.q, z2.q[1]",
+        "mov z3.d, z2.d",
         "mov z3.b, p6/m, z1.b",
-        "mov z1.q, z18.q[1]",
-        "mov z4.d, z18.d",
-        "mov z4.b, p6/m, z1.b",
         "movi v0.2d, #0x0",
-        "ext v3.16b, v3.16b, v0.16b, #0",
-        "mov z1.q, q3",
-        "mov z16.d, z2.d",
+        "ext v2.16b, v5.16b, v0.16b, #0",
+        "mov z1.q, q2",
+        "mov z3.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z3.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpextrb rax, xmm0, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b01 0x14 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "umov w4, v16.b[0]"
+        "mov z2.d, p7/m, z16.d",
+        "umov w20, v2.b[0]",
+        "mov x4, x20"
       ]
     },
     "vpextrb rax, xmm0, 15": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b01 0x14 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "umov w4, v16.b[15]"
+        "mov z2.d, p7/m, z16.d",
+        "umov w20, v2.b[15]",
+        "mov x4, x20"
       ]
     },
     "vpextrw rax, xmm0, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b01 0x15 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "umov w4, v16.h[0]"
+        "mov z2.d, p7/m, z16.d",
+        "umov w20, v2.h[0]",
+        "mov x4, x20"
       ]
     },
     "vpextrw rax, xmm0, 7": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b01 0x15 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "umov w4, v16.h[7]"
+        "mov z2.d, p7/m, z16.d",
+        "umov w20, v2.h[7]",
+        "mov x4, x20"
       ]
     },
     "vpextrd rax, xmm0, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b01 0x16 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov w4, v16.s[0]"
+        "mov z2.d, p7/m, z16.d",
+        "mov w20, v2.s[0]",
+        "mov x4, x20"
       ]
     },
     "vpextrd rax, xmm0, 3": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b01 0x16 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov w4, v16.s[3]"
+        "mov z2.d, p7/m, z16.d",
+        "mov w20, v2.s[3]",
+        "mov x4, x20"
       ]
     },
     "vpextrb [rax], xmm0, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b01 0x14 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "st1 {v16.b}[0], [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "st1 {v2.b}[0], [x20]"
       ]
     },
     "vpextrb [rax], xmm0, 15": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b01 0x14 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "st1 {v16.b}[15], [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "st1 {v2.b}[15], [x20]"
       ]
     },
     "vpextrw [rax], xmm0, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b01 0x15 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "st1 {v16.h}[0], [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "st1 {v2.h}[0], [x20]"
       ]
     },
     "vpextrw [rax], xmm0, 7": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b01 0x15 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "st1 {v16.h}[7], [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "st1 {v2.h}[7], [x20]"
       ]
     },
     "vpextrd [rax], xmm0, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b01 0x16 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "st1 {v16.s}[0], [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "st1 {v2.s}[0], [x20]"
       ]
     },
     "vpextrd [rax], xmm0, 3": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b01 0x16 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "st1 {v16.s}[3], [x4]"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "st1 {v2.s}[3], [x20]"
       ]
     },
     "vextractps eax, xmm0, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b01 0x17 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov w4, v16.s[0]"
+        "mov z2.d, p7/m, z16.d",
+        "mov w20, v2.s[0]",
+        "mov x4, x20"
       ]
     },
     "vextractps eax, xmm0, 3": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b01 0x17 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov w4, v16.s[3]"
+        "mov z2.d, p7/m, z16.d",
+        "mov w20, v2.s[3]",
+        "mov x4, x20"
       ]
     },
     "vinsertf128 ymm0, ymm1, xmm2, 0": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 3 0b01 0x18 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z1.q, q18",
-        "mov z16.d, z17.d",
-        "mov z16.b, p6/m, z1.b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z1.q, q3",
+        "mov z4.d, z2.d",
+        "mov z4.b, p6/m, z1.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vinsertf128 ymm0, ymm1, xmm2, 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 3 0b01 0x18 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z1.q, q18",
-        "mov z16.d, z17.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z1.q, q3",
+        "mov z4.d, z2.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z4.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vextractf128 xmm0, ymm1, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b01 0x19 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vextractf128 xmm0, ymm1, 1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 3 0b01 0x19 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z2.q, z17.q[1]",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.q, z2.q[1]",
+        "mov v2.16b, v3.16b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vcvtps2ph xmm0, xmm1, 00000000b": {
@@ -3141,997 +3776,1143 @@
       ]
     },
     "vpinsrb xmm0, xmm0, eax, 0": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 3 0b01 0x20 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v2.16b, v16.16b",
-        "mov v2.b[0], w4",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.b[0], w20",
+        "mov v2.16b, v3.16b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpinsrb xmm0, xmm1, eax, 0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 3 0b01 0x20 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "mov v16.b[0], w4"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.b[0], w20",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpinsrb xmm0, xmm1, eax, 15": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 3 0b01 0x20 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "mov v16.b[15], w4"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.b[15], w20",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vinsertps xmm0, xmm1, xmm2, ((0b00 << 6) | (0b00 << 4) | (0b0000))": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 3 0b01 0x21 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "mov v16.s[0], v18.s[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "mov v4.s[0], v3.s[0]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vinsertps xmm0, xmm1, xmm2, ((0b00 << 6) | (0b00 << 4) | (0b1111))": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map 3 0b01 0x21 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "movi v2.2d, #0x0",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vinsertps xmm0, xmm1, xmm2, ((0b11 << 6) | (0b11 << 4) | (0b0000))": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 3 0b01 0x21 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "mov v16.s[3], v18.s[3]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov v4.16b, v2.16b",
+        "mov v4.s[3], v3.s[3]",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpinsrd xmm0, xmm0, eax, 0": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 3 0b01 0x22 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v2.16b, v16.16b",
-        "mov v2.s[0], w4",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.s[0], w20",
+        "mov v2.16b, v3.16b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpinsrd xmm0, xmm1, eax, 0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 3 0b01 0x22 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "mov v16.s[0], w4"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.s[0], w20",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpinsrd xmm0, xmm1, eax, 3": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 3 0b01 0x22 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "mov v16.s[3], w4"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.s[3], w20",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpinsrq xmm0, xmm0, rax, 0": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 3 0b01 0x22 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v2.16b, v16.16b",
-        "mov v2.d[0], x4",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z16.d",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.d[0], x20",
+        "mov v2.16b, v3.16b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpinsrq xmm0, xmm1, rax, 0": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 3 0b01 0x22 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "mov v16.d[0], x4"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.d[0], x20",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpinsrq xmm0, xmm1, rax, 1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 3 0b01 0x22 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b",
-        "mov v16.d[1], x4"
+        "mov z2.d, p7/m, z17.d",
+        "mov x20, x4",
+        "mov v3.16b, v2.16b",
+        "mov v3.d[1], x20",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vinserti128 ymm0, ymm1, xmm2, 0": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 3 0b01 0x38 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z1.q, q18",
-        "mov z16.d, z17.d",
-        "mov z16.b, p6/m, z1.b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z1.q, q3",
+        "mov z4.d, z2.d",
+        "mov z4.b, p6/m, z1.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vinserti128 ymm0, ymm1, xmm2, 1": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 3 0b01 0x38 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z1.q, q18",
-        "mov z16.d, z17.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z1.q, q3",
+        "mov z4.d, z2.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z4.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vextracti128 xmm0, ymm1, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b01 0x39 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vextracti128 xmm0, ymm1, 1": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 3 0b01 0x39 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z2.q, z17.q[1]",
-        "mov v16.16b, v2.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.q, z2.q[1]",
+        "mov v2.16b, v3.16b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vdpps xmm0, xmm1, xmm2, 00000000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map 3 0b01 0x40 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "movi v2.2d, #0x0",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vdpps xmm0, xmm1, xmm2, 00001111b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map 3 0b01 0x40 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "movi v2.2d, #0x0",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vdpps xmm0, xmm1, xmm2, 11110000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map 3 0b01 0x40 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "movi v2.2d, #0x0",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vdpps xmm0, xmm1, xmm2, 11111111b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 3 0b01 0x40 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fmul v2.4s, v17.4s, v18.4s",
-        "faddv s2, p6, z2.s",
-        "dup v16.4s, v2.s[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fmul v4.4s, v2.4s, v3.4s",
+        "faddv s2, p6, z4.s",
+        "dup v3.4s, v2.s[0]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vdpps ymm0, ymm1, ymm2, 00000000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map 3 0b01 0x40 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "movi v2.2d, #0x0",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vdpps ymm0, ymm1, ymm2, 00001111b": {
-      "ExpectedInstructionCount": 109,
+      "ExpectedInstructionCount": 127,
       "Comment": [
         "Map 3 0b01 0x40 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "fmul z3.s, z17.s, z18.s",
-        "mov z1.s, s2",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "fmul z5.s, z2.s, z3.s",
+        "mov z1.s, s4",
+        "mov z2.d, z5.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-4",
-        "mov z3.s, p0/m, z1.s",
+        "mov z2.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s2",
+        "mov z1.s, s4",
+        "mov z3.d, z2.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-3",
         "mov z3.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s2",
+        "mov z1.s, s4",
+        "mov z2.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-2",
-        "mov z3.s, p0/m, z1.s",
+        "mov z2.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s2",
+        "mov z1.s, s4",
+        "mov z3.d, z2.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-1",
         "mov z3.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s2",
+        "mov z1.s, s4",
+        "mov z2.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #0",
-        "mov z3.s, p0/m, z1.s",
+        "mov z2.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s2",
+        "mov z1.s, s4",
+        "mov z3.d, z2.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #1",
         "mov z3.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s2",
+        "mov z1.s, s4",
+        "mov z2.d, z3.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #2",
-        "mov z3.s, p0/m, z1.s",
+        "mov z2.s, p0/m, z1.s",
         "msr nzcv, x0",
-        "mov z1.s, s2",
+        "mov z1.s, s4",
+        "mov z3.d, z2.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #3",
         "mov z3.s, p0/m, z1.s",
         "msr nzcv, x0",
         "movprfx z0, z3",
-        "faddp z0.s, p7/m, z0.s, z2.s",
-        "uzp1 z3.s, z0.s, z0.s",
+        "faddp z0.s, p7/m, z0.s, z4.s",
+        "uzp1 z2.s, z0.s, z0.s",
         "uzp2 z1.s, z0.s, z0.s",
-        "splice z3.d, p6, z3.d, z1.d",
-        "movprfx z0, z3",
-        "faddp z0.s, p7/m, z0.s, z2.s",
+        "splice z2.d, p6, z2.d, z1.d",
+        "movprfx z0, z2",
+        "faddp z0.s, p7/m, z0.s, z4.s",
         "uzp1 z3.s, z0.s, z0.s",
         "uzp2 z1.s, z0.s, z0.s",
         "splice z3.d, p6, z3.d, z1.d",
         "mov z1.s, s3",
+        "mov z2.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-4",
         "mov z2.s, p0/m, z1.s",
         "msr nzcv, x0",
         "mov z1.s, s3",
+        "mov z4.d, z2.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-3",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
         "mov z1.s, s3",
+        "mov z2.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-2",
         "mov z2.s, p0/m, z1.s",
         "msr nzcv, x0",
         "mov z1.s, s3",
+        "mov z4.d, z2.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-1",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
         "mov z1.s, s3",
+        "mov z2.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #0",
         "mov z2.s, p0/m, z1.s",
         "msr nzcv, x0",
         "mov z1.s, s3",
+        "mov z4.d, z2.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #1",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
         "mov z1.s, s3",
+        "mov z2.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #2",
         "mov z2.s, p0/m, z1.s",
         "msr nzcv, x0",
         "mov z1.s, s3",
-        "mov z16.d, z2.d",
+        "mov z4.d, z2.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #3",
-        "mov z16.s, p0/m, z1.s",
-        "msr nzcv, x0"
+        "mov z4.s, p0/m, z1.s",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vdpps ymm0, ymm1, ymm2, 11110000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map 3 0b01 0x40 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "movi v2.2d, #0x0",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vdpps ymm0, ymm1, ymm2, 11111111b": {
-      "ExpectedInstructionCount": 61,
+      "ExpectedInstructionCount": 71,
       "Comment": [
         "Map 3 0b01 0x40 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "fmul z3.s, z17.s, z18.s",
-        "movprfx z0, z3",
-        "faddp z0.s, p7/m, z0.s, z2.s",
-        "uzp1 z3.s, z0.s, z0.s",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "fmul z5.s, z2.s, z3.s",
+        "movprfx z0, z5",
+        "faddp z0.s, p7/m, z0.s, z4.s",
+        "uzp1 z2.s, z0.s, z0.s",
         "uzp2 z1.s, z0.s, z0.s",
-        "splice z3.d, p6, z3.d, z1.d",
-        "movprfx z0, z3",
-        "faddp z0.s, p7/m, z0.s, z2.s",
+        "splice z2.d, p6, z2.d, z1.d",
+        "movprfx z0, z2",
+        "faddp z0.s, p7/m, z0.s, z4.s",
         "uzp1 z3.s, z0.s, z0.s",
         "uzp2 z1.s, z0.s, z0.s",
         "splice z3.d, p6, z3.d, z1.d",
         "mov z1.s, s3",
+        "mov z2.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-4",
         "mov z2.s, p0/m, z1.s",
         "msr nzcv, x0",
         "mov z1.s, s3",
+        "mov z4.d, z2.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-3",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
         "mov z1.s, s3",
+        "mov z2.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-2",
         "mov z2.s, p0/m, z1.s",
         "msr nzcv, x0",
         "mov z1.s, s3",
+        "mov z4.d, z2.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #-1",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
         "mov z1.s, s3",
+        "mov z2.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #0",
         "mov z2.s, p0/m, z1.s",
         "msr nzcv, x0",
         "mov z1.s, s3",
+        "mov z4.d, z2.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #1",
-        "mov z2.s, p0/m, z1.s",
+        "mov z4.s, p0/m, z1.s",
         "msr nzcv, x0",
         "mov z1.s, s3",
+        "mov z2.d, z4.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #2",
         "mov z2.s, p0/m, z1.s",
         "msr nzcv, x0",
         "mov z1.s, s3",
-        "mov z16.d, z2.d",
+        "mov z4.d, z2.d",
         "mrs x0, nzcv",
         "index z0.s, #-4, #1",
         "cmpeq p0.s, p7/z, z0.s, #3",
-        "mov z16.s, p0/m, z1.s",
-        "msr nzcv, x0"
+        "mov z4.s, p0/m, z1.s",
+        "msr nzcv, x0",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vdppd xmm0, xmm1, xmm2, 00000000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map 3 0b01 0x41 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "movi v2.2d, #0x0",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vdppd xmm0, xmm1, xmm2, 00001111b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map 3 0b01 0x41 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "movi v2.2d, #0x0",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vdppd xmm0, xmm1, xmm2, 11110000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map 3 0b01 0x41 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "movi v2.2d, #0x0",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vdppd xmm0, xmm1, xmm2, 11111111b": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 3 0b01 0x41 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "fmul v2.2d, v17.2d, v18.2d",
-        "faddv d2, p6, z2.d",
-        "dup v16.2d, v2.d[0]"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "fmul v4.2d, v2.2d, v3.2d",
+        "faddv d2, p6, z4.d",
+        "dup v3.2d, v2.d[0]",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vmpsadbw xmm0, xmm1, xmm2, 000b": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 17,
       "Comment": [
         "Map 3 0b01 0x42 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v18.s[0]",
-        "ext v3.16b, v17.16b, v17.16b, #0",
-        "ext v4.16b, v17.16b, v17.16b, #1",
-        "ext v5.16b, v17.16b, v17.16b, #2",
-        "ext v6.16b, v17.16b, v17.16b, #3",
-        "uabdl v3.8h, v3.8b, v2.8b",
-        "uabdl v4.8h, v4.8b, v2.8b",
-        "uabdl v5.8h, v5.8b, v2.8b",
-        "uabdl v2.8h, v6.8b, v2.8b",
-        "addp v3.8h, v3.8h, v5.8h",
-        "addp v2.8h, v4.8h, v2.8h",
-        "trn1 v4.4s, v3.4s, v2.4s",
-        "trn2 v2.4s, v3.4s, v2.4s",
-        "addp v16.8h, v4.8h, v2.8h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "dup v4.4s, v3.s[0]",
+        "ext v3.16b, v2.16b, v2.16b, #0",
+        "ext v5.16b, v2.16b, v2.16b, #1",
+        "ext v6.16b, v2.16b, v2.16b, #2",
+        "ext v7.16b, v2.16b, v2.16b, #3",
+        "uabdl v2.8h, v3.8b, v4.8b",
+        "uabdl v3.8h, v5.8b, v4.8b",
+        "uabdl v5.8h, v6.8b, v4.8b",
+        "uabdl v6.8h, v7.8b, v4.8b",
+        "addp v4.8h, v2.8h, v5.8h",
+        "addp v2.8h, v3.8h, v6.8h",
+        "trn1 v3.4s, v4.4s, v2.4s",
+        "trn2 v5.4s, v4.4s, v2.4s",
+        "addp v2.8h, v3.8h, v5.8h",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmpsadbw xmm0, xmm1, xmm2, 001b": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 17,
       "Comment": [
         "Map 3 0b01 0x42 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v18.s[1]",
-        "ext v3.16b, v17.16b, v17.16b, #0",
-        "ext v4.16b, v17.16b, v17.16b, #1",
-        "ext v5.16b, v17.16b, v17.16b, #2",
-        "ext v6.16b, v17.16b, v17.16b, #3",
-        "uabdl v3.8h, v3.8b, v2.8b",
-        "uabdl v4.8h, v4.8b, v2.8b",
-        "uabdl v5.8h, v5.8b, v2.8b",
-        "uabdl v2.8h, v6.8b, v2.8b",
-        "addp v3.8h, v3.8h, v5.8h",
-        "addp v2.8h, v4.8h, v2.8h",
-        "trn1 v4.4s, v3.4s, v2.4s",
-        "trn2 v2.4s, v3.4s, v2.4s",
-        "addp v16.8h, v4.8h, v2.8h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "dup v4.4s, v3.s[1]",
+        "ext v3.16b, v2.16b, v2.16b, #0",
+        "ext v5.16b, v2.16b, v2.16b, #1",
+        "ext v6.16b, v2.16b, v2.16b, #2",
+        "ext v7.16b, v2.16b, v2.16b, #3",
+        "uabdl v2.8h, v3.8b, v4.8b",
+        "uabdl v3.8h, v5.8b, v4.8b",
+        "uabdl v5.8h, v6.8b, v4.8b",
+        "uabdl v6.8h, v7.8b, v4.8b",
+        "addp v4.8h, v2.8h, v5.8h",
+        "addp v2.8h, v3.8h, v6.8h",
+        "trn1 v3.4s, v4.4s, v2.4s",
+        "trn2 v5.4s, v4.4s, v2.4s",
+        "addp v2.8h, v3.8h, v5.8h",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmpsadbw xmm0, xmm1, xmm2, 010b": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 17,
       "Comment": [
         "Map 3 0b01 0x42 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v18.s[2]",
-        "ext v3.16b, v17.16b, v17.16b, #0",
-        "ext v4.16b, v17.16b, v17.16b, #1",
-        "ext v5.16b, v17.16b, v17.16b, #2",
-        "ext v6.16b, v17.16b, v17.16b, #3",
-        "uabdl v3.8h, v3.8b, v2.8b",
-        "uabdl v4.8h, v4.8b, v2.8b",
-        "uabdl v5.8h, v5.8b, v2.8b",
-        "uabdl v2.8h, v6.8b, v2.8b",
-        "addp v3.8h, v3.8h, v5.8h",
-        "addp v2.8h, v4.8h, v2.8h",
-        "trn1 v4.4s, v3.4s, v2.4s",
-        "trn2 v2.4s, v3.4s, v2.4s",
-        "addp v16.8h, v4.8h, v2.8h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "dup v4.4s, v3.s[2]",
+        "ext v3.16b, v2.16b, v2.16b, #0",
+        "ext v5.16b, v2.16b, v2.16b, #1",
+        "ext v6.16b, v2.16b, v2.16b, #2",
+        "ext v7.16b, v2.16b, v2.16b, #3",
+        "uabdl v2.8h, v3.8b, v4.8b",
+        "uabdl v3.8h, v5.8b, v4.8b",
+        "uabdl v5.8h, v6.8b, v4.8b",
+        "uabdl v6.8h, v7.8b, v4.8b",
+        "addp v4.8h, v2.8h, v5.8h",
+        "addp v2.8h, v3.8h, v6.8h",
+        "trn1 v3.4s, v4.4s, v2.4s",
+        "trn2 v5.4s, v4.4s, v2.4s",
+        "addp v2.8h, v3.8h, v5.8h",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmpsadbw xmm0, xmm1, xmm2, 011b": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 17,
       "Comment": [
         "Map 3 0b01 0x42 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v18.s[3]",
-        "ext v3.16b, v17.16b, v17.16b, #0",
-        "ext v4.16b, v17.16b, v17.16b, #1",
-        "ext v5.16b, v17.16b, v17.16b, #2",
-        "ext v6.16b, v17.16b, v17.16b, #3",
-        "uabdl v3.8h, v3.8b, v2.8b",
-        "uabdl v4.8h, v4.8b, v2.8b",
-        "uabdl v5.8h, v5.8b, v2.8b",
-        "uabdl v2.8h, v6.8b, v2.8b",
-        "addp v3.8h, v3.8h, v5.8h",
-        "addp v2.8h, v4.8h, v2.8h",
-        "trn1 v4.4s, v3.4s, v2.4s",
-        "trn2 v2.4s, v3.4s, v2.4s",
-        "addp v16.8h, v4.8h, v2.8h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "dup v4.4s, v3.s[3]",
+        "ext v3.16b, v2.16b, v2.16b, #0",
+        "ext v5.16b, v2.16b, v2.16b, #1",
+        "ext v6.16b, v2.16b, v2.16b, #2",
+        "ext v7.16b, v2.16b, v2.16b, #3",
+        "uabdl v2.8h, v3.8b, v4.8b",
+        "uabdl v3.8h, v5.8b, v4.8b",
+        "uabdl v5.8h, v6.8b, v4.8b",
+        "uabdl v6.8h, v7.8b, v4.8b",
+        "addp v4.8h, v2.8h, v5.8h",
+        "addp v2.8h, v3.8h, v6.8h",
+        "trn1 v3.4s, v4.4s, v2.4s",
+        "trn2 v5.4s, v4.4s, v2.4s",
+        "addp v2.8h, v3.8h, v5.8h",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmpsadbw xmm0, xmm1, xmm2, 100b": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 17,
       "Comment": [
         "Map 3 0b01 0x42 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v18.s[0]",
-        "ext v3.16b, v17.16b, v17.16b, #4",
-        "ext v4.16b, v17.16b, v17.16b, #5",
-        "ext v5.16b, v17.16b, v17.16b, #6",
-        "ext v6.16b, v17.16b, v17.16b, #7",
-        "uabdl v3.8h, v3.8b, v2.8b",
-        "uabdl v4.8h, v4.8b, v2.8b",
-        "uabdl v5.8h, v5.8b, v2.8b",
-        "uabdl v2.8h, v6.8b, v2.8b",
-        "addp v3.8h, v3.8h, v5.8h",
-        "addp v2.8h, v4.8h, v2.8h",
-        "trn1 v4.4s, v3.4s, v2.4s",
-        "trn2 v2.4s, v3.4s, v2.4s",
-        "addp v16.8h, v4.8h, v2.8h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "dup v4.4s, v3.s[0]",
+        "ext v3.16b, v2.16b, v2.16b, #4",
+        "ext v5.16b, v2.16b, v2.16b, #5",
+        "ext v6.16b, v2.16b, v2.16b, #6",
+        "ext v7.16b, v2.16b, v2.16b, #7",
+        "uabdl v2.8h, v3.8b, v4.8b",
+        "uabdl v3.8h, v5.8b, v4.8b",
+        "uabdl v5.8h, v6.8b, v4.8b",
+        "uabdl v6.8h, v7.8b, v4.8b",
+        "addp v4.8h, v2.8h, v5.8h",
+        "addp v2.8h, v3.8h, v6.8h",
+        "trn1 v3.4s, v4.4s, v2.4s",
+        "trn2 v5.4s, v4.4s, v2.4s",
+        "addp v2.8h, v3.8h, v5.8h",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmpsadbw xmm0, xmm1, xmm2, 101b": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 17,
       "Comment": [
         "Map 3 0b01 0x42 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v18.s[1]",
-        "ext v3.16b, v17.16b, v17.16b, #4",
-        "ext v4.16b, v17.16b, v17.16b, #5",
-        "ext v5.16b, v17.16b, v17.16b, #6",
-        "ext v6.16b, v17.16b, v17.16b, #7",
-        "uabdl v3.8h, v3.8b, v2.8b",
-        "uabdl v4.8h, v4.8b, v2.8b",
-        "uabdl v5.8h, v5.8b, v2.8b",
-        "uabdl v2.8h, v6.8b, v2.8b",
-        "addp v3.8h, v3.8h, v5.8h",
-        "addp v2.8h, v4.8h, v2.8h",
-        "trn1 v4.4s, v3.4s, v2.4s",
-        "trn2 v2.4s, v3.4s, v2.4s",
-        "addp v16.8h, v4.8h, v2.8h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "dup v4.4s, v3.s[1]",
+        "ext v3.16b, v2.16b, v2.16b, #4",
+        "ext v5.16b, v2.16b, v2.16b, #5",
+        "ext v6.16b, v2.16b, v2.16b, #6",
+        "ext v7.16b, v2.16b, v2.16b, #7",
+        "uabdl v2.8h, v3.8b, v4.8b",
+        "uabdl v3.8h, v5.8b, v4.8b",
+        "uabdl v5.8h, v6.8b, v4.8b",
+        "uabdl v6.8h, v7.8b, v4.8b",
+        "addp v4.8h, v2.8h, v5.8h",
+        "addp v2.8h, v3.8h, v6.8h",
+        "trn1 v3.4s, v4.4s, v2.4s",
+        "trn2 v5.4s, v4.4s, v2.4s",
+        "addp v2.8h, v3.8h, v5.8h",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmpsadbw xmm0, xmm1, xmm2, 110b": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 17,
       "Comment": [
         "Map 3 0b01 0x42 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v18.s[2]",
-        "ext v3.16b, v17.16b, v17.16b, #4",
-        "ext v4.16b, v17.16b, v17.16b, #5",
-        "ext v5.16b, v17.16b, v17.16b, #6",
-        "ext v6.16b, v17.16b, v17.16b, #7",
-        "uabdl v3.8h, v3.8b, v2.8b",
-        "uabdl v4.8h, v4.8b, v2.8b",
-        "uabdl v5.8h, v5.8b, v2.8b",
-        "uabdl v2.8h, v6.8b, v2.8b",
-        "addp v3.8h, v3.8h, v5.8h",
-        "addp v2.8h, v4.8h, v2.8h",
-        "trn1 v4.4s, v3.4s, v2.4s",
-        "trn2 v2.4s, v3.4s, v2.4s",
-        "addp v16.8h, v4.8h, v2.8h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "dup v4.4s, v3.s[2]",
+        "ext v3.16b, v2.16b, v2.16b, #4",
+        "ext v5.16b, v2.16b, v2.16b, #5",
+        "ext v6.16b, v2.16b, v2.16b, #6",
+        "ext v7.16b, v2.16b, v2.16b, #7",
+        "uabdl v2.8h, v3.8b, v4.8b",
+        "uabdl v3.8h, v5.8b, v4.8b",
+        "uabdl v5.8h, v6.8b, v4.8b",
+        "uabdl v6.8h, v7.8b, v4.8b",
+        "addp v4.8h, v2.8h, v5.8h",
+        "addp v2.8h, v3.8h, v6.8h",
+        "trn1 v3.4s, v4.4s, v2.4s",
+        "trn2 v5.4s, v4.4s, v2.4s",
+        "addp v2.8h, v3.8h, v5.8h",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmpsadbw xmm0, xmm1, xmm2, 111b": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 17,
       "Comment": [
         "Map 3 0b01 0x42 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v18.s[3]",
-        "ext v3.16b, v17.16b, v17.16b, #4",
-        "ext v4.16b, v17.16b, v17.16b, #5",
-        "ext v5.16b, v17.16b, v17.16b, #6",
-        "ext v6.16b, v17.16b, v17.16b, #7",
-        "uabdl v3.8h, v3.8b, v2.8b",
-        "uabdl v4.8h, v4.8b, v2.8b",
-        "uabdl v5.8h, v5.8b, v2.8b",
-        "uabdl v2.8h, v6.8b, v2.8b",
-        "addp v3.8h, v3.8h, v5.8h",
-        "addp v2.8h, v4.8h, v2.8h",
-        "trn1 v4.4s, v3.4s, v2.4s",
-        "trn2 v2.4s, v3.4s, v2.4s",
-        "addp v16.8h, v4.8h, v2.8h"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "dup v4.4s, v3.s[3]",
+        "ext v3.16b, v2.16b, v2.16b, #4",
+        "ext v5.16b, v2.16b, v2.16b, #5",
+        "ext v6.16b, v2.16b, v2.16b, #6",
+        "ext v7.16b, v2.16b, v2.16b, #7",
+        "uabdl v2.8h, v3.8b, v4.8b",
+        "uabdl v3.8h, v5.8b, v4.8b",
+        "uabdl v5.8h, v6.8b, v4.8b",
+        "uabdl v6.8h, v7.8b, v4.8b",
+        "addp v4.8h, v2.8h, v5.8h",
+        "addp v2.8h, v3.8h, v6.8h",
+        "trn1 v3.4s, v4.4s, v2.4s",
+        "trn2 v5.4s, v4.4s, v2.4s",
+        "addp v2.8h, v3.8h, v5.8h",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmpsadbw ymm0, ymm1, ymm2, 000b": {
-      "ExpectedInstructionCount": 34,
+      "ExpectedInstructionCount": 37,
       "Comment": [
         "Map 3 0b01 0x42 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v18.s[0]",
-        "ext v3.16b, v17.16b, v17.16b, #0",
-        "ext v4.16b, v17.16b, v17.16b, #1",
-        "ext v5.16b, v17.16b, v17.16b, #2",
-        "ext v6.16b, v17.16b, v17.16b, #3",
-        "uabdl v3.8h, v3.8b, v2.8b",
-        "uabdl v4.8h, v4.8b, v2.8b",
-        "uabdl v5.8h, v5.8b, v2.8b",
-        "uabdl v2.8h, v6.8b, v2.8b",
-        "addp v3.8h, v3.8h, v5.8h",
-        "addp v2.8h, v4.8h, v2.8h",
-        "trn1 v4.4s, v3.4s, v2.4s",
-        "trn2 v2.4s, v3.4s, v2.4s",
-        "addp v2.8h, v4.8h, v2.8h",
-        "mov z3.q, z17.q[1]",
-        "mov z4.q, z18.q[1]",
-        "dup v4.4s, v4.s[0]",
-        "ext v5.16b, v3.16b, v3.16b, #0",
-        "ext v6.16b, v3.16b, v3.16b, #1",
-        "ext v7.16b, v3.16b, v3.16b, #2",
-        "ext v3.16b, v3.16b, v3.16b, #3",
-        "uabdl v5.8h, v5.8b, v4.8b",
-        "uabdl v6.8h, v6.8b, v4.8b",
-        "uabdl v7.8h, v7.8b, v4.8b",
-        "uabdl v3.8h, v3.8b, v4.8b",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "dup v4.4s, v3.s[0]",
+        "ext v5.16b, v2.16b, v2.16b, #0",
+        "ext v6.16b, v2.16b, v2.16b, #1",
+        "ext v7.16b, v2.16b, v2.16b, #2",
+        "ext v8.16b, v2.16b, v2.16b, #3",
+        "uabdl v9.8h, v5.8b, v4.8b",
+        "uabdl v5.8h, v6.8b, v4.8b",
+        "uabdl v6.8h, v7.8b, v4.8b",
+        "uabdl v7.8h, v8.8b, v4.8b",
+        "addp v4.8h, v9.8h, v6.8h",
+        "addp v6.8h, v5.8h, v7.8h",
+        "trn1 v5.4s, v4.4s, v6.4s",
+        "trn2 v7.4s, v4.4s, v6.4s",
         "addp v4.8h, v5.8h, v7.8h",
-        "addp v3.8h, v6.8h, v3.8h",
-        "trn1 v5.4s, v4.4s, v3.4s",
-        "trn2 v3.4s, v4.4s, v3.4s",
-        "addp v3.8h, v5.8h, v3.8h",
+        "mov z5.q, z2.q[1]",
+        "mov z2.q, z3.q[1]",
+        "dup v3.4s, v2.s[0]",
+        "ext v2.16b, v5.16b, v5.16b, #0",
+        "ext v6.16b, v5.16b, v5.16b, #1",
+        "ext v7.16b, v5.16b, v5.16b, #2",
+        "ext v8.16b, v5.16b, v5.16b, #3",
+        "uabdl v5.8h, v2.8b, v3.8b",
+        "uabdl v2.8h, v6.8b, v3.8b",
+        "uabdl v6.8h, v7.8b, v3.8b",
+        "uabdl v7.8h, v8.8b, v3.8b",
+        "addp v3.8h, v5.8h, v6.8h",
+        "addp v5.8h, v2.8h, v7.8h",
+        "trn1 v2.4s, v3.4s, v5.4s",
+        "trn2 v6.4s, v3.4s, v5.4s",
+        "addp v3.8h, v2.8h, v6.8h",
         "mov z1.q, q3",
-        "mov z16.d, z2.d",
+        "mov z2.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z2.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmpsadbw ymm0, ymm1, ymm2, 001b": {
-      "ExpectedInstructionCount": 34,
+      "ExpectedInstructionCount": 37,
       "Comment": [
         "Map 3 0b01 0x42 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v18.s[1]",
-        "ext v3.16b, v17.16b, v17.16b, #0",
-        "ext v4.16b, v17.16b, v17.16b, #1",
-        "ext v5.16b, v17.16b, v17.16b, #2",
-        "ext v6.16b, v17.16b, v17.16b, #3",
-        "uabdl v3.8h, v3.8b, v2.8b",
-        "uabdl v4.8h, v4.8b, v2.8b",
-        "uabdl v5.8h, v5.8b, v2.8b",
-        "uabdl v2.8h, v6.8b, v2.8b",
-        "addp v3.8h, v3.8h, v5.8h",
-        "addp v2.8h, v4.8h, v2.8h",
-        "trn1 v4.4s, v3.4s, v2.4s",
-        "trn2 v2.4s, v3.4s, v2.4s",
-        "addp v2.8h, v4.8h, v2.8h",
-        "mov z3.q, z17.q[1]",
-        "mov z4.q, z18.q[1]",
-        "dup v4.4s, v4.s[0]",
-        "ext v5.16b, v3.16b, v3.16b, #0",
-        "ext v6.16b, v3.16b, v3.16b, #1",
-        "ext v7.16b, v3.16b, v3.16b, #2",
-        "ext v3.16b, v3.16b, v3.16b, #3",
-        "uabdl v5.8h, v5.8b, v4.8b",
-        "uabdl v6.8h, v6.8b, v4.8b",
-        "uabdl v7.8h, v7.8b, v4.8b",
-        "uabdl v3.8h, v3.8b, v4.8b",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "dup v4.4s, v3.s[1]",
+        "ext v5.16b, v2.16b, v2.16b, #0",
+        "ext v6.16b, v2.16b, v2.16b, #1",
+        "ext v7.16b, v2.16b, v2.16b, #2",
+        "ext v8.16b, v2.16b, v2.16b, #3",
+        "uabdl v9.8h, v5.8b, v4.8b",
+        "uabdl v5.8h, v6.8b, v4.8b",
+        "uabdl v6.8h, v7.8b, v4.8b",
+        "uabdl v7.8h, v8.8b, v4.8b",
+        "addp v4.8h, v9.8h, v6.8h",
+        "addp v6.8h, v5.8h, v7.8h",
+        "trn1 v5.4s, v4.4s, v6.4s",
+        "trn2 v7.4s, v4.4s, v6.4s",
         "addp v4.8h, v5.8h, v7.8h",
-        "addp v3.8h, v6.8h, v3.8h",
-        "trn1 v5.4s, v4.4s, v3.4s",
-        "trn2 v3.4s, v4.4s, v3.4s",
-        "addp v3.8h, v5.8h, v3.8h",
+        "mov z5.q, z2.q[1]",
+        "mov z2.q, z3.q[1]",
+        "dup v3.4s, v2.s[0]",
+        "ext v2.16b, v5.16b, v5.16b, #0",
+        "ext v6.16b, v5.16b, v5.16b, #1",
+        "ext v7.16b, v5.16b, v5.16b, #2",
+        "ext v8.16b, v5.16b, v5.16b, #3",
+        "uabdl v5.8h, v2.8b, v3.8b",
+        "uabdl v2.8h, v6.8b, v3.8b",
+        "uabdl v6.8h, v7.8b, v3.8b",
+        "uabdl v7.8h, v8.8b, v3.8b",
+        "addp v3.8h, v5.8h, v6.8h",
+        "addp v5.8h, v2.8h, v7.8h",
+        "trn1 v2.4s, v3.4s, v5.4s",
+        "trn2 v6.4s, v3.4s, v5.4s",
+        "addp v3.8h, v2.8h, v6.8h",
         "mov z1.q, q3",
-        "mov z16.d, z2.d",
+        "mov z2.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z2.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmpsadbw ymm0, ymm1, ymm2, 010b": {
-      "ExpectedInstructionCount": 34,
+      "ExpectedInstructionCount": 37,
       "Comment": [
         "Map 3 0b01 0x42 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v18.s[2]",
-        "ext v3.16b, v17.16b, v17.16b, #0",
-        "ext v4.16b, v17.16b, v17.16b, #1",
-        "ext v5.16b, v17.16b, v17.16b, #2",
-        "ext v6.16b, v17.16b, v17.16b, #3",
-        "uabdl v3.8h, v3.8b, v2.8b",
-        "uabdl v4.8h, v4.8b, v2.8b",
-        "uabdl v5.8h, v5.8b, v2.8b",
-        "uabdl v2.8h, v6.8b, v2.8b",
-        "addp v3.8h, v3.8h, v5.8h",
-        "addp v2.8h, v4.8h, v2.8h",
-        "trn1 v4.4s, v3.4s, v2.4s",
-        "trn2 v2.4s, v3.4s, v2.4s",
-        "addp v2.8h, v4.8h, v2.8h",
-        "mov z3.q, z17.q[1]",
-        "mov z4.q, z18.q[1]",
-        "dup v4.4s, v4.s[0]",
-        "ext v5.16b, v3.16b, v3.16b, #0",
-        "ext v6.16b, v3.16b, v3.16b, #1",
-        "ext v7.16b, v3.16b, v3.16b, #2",
-        "ext v3.16b, v3.16b, v3.16b, #3",
-        "uabdl v5.8h, v5.8b, v4.8b",
-        "uabdl v6.8h, v6.8b, v4.8b",
-        "uabdl v7.8h, v7.8b, v4.8b",
-        "uabdl v3.8h, v3.8b, v4.8b",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "dup v4.4s, v3.s[2]",
+        "ext v5.16b, v2.16b, v2.16b, #0",
+        "ext v6.16b, v2.16b, v2.16b, #1",
+        "ext v7.16b, v2.16b, v2.16b, #2",
+        "ext v8.16b, v2.16b, v2.16b, #3",
+        "uabdl v9.8h, v5.8b, v4.8b",
+        "uabdl v5.8h, v6.8b, v4.8b",
+        "uabdl v6.8h, v7.8b, v4.8b",
+        "uabdl v7.8h, v8.8b, v4.8b",
+        "addp v4.8h, v9.8h, v6.8h",
+        "addp v6.8h, v5.8h, v7.8h",
+        "trn1 v5.4s, v4.4s, v6.4s",
+        "trn2 v7.4s, v4.4s, v6.4s",
         "addp v4.8h, v5.8h, v7.8h",
-        "addp v3.8h, v6.8h, v3.8h",
-        "trn1 v5.4s, v4.4s, v3.4s",
-        "trn2 v3.4s, v4.4s, v3.4s",
-        "addp v3.8h, v5.8h, v3.8h",
+        "mov z5.q, z2.q[1]",
+        "mov z2.q, z3.q[1]",
+        "dup v3.4s, v2.s[0]",
+        "ext v2.16b, v5.16b, v5.16b, #0",
+        "ext v6.16b, v5.16b, v5.16b, #1",
+        "ext v7.16b, v5.16b, v5.16b, #2",
+        "ext v8.16b, v5.16b, v5.16b, #3",
+        "uabdl v5.8h, v2.8b, v3.8b",
+        "uabdl v2.8h, v6.8b, v3.8b",
+        "uabdl v6.8h, v7.8b, v3.8b",
+        "uabdl v7.8h, v8.8b, v3.8b",
+        "addp v3.8h, v5.8h, v6.8h",
+        "addp v5.8h, v2.8h, v7.8h",
+        "trn1 v2.4s, v3.4s, v5.4s",
+        "trn2 v6.4s, v3.4s, v5.4s",
+        "addp v3.8h, v2.8h, v6.8h",
         "mov z1.q, q3",
-        "mov z16.d, z2.d",
+        "mov z2.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z2.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmpsadbw ymm0, ymm1, ymm2, 011b": {
-      "ExpectedInstructionCount": 34,
+      "ExpectedInstructionCount": 37,
       "Comment": [
         "Map 3 0b01 0x42 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v18.s[3]",
-        "ext v3.16b, v17.16b, v17.16b, #0",
-        "ext v4.16b, v17.16b, v17.16b, #1",
-        "ext v5.16b, v17.16b, v17.16b, #2",
-        "ext v6.16b, v17.16b, v17.16b, #3",
-        "uabdl v3.8h, v3.8b, v2.8b",
-        "uabdl v4.8h, v4.8b, v2.8b",
-        "uabdl v5.8h, v5.8b, v2.8b",
-        "uabdl v2.8h, v6.8b, v2.8b",
-        "addp v3.8h, v3.8h, v5.8h",
-        "addp v2.8h, v4.8h, v2.8h",
-        "trn1 v4.4s, v3.4s, v2.4s",
-        "trn2 v2.4s, v3.4s, v2.4s",
-        "addp v2.8h, v4.8h, v2.8h",
-        "mov z3.q, z17.q[1]",
-        "mov z4.q, z18.q[1]",
-        "dup v4.4s, v4.s[0]",
-        "ext v5.16b, v3.16b, v3.16b, #0",
-        "ext v6.16b, v3.16b, v3.16b, #1",
-        "ext v7.16b, v3.16b, v3.16b, #2",
-        "ext v3.16b, v3.16b, v3.16b, #3",
-        "uabdl v5.8h, v5.8b, v4.8b",
-        "uabdl v6.8h, v6.8b, v4.8b",
-        "uabdl v7.8h, v7.8b, v4.8b",
-        "uabdl v3.8h, v3.8b, v4.8b",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "dup v4.4s, v3.s[3]",
+        "ext v5.16b, v2.16b, v2.16b, #0",
+        "ext v6.16b, v2.16b, v2.16b, #1",
+        "ext v7.16b, v2.16b, v2.16b, #2",
+        "ext v8.16b, v2.16b, v2.16b, #3",
+        "uabdl v9.8h, v5.8b, v4.8b",
+        "uabdl v5.8h, v6.8b, v4.8b",
+        "uabdl v6.8h, v7.8b, v4.8b",
+        "uabdl v7.8h, v8.8b, v4.8b",
+        "addp v4.8h, v9.8h, v6.8h",
+        "addp v6.8h, v5.8h, v7.8h",
+        "trn1 v5.4s, v4.4s, v6.4s",
+        "trn2 v7.4s, v4.4s, v6.4s",
         "addp v4.8h, v5.8h, v7.8h",
-        "addp v3.8h, v6.8h, v3.8h",
-        "trn1 v5.4s, v4.4s, v3.4s",
-        "trn2 v3.4s, v4.4s, v3.4s",
-        "addp v3.8h, v5.8h, v3.8h",
+        "mov z5.q, z2.q[1]",
+        "mov z2.q, z3.q[1]",
+        "dup v3.4s, v2.s[0]",
+        "ext v2.16b, v5.16b, v5.16b, #0",
+        "ext v6.16b, v5.16b, v5.16b, #1",
+        "ext v7.16b, v5.16b, v5.16b, #2",
+        "ext v8.16b, v5.16b, v5.16b, #3",
+        "uabdl v5.8h, v2.8b, v3.8b",
+        "uabdl v2.8h, v6.8b, v3.8b",
+        "uabdl v6.8h, v7.8b, v3.8b",
+        "uabdl v7.8h, v8.8b, v3.8b",
+        "addp v3.8h, v5.8h, v6.8h",
+        "addp v5.8h, v2.8h, v7.8h",
+        "trn1 v2.4s, v3.4s, v5.4s",
+        "trn2 v6.4s, v3.4s, v5.4s",
+        "addp v3.8h, v2.8h, v6.8h",
         "mov z1.q, q3",
-        "mov z16.d, z2.d",
+        "mov z2.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z2.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmpsadbw ymm0, ymm1, ymm2, 100b": {
-      "ExpectedInstructionCount": 34,
+      "ExpectedInstructionCount": 37,
       "Comment": [
         "Map 3 0b01 0x42 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v18.s[0]",
-        "ext v3.16b, v17.16b, v17.16b, #4",
-        "ext v4.16b, v17.16b, v17.16b, #5",
-        "ext v5.16b, v17.16b, v17.16b, #6",
-        "ext v6.16b, v17.16b, v17.16b, #7",
-        "uabdl v3.8h, v3.8b, v2.8b",
-        "uabdl v4.8h, v4.8b, v2.8b",
-        "uabdl v5.8h, v5.8b, v2.8b",
-        "uabdl v2.8h, v6.8b, v2.8b",
-        "addp v3.8h, v3.8h, v5.8h",
-        "addp v2.8h, v4.8h, v2.8h",
-        "trn1 v4.4s, v3.4s, v2.4s",
-        "trn2 v2.4s, v3.4s, v2.4s",
-        "addp v2.8h, v4.8h, v2.8h",
-        "mov z3.q, z17.q[1]",
-        "mov z4.q, z18.q[1]",
-        "dup v4.4s, v4.s[0]",
-        "ext v5.16b, v3.16b, v3.16b, #0",
-        "ext v6.16b, v3.16b, v3.16b, #1",
-        "ext v7.16b, v3.16b, v3.16b, #2",
-        "ext v3.16b, v3.16b, v3.16b, #3",
-        "uabdl v5.8h, v5.8b, v4.8b",
-        "uabdl v6.8h, v6.8b, v4.8b",
-        "uabdl v7.8h, v7.8b, v4.8b",
-        "uabdl v3.8h, v3.8b, v4.8b",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "dup v4.4s, v3.s[0]",
+        "ext v5.16b, v2.16b, v2.16b, #4",
+        "ext v6.16b, v2.16b, v2.16b, #5",
+        "ext v7.16b, v2.16b, v2.16b, #6",
+        "ext v8.16b, v2.16b, v2.16b, #7",
+        "uabdl v9.8h, v5.8b, v4.8b",
+        "uabdl v5.8h, v6.8b, v4.8b",
+        "uabdl v6.8h, v7.8b, v4.8b",
+        "uabdl v7.8h, v8.8b, v4.8b",
+        "addp v4.8h, v9.8h, v6.8h",
+        "addp v6.8h, v5.8h, v7.8h",
+        "trn1 v5.4s, v4.4s, v6.4s",
+        "trn2 v7.4s, v4.4s, v6.4s",
         "addp v4.8h, v5.8h, v7.8h",
-        "addp v3.8h, v6.8h, v3.8h",
-        "trn1 v5.4s, v4.4s, v3.4s",
-        "trn2 v3.4s, v4.4s, v3.4s",
-        "addp v3.8h, v5.8h, v3.8h",
+        "mov z5.q, z2.q[1]",
+        "mov z2.q, z3.q[1]",
+        "dup v3.4s, v2.s[0]",
+        "ext v2.16b, v5.16b, v5.16b, #0",
+        "ext v6.16b, v5.16b, v5.16b, #1",
+        "ext v7.16b, v5.16b, v5.16b, #2",
+        "ext v8.16b, v5.16b, v5.16b, #3",
+        "uabdl v5.8h, v2.8b, v3.8b",
+        "uabdl v2.8h, v6.8b, v3.8b",
+        "uabdl v6.8h, v7.8b, v3.8b",
+        "uabdl v7.8h, v8.8b, v3.8b",
+        "addp v3.8h, v5.8h, v6.8h",
+        "addp v5.8h, v2.8h, v7.8h",
+        "trn1 v2.4s, v3.4s, v5.4s",
+        "trn2 v6.4s, v3.4s, v5.4s",
+        "addp v3.8h, v2.8h, v6.8h",
         "mov z1.q, q3",
-        "mov z16.d, z2.d",
+        "mov z2.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z2.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmpsadbw ymm0, ymm1, ymm2, 101b": {
-      "ExpectedInstructionCount": 34,
+      "ExpectedInstructionCount": 37,
       "Comment": [
         "Map 3 0b01 0x42 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v18.s[1]",
-        "ext v3.16b, v17.16b, v17.16b, #4",
-        "ext v4.16b, v17.16b, v17.16b, #5",
-        "ext v5.16b, v17.16b, v17.16b, #6",
-        "ext v6.16b, v17.16b, v17.16b, #7",
-        "uabdl v3.8h, v3.8b, v2.8b",
-        "uabdl v4.8h, v4.8b, v2.8b",
-        "uabdl v5.8h, v5.8b, v2.8b",
-        "uabdl v2.8h, v6.8b, v2.8b",
-        "addp v3.8h, v3.8h, v5.8h",
-        "addp v2.8h, v4.8h, v2.8h",
-        "trn1 v4.4s, v3.4s, v2.4s",
-        "trn2 v2.4s, v3.4s, v2.4s",
-        "addp v2.8h, v4.8h, v2.8h",
-        "mov z3.q, z17.q[1]",
-        "mov z4.q, z18.q[1]",
-        "dup v4.4s, v4.s[0]",
-        "ext v5.16b, v3.16b, v3.16b, #0",
-        "ext v6.16b, v3.16b, v3.16b, #1",
-        "ext v7.16b, v3.16b, v3.16b, #2",
-        "ext v3.16b, v3.16b, v3.16b, #3",
-        "uabdl v5.8h, v5.8b, v4.8b",
-        "uabdl v6.8h, v6.8b, v4.8b",
-        "uabdl v7.8h, v7.8b, v4.8b",
-        "uabdl v3.8h, v3.8b, v4.8b",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "dup v4.4s, v3.s[1]",
+        "ext v5.16b, v2.16b, v2.16b, #4",
+        "ext v6.16b, v2.16b, v2.16b, #5",
+        "ext v7.16b, v2.16b, v2.16b, #6",
+        "ext v8.16b, v2.16b, v2.16b, #7",
+        "uabdl v9.8h, v5.8b, v4.8b",
+        "uabdl v5.8h, v6.8b, v4.8b",
+        "uabdl v6.8h, v7.8b, v4.8b",
+        "uabdl v7.8h, v8.8b, v4.8b",
+        "addp v4.8h, v9.8h, v6.8h",
+        "addp v6.8h, v5.8h, v7.8h",
+        "trn1 v5.4s, v4.4s, v6.4s",
+        "trn2 v7.4s, v4.4s, v6.4s",
         "addp v4.8h, v5.8h, v7.8h",
-        "addp v3.8h, v6.8h, v3.8h",
-        "trn1 v5.4s, v4.4s, v3.4s",
-        "trn2 v3.4s, v4.4s, v3.4s",
-        "addp v3.8h, v5.8h, v3.8h",
+        "mov z5.q, z2.q[1]",
+        "mov z2.q, z3.q[1]",
+        "dup v3.4s, v2.s[0]",
+        "ext v2.16b, v5.16b, v5.16b, #0",
+        "ext v6.16b, v5.16b, v5.16b, #1",
+        "ext v7.16b, v5.16b, v5.16b, #2",
+        "ext v8.16b, v5.16b, v5.16b, #3",
+        "uabdl v5.8h, v2.8b, v3.8b",
+        "uabdl v2.8h, v6.8b, v3.8b",
+        "uabdl v6.8h, v7.8b, v3.8b",
+        "uabdl v7.8h, v8.8b, v3.8b",
+        "addp v3.8h, v5.8h, v6.8h",
+        "addp v5.8h, v2.8h, v7.8h",
+        "trn1 v2.4s, v3.4s, v5.4s",
+        "trn2 v6.4s, v3.4s, v5.4s",
+        "addp v3.8h, v2.8h, v6.8h",
         "mov z1.q, q3",
-        "mov z16.d, z2.d",
+        "mov z2.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z2.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmpsadbw ymm0, ymm1, ymm2, 110b": {
-      "ExpectedInstructionCount": 34,
+      "ExpectedInstructionCount": 37,
       "Comment": [
         "Map 3 0b01 0x42 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v18.s[2]",
-        "ext v3.16b, v17.16b, v17.16b, #4",
-        "ext v4.16b, v17.16b, v17.16b, #5",
-        "ext v5.16b, v17.16b, v17.16b, #6",
-        "ext v6.16b, v17.16b, v17.16b, #7",
-        "uabdl v3.8h, v3.8b, v2.8b",
-        "uabdl v4.8h, v4.8b, v2.8b",
-        "uabdl v5.8h, v5.8b, v2.8b",
-        "uabdl v2.8h, v6.8b, v2.8b",
-        "addp v3.8h, v3.8h, v5.8h",
-        "addp v2.8h, v4.8h, v2.8h",
-        "trn1 v4.4s, v3.4s, v2.4s",
-        "trn2 v2.4s, v3.4s, v2.4s",
-        "addp v2.8h, v4.8h, v2.8h",
-        "mov z3.q, z17.q[1]",
-        "mov z4.q, z18.q[1]",
-        "dup v4.4s, v4.s[0]",
-        "ext v5.16b, v3.16b, v3.16b, #0",
-        "ext v6.16b, v3.16b, v3.16b, #1",
-        "ext v7.16b, v3.16b, v3.16b, #2",
-        "ext v3.16b, v3.16b, v3.16b, #3",
-        "uabdl v5.8h, v5.8b, v4.8b",
-        "uabdl v6.8h, v6.8b, v4.8b",
-        "uabdl v7.8h, v7.8b, v4.8b",
-        "uabdl v3.8h, v3.8b, v4.8b",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "dup v4.4s, v3.s[2]",
+        "ext v5.16b, v2.16b, v2.16b, #4",
+        "ext v6.16b, v2.16b, v2.16b, #5",
+        "ext v7.16b, v2.16b, v2.16b, #6",
+        "ext v8.16b, v2.16b, v2.16b, #7",
+        "uabdl v9.8h, v5.8b, v4.8b",
+        "uabdl v5.8h, v6.8b, v4.8b",
+        "uabdl v6.8h, v7.8b, v4.8b",
+        "uabdl v7.8h, v8.8b, v4.8b",
+        "addp v4.8h, v9.8h, v6.8h",
+        "addp v6.8h, v5.8h, v7.8h",
+        "trn1 v5.4s, v4.4s, v6.4s",
+        "trn2 v7.4s, v4.4s, v6.4s",
         "addp v4.8h, v5.8h, v7.8h",
-        "addp v3.8h, v6.8h, v3.8h",
-        "trn1 v5.4s, v4.4s, v3.4s",
-        "trn2 v3.4s, v4.4s, v3.4s",
-        "addp v3.8h, v5.8h, v3.8h",
+        "mov z5.q, z2.q[1]",
+        "mov z2.q, z3.q[1]",
+        "dup v3.4s, v2.s[0]",
+        "ext v2.16b, v5.16b, v5.16b, #0",
+        "ext v6.16b, v5.16b, v5.16b, #1",
+        "ext v7.16b, v5.16b, v5.16b, #2",
+        "ext v8.16b, v5.16b, v5.16b, #3",
+        "uabdl v5.8h, v2.8b, v3.8b",
+        "uabdl v2.8h, v6.8b, v3.8b",
+        "uabdl v6.8h, v7.8b, v3.8b",
+        "uabdl v7.8h, v8.8b, v3.8b",
+        "addp v3.8h, v5.8h, v6.8h",
+        "addp v5.8h, v2.8h, v7.8h",
+        "trn1 v2.4s, v3.4s, v5.4s",
+        "trn2 v6.4s, v3.4s, v5.4s",
+        "addp v3.8h, v2.8h, v6.8h",
         "mov z1.q, q3",
-        "mov z16.d, z2.d",
+        "mov z2.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z2.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vmpsadbw ymm0, ymm1, ymm2, 111b": {
-      "ExpectedInstructionCount": 34,
+      "ExpectedInstructionCount": 37,
       "Comment": [
         "Map 3 0b01 0x42 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "dup v2.4s, v18.s[3]",
-        "ext v3.16b, v17.16b, v17.16b, #4",
-        "ext v4.16b, v17.16b, v17.16b, #5",
-        "ext v5.16b, v17.16b, v17.16b, #6",
-        "ext v6.16b, v17.16b, v17.16b, #7",
-        "uabdl v3.8h, v3.8b, v2.8b",
-        "uabdl v4.8h, v4.8b, v2.8b",
-        "uabdl v5.8h, v5.8b, v2.8b",
-        "uabdl v2.8h, v6.8b, v2.8b",
-        "addp v3.8h, v3.8h, v5.8h",
-        "addp v2.8h, v4.8h, v2.8h",
-        "trn1 v4.4s, v3.4s, v2.4s",
-        "trn2 v2.4s, v3.4s, v2.4s",
-        "addp v2.8h, v4.8h, v2.8h",
-        "mov z3.q, z17.q[1]",
-        "mov z4.q, z18.q[1]",
-        "dup v4.4s, v4.s[0]",
-        "ext v5.16b, v3.16b, v3.16b, #0",
-        "ext v6.16b, v3.16b, v3.16b, #1",
-        "ext v7.16b, v3.16b, v3.16b, #2",
-        "ext v3.16b, v3.16b, v3.16b, #3",
-        "uabdl v5.8h, v5.8b, v4.8b",
-        "uabdl v6.8h, v6.8b, v4.8b",
-        "uabdl v7.8h, v7.8b, v4.8b",
-        "uabdl v3.8h, v3.8b, v4.8b",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "dup v4.4s, v3.s[3]",
+        "ext v5.16b, v2.16b, v2.16b, #4",
+        "ext v6.16b, v2.16b, v2.16b, #5",
+        "ext v7.16b, v2.16b, v2.16b, #6",
+        "ext v8.16b, v2.16b, v2.16b, #7",
+        "uabdl v9.8h, v5.8b, v4.8b",
+        "uabdl v5.8h, v6.8b, v4.8b",
+        "uabdl v6.8h, v7.8b, v4.8b",
+        "uabdl v7.8h, v8.8b, v4.8b",
+        "addp v4.8h, v9.8h, v6.8h",
+        "addp v6.8h, v5.8h, v7.8h",
+        "trn1 v5.4s, v4.4s, v6.4s",
+        "trn2 v7.4s, v4.4s, v6.4s",
         "addp v4.8h, v5.8h, v7.8h",
-        "addp v3.8h, v6.8h, v3.8h",
-        "trn1 v5.4s, v4.4s, v3.4s",
-        "trn2 v3.4s, v4.4s, v3.4s",
-        "addp v3.8h, v5.8h, v3.8h",
+        "mov z5.q, z2.q[1]",
+        "mov z2.q, z3.q[1]",
+        "dup v3.4s, v2.s[0]",
+        "ext v2.16b, v5.16b, v5.16b, #0",
+        "ext v6.16b, v5.16b, v5.16b, #1",
+        "ext v7.16b, v5.16b, v5.16b, #2",
+        "ext v8.16b, v5.16b, v5.16b, #3",
+        "uabdl v5.8h, v2.8b, v3.8b",
+        "uabdl v2.8h, v6.8b, v3.8b",
+        "uabdl v6.8h, v7.8b, v3.8b",
+        "uabdl v7.8h, v8.8b, v3.8b",
+        "addp v3.8h, v5.8h, v6.8h",
+        "addp v5.8h, v2.8h, v7.8h",
+        "trn1 v2.4s, v3.4s, v5.4s",
+        "trn2 v6.4s, v3.4s, v5.4s",
+        "addp v3.8h, v2.8h, v6.8h",
         "mov z1.q, q3",
-        "mov z16.d, z2.d",
+        "mov z2.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z2.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpclmulqdq xmm0, xmm1, xmm2, 00000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 3 0b01 0x44 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "pmull v16.1q, v17.1d, v18.1d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "pmull v4.1q, v2.1d, v3.1d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpclmulqdq xmm0, xmm1, xmm2, 00001b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 3 0b01 0x44 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "dup v0.2d, v17.d[1]",
-        "pmull v16.1q, v0.1d, v18.1d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "dup v0.2d, v2.d[1]",
+        "pmull v4.1q, v0.1d, v3.1d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpclmulqdq xmm0, xmm1, xmm2, 10000b": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "Map 3 0b01 0x44 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "dup v0.2d, v18.d[1]",
-        "pmull v16.1q, v0.1d, v17.1d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "dup v0.2d, v3.d[1]",
+        "pmull v4.1q, v0.1d, v2.1d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpclmulqdq xmm0, xmm1, xmm2, 10001b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map 3 0b01 0x44 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "pmull2 v16.1q, v17.2d, v18.2d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "pmull2 v4.1q, v2.2d, v3.2d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpclmulqdq ymm0, ymm1, ymm2, 00000b": {
@@ -4163,424 +4944,521 @@
       ]
     },
     "vperm2i128 ymm0, ymm1, ymm2, 00000000b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map 3 0b01 0x46 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, q17",
-        "mov z2.b, p6/m, z1.b",
-        "mov z1.q, q17",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.q, q2",
+        "mov z4.d, z3.d",
+        "mov z4.b, p6/m, z1.b",
+        "mov z1.q, q2",
+        "mov z3.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z3.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vperm2i128 ymm0, ymm1, ymm2, 00000001b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map 3 0b01 0x46 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, z17.q[1]",
-        "mov z2.b, p6/m, z1.b",
-        "mov z1.q, q17",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.q, z2.q[1]",
+        "mov z4.d, z3.d",
+        "mov z4.b, p6/m, z1.b",
+        "mov z1.q, q2",
+        "mov z3.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z3.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vperm2i128 ymm0, ymm1, ymm2, 00000010b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "Map 3 0b01 0x46 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, q18",
-        "mov z2.b, p6/m, z1.b",
-        "mov z1.q, q17",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov z1.q, q3",
+        "mov z5.d, z4.d",
+        "mov z5.b, p6/m, z1.b",
+        "mov z1.q, q2",
+        "mov z3.d, z5.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z3.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vperm2i128 ymm0, ymm1, ymm2, 00000011b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "Map 3 0b01 0x46 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, z18.q[1]",
-        "mov z2.b, p6/m, z1.b",
-        "mov z1.q, q17",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov z1.q, z3.q[1]",
+        "mov z5.d, z4.d",
+        "mov z5.b, p6/m, z1.b",
+        "mov z1.q, q2",
+        "mov z3.d, z5.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z3.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vperm2i128 ymm0, ymm1, ymm2, 00010000b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map 3 0b01 0x46 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, q17",
-        "mov z2.b, p6/m, z1.b",
-        "mov z1.q, z17.q[1]",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.q, q2",
+        "mov z4.d, z3.d",
+        "mov z4.b, p6/m, z1.b",
+        "mov z1.q, z2.q[1]",
+        "mov z3.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z3.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vperm2i128 ymm0, ymm1, ymm2, 00010001b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map 3 0b01 0x46 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, z17.q[1]",
-        "mov z2.b, p6/m, z1.b",
-        "mov z1.q, z17.q[1]",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.q, z2.q[1]",
+        "mov z4.d, z3.d",
+        "mov z4.b, p6/m, z1.b",
+        "mov z1.q, z2.q[1]",
+        "mov z3.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z3.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vperm2i128 ymm0, ymm1, ymm2, 00010010b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "Map 3 0b01 0x46 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, q18",
-        "mov z2.b, p6/m, z1.b",
-        "mov z1.q, z17.q[1]",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov z1.q, q3",
+        "mov z5.d, z4.d",
+        "mov z5.b, p6/m, z1.b",
+        "mov z1.q, z2.q[1]",
+        "mov z3.d, z5.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z3.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vperm2i128 ymm0, ymm1, ymm2, 00010011b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "Map 3 0b01 0x46 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, z18.q[1]",
-        "mov z2.b, p6/m, z1.b",
-        "mov z1.q, z17.q[1]",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov z1.q, z3.q[1]",
+        "mov z5.d, z4.d",
+        "mov z5.b, p6/m, z1.b",
+        "mov z1.q, z2.q[1]",
+        "mov z3.d, z5.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z3.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vperm2i128 ymm0, ymm1, ymm2, 00100000b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "Map 3 0b01 0x46 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, q17",
-        "mov z2.b, p6/m, z1.b",
-        "mov z1.q, q18",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov z1.q, q2",
+        "mov z5.d, z4.d",
+        "mov z5.b, p6/m, z1.b",
+        "mov z1.q, q3",
+        "mov z2.d, z5.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z2.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vperm2i128 ymm0, ymm1, ymm2, 00100001b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "Map 3 0b01 0x46 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, z17.q[1]",
-        "mov z2.b, p6/m, z1.b",
-        "mov z1.q, q18",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov z1.q, z2.q[1]",
+        "mov z5.d, z4.d",
+        "mov z5.b, p6/m, z1.b",
+        "mov z1.q, q3",
+        "mov z2.d, z5.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z2.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vperm2i128 ymm0, ymm1, ymm2, 00100010b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map 3 0b01 0x46 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, q18",
-        "mov z2.b, p6/m, z1.b",
-        "mov z1.q, q18",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z18.d",
+        "movi v3.2d, #0x0",
+        "mov z1.q, q2",
+        "mov z4.d, z3.d",
+        "mov z4.b, p6/m, z1.b",
+        "mov z1.q, q2",
+        "mov z3.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z3.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vperm2i128 ymm0, ymm1, ymm2, 00100011b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map 3 0b01 0x46 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, z18.q[1]",
-        "mov z2.b, p6/m, z1.b",
-        "mov z1.q, q18",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z18.d",
+        "movi v3.2d, #0x0",
+        "mov z1.q, z2.q[1]",
+        "mov z4.d, z3.d",
+        "mov z4.b, p6/m, z1.b",
+        "mov z1.q, q2",
+        "mov z3.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z3.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vperm2i128 ymm0, ymm1, ymm2, 00110000b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "Map 3 0b01 0x46 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, q17",
-        "mov z2.b, p6/m, z1.b",
-        "mov z1.q, z18.q[1]",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov z1.q, q2",
+        "mov z5.d, z4.d",
+        "mov z5.b, p6/m, z1.b",
+        "mov z1.q, z3.q[1]",
+        "mov z2.d, z5.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z2.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vperm2i128 ymm0, ymm1, ymm2, 00110001b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "Map 3 0b01 0x46 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, z17.q[1]",
-        "mov z2.b, p6/m, z1.b",
-        "mov z1.q, z18.q[1]",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "movi v4.2d, #0x0",
+        "mov z1.q, z2.q[1]",
+        "mov z5.d, z4.d",
+        "mov z5.b, p6/m, z1.b",
+        "mov z1.q, z3.q[1]",
+        "mov z2.d, z5.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z2.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vperm2i128 ymm0, ymm1, ymm2, 00110010b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map 3 0b01 0x46 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, q18",
-        "mov z2.b, p6/m, z1.b",
-        "mov z1.q, z18.q[1]",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z18.d",
+        "movi v3.2d, #0x0",
+        "mov z1.q, q2",
+        "mov z4.d, z3.d",
+        "mov z4.b, p6/m, z1.b",
+        "mov z1.q, z2.q[1]",
+        "mov z3.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z3.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vperm2i128 ymm0, ymm1, ymm2, 00110011b": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map 3 0b01 0x46 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, z18.q[1]",
-        "mov z2.b, p6/m, z1.b",
-        "mov z1.q, z18.q[1]",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z18.d",
+        "movi v3.2d, #0x0",
+        "mov z1.q, z2.q[1]",
+        "mov z4.d, z3.d",
+        "mov z4.b, p6/m, z1.b",
+        "mov z1.q, z2.q[1]",
+        "mov z3.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z3.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vperm2i128 ymm0, ymm1, ymm2, 00001000b": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 3 0b01 0x46 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, q17",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.q, q2",
+        "mov z4.d, z3.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z4.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vperm2i128 ymm0, ymm1, ymm2, 00011000b": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 3 0b01 0x46 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, z17.q[1]",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.q, z2.q[1]",
+        "mov z4.d, z3.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z4.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vperm2i128 ymm0, ymm1, ymm2, 00101000b": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 3 0b01 0x46 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, q18",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z18.d",
+        "movi v3.2d, #0x0",
+        "mov z1.q, q2",
+        "mov z4.d, z3.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z4.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vperm2i128 ymm0, ymm1, ymm2, 00111000b": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 3 0b01 0x46 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, z18.q[1]",
-        "mov z16.d, z2.d",
+        "mov z2.d, p7/m, z18.d",
+        "movi v3.2d, #0x0",
+        "mov z1.q, z2.q[1]",
+        "mov z4.d, z3.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z4.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vperm2i128 ymm0, ymm1, ymm2, 10001000b": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map 3 0b01 0x46 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "movi v2.2d, #0x0",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vperm2i128 ymm0, ymm1, ymm2, 10000000b": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 3 0b01 0x46 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, q17",
-        "mov z16.d, z2.d",
-        "mov z16.b, p6/m, z1.b"
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.q, q2",
+        "mov z4.d, z3.d",
+        "mov z4.b, p6/m, z1.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vperm2i128 ymm0, ymm1, ymm2, 10000001b": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 3 0b01 0x46 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, z17.q[1]",
-        "mov z16.d, z2.d",
-        "mov z16.b, p6/m, z1.b"
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z1.q, z2.q[1]",
+        "mov z4.d, z3.d",
+        "mov z4.b, p6/m, z1.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vperm2i128 ymm0, ymm1, ymm2, 10000010b": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 3 0b01 0x46 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, q18",
-        "mov z16.d, z2.d",
-        "mov z16.b, p6/m, z1.b"
+        "mov z2.d, p7/m, z18.d",
+        "movi v3.2d, #0x0",
+        "mov z1.q, q2",
+        "mov z4.d, z3.d",
+        "mov z4.b, p6/m, z1.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vperm2i128 ymm0, ymm1, ymm2, 10000011b": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "Map 3 0b01 0x46 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "mov z1.q, z18.q[1]",
-        "mov z16.d, z2.d",
-        "mov z16.b, p6/m, z1.b"
+        "mov z2.d, p7/m, z18.d",
+        "movi v3.2d, #0x0",
+        "mov z1.q, z2.q[1]",
+        "mov z4.d, z3.d",
+        "mov z4.b, p6/m, z1.b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vblendvps xmm0, xmm1, xmm2, xmm3": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 3 0b01 0x4a 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "sshr v2.4s, v19.4s, #31",
-        "mov v16.16b, v2.16b",
-        "bsl v16.16b, v18.16b, v17.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z4.d, p7/m, z19.d",
+        "sshr v5.4s, v4.4s, #31",
+        "mov v4.16b, v5.16b",
+        "bsl v4.16b, v3.16b, v2.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vblendvps ymm0, ymm1, ymm2, ymm3": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map 3 0b01 0x4a 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z2, z19",
-        "asr z2.s, p7/m, z2.s, #31",
-        "movprfx z0, z18",
-        "bsl z0.d, z0.d, z17.d, z2.d",
-        "mov z16.d, z0.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z4.d, p7/m, z19.d",
+        "movprfx z5, z4",
+        "asr z5.s, p7/m, z5.s, #31",
+        "movprfx z0, z3",
+        "bsl z0.d, z0.d, z2.d, z5.d",
+        "mov z4.d, z0.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vblendvpd xmm0, xmm1, xmm2, xmm3": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 3 0b01 0x4b 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "sshr v2.2d, v19.2d, #63",
-        "mov v16.16b, v2.16b",
-        "bsl v16.16b, v18.16b, v17.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z4.d, p7/m, z19.d",
+        "sshr v5.2d, v4.2d, #63",
+        "mov v4.16b, v5.16b",
+        "bsl v4.16b, v3.16b, v2.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vblendvpd ymm0, ymm1, ymm2, ymm3": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map 3 0b01 0x4b 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z2, z19",
-        "asr z2.d, p7/m, z2.d, #63",
-        "movprfx z0, z18",
-        "bsl z0.d, z0.d, z17.d, z2.d",
-        "mov z16.d, z0.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z4.d, p7/m, z19.d",
+        "movprfx z5, z4",
+        "asr z5.d, p7/m, z5.d, #63",
+        "movprfx z0, z3",
+        "bsl z0.d, z0.d, z2.d, z5.d",
+        "mov z4.d, z0.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpblendvb xmm0, xmm1, xmm2, xmm3": {
-      "ExpectedInstructionCount": 3,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 3 0b01 0x4c 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "sshr v2.16b, v19.16b, #7",
-        "mov v16.16b, v2.16b",
-        "bsl v16.16b, v18.16b, v17.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z4.d, p7/m, z19.d",
+        "sshr v5.16b, v4.16b, #7",
+        "mov v4.16b, v5.16b",
+        "bsl v4.16b, v3.16b, v2.16b",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpblendvb ymm0, ymm1, ymm2, ymm3": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map 3 0b01 0x4c 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z2, z19",
-        "asr z2.b, p7/m, z2.b, #7",
-        "movprfx z0, z18",
-        "bsl z0.d, z0.d, z17.d, z2.d",
-        "mov z16.d, z0.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z3.d, p7/m, z18.d",
+        "mov z4.d, p7/m, z19.d",
+        "movprfx z5, z4",
+        "asr z5.b, p7/m, z5.b, #7",
+        "movprfx z0, z3",
+        "bsl z0.d, z0.d, z2.d, z5.d",
+        "mov z4.d, z0.d",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vfmaddsubps xmm0, xmm1, xmm2, xmm3": {
@@ -4794,86 +5672,101 @@
       ]
     },
     "vaeskeygenassist xmm0, xmm1, 0": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "Map 3 0b01 0xdf 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ldr q2, [x28, #2160]",
-        "movi v3.2d, #0x0",
-        "mov v16.16b, v17.16b",
+        "mov z2.d, p7/m, z17.d",
+        "ldr q3, [x28, #2160]",
+        "movi v4.2d, #0x0",
+        "mov v5.16b, v2.16b",
         "unimplemented (Unimplemented)",
-        "tbl v16.16b, {v16.16b}, v2.16b"
+        "tbl v5.16b, {v5.16b}, v3.16b",
+        "mov z16.d, p7/m, z5.d"
       ]
     },
     "vaeskeygenassist xmm0, xmm1, 0xFF": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map 3 0b01 0xdf 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ldr q2, [x28, #2160]",
-        "movi v3.2d, #0x0",
-        "mov v16.16b, v17.16b",
+        "mov z2.d, p7/m, z17.d",
+        "ldr q3, [x28, #2160]",
+        "movi v4.2d, #0x0",
+        "mov v5.16b, v2.16b",
         "unimplemented (Unimplemented)",
-        "tbl v16.16b, {v16.16b}, v2.16b",
+        "tbl v5.16b, {v5.16b}, v3.16b",
         "mov x0, #0xff00000000",
         "dup v1.2d, x0",
-        "eor v16.16b, v16.16b, v1.16b"
+        "eor v5.16b, v5.16b, v1.16b",
+        "mov z16.d, p7/m, z5.d"
       ]
     },
     "rorx eax, ebx, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b11 0xf0 32-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov w4, w7"
+        "mov x20, x7",
+        "mov w21, w20",
+        "mov x4, x21"
       ]
     },
     "rorx eax, eax, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b11 0xf0 32-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov w4, w4"
+        "mov x20, x4",
+        "mov w21, w20",
+        "mov x4, x21"
       ]
     },
     "rorx eax, ebx, 31": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b11 0xf0 32-bit"
       ],
       "ExpectedArm64ASM": [
-        "ror w4, w7, #31"
+        "mov x20, x7",
+        "ror w21, w20, #31",
+        "mov x4, x21"
       ]
     },
     "rorx eax, ebx, 32": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b11 0xf0 32-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov w4, w7"
+        "mov x20, x7",
+        "mov w21, w20",
+        "mov x4, x21"
       ]
     },
     "rorx eax, eax, 32": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b11 0xf0 32-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov w4, w4"
+        "mov x20, x4",
+        "mov w21, w20",
+        "mov x4, x21"
       ]
     },
     "rorx rax, rbx, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map 3 0b11 0xf0 64-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov x4, x7"
+        "mov x20, x7",
+        "mov x4, x20"
       ]
     },
     "rorx rax, rax, 0": {
@@ -4884,21 +5777,24 @@
       "ExpectedArm64ASM": []
     },
     "rorx rax, rbx, 63": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map 3 0b11 0xf0 64-bit"
       ],
       "ExpectedArm64ASM": [
-        "ror x4, x7, #63"
+        "mov x20, x7",
+        "ror x21, x20, #63",
+        "mov x4, x21"
       ]
     },
     "rorx rax, rbx, 64": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map 3 0b11 0xf0 64-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov x4, x7"
+        "mov x20, x7",
+        "mov x4, x20"
       ]
     },
     "rorx rax, rax, 64": {
diff --git a/unittests/InstructionCountCI/VEX_map_group.json b/unittests/InstructionCountCI/VEX_map_group.json
index 21859ec7f2..ac9b749e2c 100644
--- a/unittests/InstructionCountCI/VEX_map_group.json
+++ b/unittests/InstructionCountCI/VEX_map_group.json
@@ -11,579 +11,686 @@
   },
   "Instructions": {
     "vpsrlw xmm0, xmm1, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map group 12 0b010 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsrlw xmm0, xmm1, 15": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map group 12 0b010 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ushr v16.8h, v17.8h, #15"
+        "mov z2.d, p7/m, z17.d",
+        "ushr v3.8h, v2.8h, #15",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsrlw xmm0, xmm1, 16": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map group 12 0b010 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsrlw ymm0, ymm1, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map group 12 0b010 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z16.d, p7/m, z17.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpsrlw ymm0, ymm1, 15": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map group 12 0b010 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z16, z17",
-        "lsr z16.h, p7/m, z16.h, #15"
+        "mov z2.d, p7/m, z17.d",
+        "movprfx z3, z2",
+        "lsr z3.h, p7/m, z3.h, #15",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsrlw ymm0, ymm1, 16": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map group 12 0b010 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsraw xmm0, xmm1, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map group 12 0b100 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsraw xmm0, xmm1, 15": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map group 12 0b100 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "sshr v16.8h, v17.8h, #15"
+        "mov z2.d, p7/m, z17.d",
+        "sshr v3.8h, v2.8h, #15",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsraw xmm0, xmm1, 16": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map group 12 0b100 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "sshr v16.8h, v17.8h, #15"
+        "mov z2.d, p7/m, z17.d",
+        "sshr v3.8h, v2.8h, #15",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsraw ymm0, ymm1, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map group 12 0b100 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z16.d, p7/m, z17.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpsraw ymm0, ymm1, 15": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map group 12 0b100 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z16, z17",
-        "asr z16.h, p7/m, z16.h, #15"
+        "mov z2.d, p7/m, z17.d",
+        "movprfx z3, z2",
+        "asr z3.h, p7/m, z3.h, #15",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsraw ymm0, ymm1, 16": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map group 12 0b100 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z16, z17",
-        "asr z16.h, p7/m, z16.h, #15"
+        "mov z2.d, p7/m, z17.d",
+        "movprfx z3, z2",
+        "asr z3.h, p7/m, z3.h, #15",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsllw xmm0, xmm1, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map group 12 0b110 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsllw xmm0, xmm1, 15": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map group 12 0b110 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "shl v16.8h, v17.8h, #15"
+        "mov z2.d, p7/m, z17.d",
+        "shl v3.8h, v2.8h, #15",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsllw xmm0, xmm1, 16": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map group 12 0b110 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsllw ymm0, ymm1, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map group 12 0b110 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z16.d, p7/m, z17.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpsllw ymm0, ymm1, 15": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map group 12 0b110 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z16, z17",
-        "lsl z16.h, p7/m, z16.h, #15"
+        "mov z2.d, p7/m, z17.d",
+        "movprfx z3, z2",
+        "lsl z3.h, p7/m, z3.h, #15",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsllw ymm0, ymm1, 16": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map group 12 0b110 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsrld xmm0, xmm1, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map group 13 0b010 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsrld xmm0, xmm1, 31": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map group 13 0b010 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ushr v16.4s, v17.4s, #31"
+        "mov z2.d, p7/m, z17.d",
+        "ushr v3.4s, v2.4s, #31",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsrld xmm0, xmm1, 32": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map group 13 0b010 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsrld ymm0, ymm1, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map group 13 0b010 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z16.d, p7/m, z17.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpsrld ymm0, ymm1, 31": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map group 13 0b010 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z16, z17",
-        "lsr z16.s, p7/m, z16.s, #31"
+        "mov z2.d, p7/m, z17.d",
+        "movprfx z3, z2",
+        "lsr z3.s, p7/m, z3.s, #31",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsrld ymm0, ymm1, 32": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map group 13 0b010 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsrad xmm0, xmm1, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map group 13 0b100 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsrad xmm0, xmm1, 31": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map group 13 0b100 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "sshr v16.4s, v17.4s, #31"
+        "mov z2.d, p7/m, z17.d",
+        "sshr v3.4s, v2.4s, #31",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsrad xmm0, xmm1, 32": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map group 13 0b100 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "sshr v16.4s, v17.4s, #31"
+        "mov z2.d, p7/m, z17.d",
+        "sshr v3.4s, v2.4s, #31",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsrad ymm0, ymm1, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map group 13 0b100 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z16.d, p7/m, z17.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpsrad ymm0, ymm1, 31": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map group 13 0b100 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z16, z17",
-        "asr z16.s, p7/m, z16.s, #31"
+        "mov z2.d, p7/m, z17.d",
+        "movprfx z3, z2",
+        "asr z3.s, p7/m, z3.s, #31",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsrad ymm0, ymm1, 32": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map group 13 0b100 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z16, z17",
-        "asr z16.s, p7/m, z16.s, #31"
+        "mov z2.d, p7/m, z17.d",
+        "movprfx z3, z2",
+        "asr z3.s, p7/m, z3.s, #31",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpslld xmm0, xmm1, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map group 13 0b110 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpslld xmm0, xmm1, 31": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map group 13 0b110 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "shl v16.4s, v17.4s, #31"
+        "mov z2.d, p7/m, z17.d",
+        "shl v3.4s, v2.4s, #31",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpslld xmm0, xmm1, 32": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map group 13 0b110 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpslld ymm0, ymm1, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map group 13 0b110 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z16.d, p7/m, z17.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpslld ymm0, ymm1, 31": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map group 13 0b110 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z16, z17",
-        "lsl z16.s, p7/m, z16.s, #31"
+        "mov z2.d, p7/m, z17.d",
+        "movprfx z3, z2",
+        "lsl z3.s, p7/m, z3.s, #31",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpslld ymm0, ymm1, 32": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map group 13 0b110 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsrlq xmm0, xmm1, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map group 14 0b010 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsrlq xmm0, xmm1, 63": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map group 14 0b010 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "ushr v16.2d, v17.2d, #63"
+        "mov z2.d, p7/m, z17.d",
+        "ushr v3.2d, v2.2d, #63",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsrlq xmm0, xmm1, 64": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map group 14 0b010 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsrlq ymm0, ymm1, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map group 14 0b010 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z16.d, p7/m, z17.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpsrlq ymm0, ymm1, 63": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map group 14 0b010 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z16, z17",
-        "lsr z16.d, p7/m, z16.d, #63"
+        "mov z2.d, p7/m, z17.d",
+        "movprfx z3, z2",
+        "lsr z3.d, p7/m, z3.d, #63",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsrlq ymm0, ymm1, 64": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map group 14 0b010 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsrldq xmm0, xmm1, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map group 14 0b011 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsrldq xmm0, xmm1, 15": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map group 14 0b011 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "ext v16.16b, v17.16b, v2.16b, #15"
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "ext v4.16b, v2.16b, v3.16b, #15",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpsrldq xmm0, xmm1, 16": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map group 14 0b011 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "movi v2.2d, #0x0",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpsrldq ymm0, ymm1, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map group 14 0b011 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z16.d, p7/m, z17.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpsrldq ymm0, ymm1, 15": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map group 14 0b011 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "ext v3.16b, v17.16b, v2.16b, #15",
-        "movprfx z1, z17",
-        "ext z1.b, z1.b, z2.b, #31",
-        "mov z2.d, z1.d",
-        "mov z1.q, q2",
-        "mov z16.d, z3.d",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "ext v4.16b, v2.16b, v3.16b, #15",
+        "movprfx z5, z2",
+        "ext z5.b, z5.b, z3.b, #31",
+        "mov z1.q, q5",
+        "mov z2.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z2.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpsrldq ymm0, ymm1, 16": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map group 14 0b011 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "movi v2.2d, #0x0",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpsllq xmm0, xmm1, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map group 14 0b110 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsllq xmm0, xmm1, 63": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map group 14 0b110 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "shl v16.2d, v17.2d, #63"
+        "mov z2.d, p7/m, z17.d",
+        "shl v3.2d, v2.2d, #63",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsllq xmm0, xmm1, 64": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map group 14 0b110 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsllq ymm0, ymm1, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map group 14 0b110 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z16.d, p7/m, z17.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpsllq ymm0, ymm1, 63": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map group 14 0b110 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movprfx z16, z17",
-        "lsl z16.d, p7/m, z16.d, #63"
+        "mov z2.d, p7/m, z17.d",
+        "movprfx z3, z2",
+        "lsl z3.d, p7/m, z3.d, #63",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpsllq ymm0, ymm1, 64": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map group 14 0b110 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpslldq xmm0, xmm1, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "Map group 14 0b111 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov v16.16b, v17.16b"
+        "mov z2.d, p7/m, z17.d",
+        "mov v3.16b, v2.16b",
+        "mov z16.d, p7/m, z3.d"
       ]
     },
     "vpslldq xmm0, xmm1, 15": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 4,
       "Comment": [
         "Map group 14 0b111 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "ext v16.16b, v2.16b, v17.16b, #1"
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "ext v4.16b, v3.16b, v2.16b, #1",
+        "mov z16.d, p7/m, z4.d"
       ]
     },
     "vpslldq xmm0, xmm1, 16": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map group 14 0b111 128-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "movi v2.2d, #0x0",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpslldq ymm0, ymm1, 0": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map group 14 0b111 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "mov z16.d, p7/m, z17.d"
+        "mov z2.d, p7/m, z17.d",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpslldq ymm0, ymm1, 15": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map group 14 0b111 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v2.2d, #0x0",
-        "ext v3.16b, v2.16b, v17.16b, #1",
-        "ext z2.b, z2.b, z17.b, #17",
-        "mov z1.q, q2",
-        "mov z16.d, z3.d",
+        "mov z2.d, p7/m, z17.d",
+        "movi v3.2d, #0x0",
+        "ext v4.16b, v3.16b, v2.16b, #1",
+        "movprfx z5, z3",
+        "ext z5.b, z5.b, z2.b, #17",
+        "mov z1.q, q5",
+        "mov z2.d, z4.d",
         "not p0.b, p7/z, p6.b",
-        "mov z16.b, p0/m, z1.b"
+        "mov z2.b, p0/m, z1.b",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vpslldq ymm0, ymm1, 16": {
-      "ExpectedInstructionCount": 1,
+      "ExpectedInstructionCount": 2,
       "Comment": [
         "Map group 14 0b111 256-bit"
       ],
       "ExpectedArm64ASM": [
-        "movi v16.2d, #0x0"
+        "movi v2.2d, #0x0",
+        "mov z16.d, p7/m, z2.d"
       ]
     },
     "vldmxcsr [rax]": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map group 15 0b010"
       ],
       "ExpectedArm64ASM": [
-        "ldr w20, [x4]",
-        "ubfx w20, w20, #13, #3",
+        "mov x20, x4",
+        "ldr w21, [x20]",
+        "ubfx w20, w21, #13, #3",
         "rbit w1, w20",
         "lsr w1, w1, #30",
         "mrs x0, fpcr",
@@ -594,7 +701,7 @@
       ]
     },
     "vstmxcsr [rax]": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map group 15 0b011"
       ],
@@ -604,102 +711,116 @@
         "ubfx x21, x21, #22, #3",
         "rbit w0, w21",
         "bfi x21, x0, #30, #2",
-        "bfi w20, w21, #13, #3",
-        "str w20, [x4]"
+        "mov w22, w20",
+        "bfi w22, w21, #13, #3",
+        "mov x20, x4",
+        "str w22, [x20]"
       ]
     },
     "blsr eax, ebx": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map group 17 0b001 32-bit"
       ],
       "ExpectedArm64ASM": [
-        "sub w20, w7, #0x1 (1)",
-        "and w4, w20, w7",
-        "cmp x7, #0x0 (0)",
-        "cset x20, eq",
-        "tst w4, w4",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "sub w21, w20, #0x1 (1)",
+        "and w22, w21, w20",
+        "mov x4, x22",
+        "cmp x20, #0x0 (0)",
+        "cset x21, eq",
+        "tst w22, w22",
+        "mrs x20, nzcv",
+        "orr w22, w20, w21, lsl #29",
+        "msr nzcv, x22"
       ]
     },
     "blsr rax, rbx": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map group 17 0b001 64-bit"
       ],
       "ExpectedArm64ASM": [
-        "sub x20, x7, #0x1 (1)",
-        "and x4, x20, x7",
-        "cmp x7, #0x0 (0)",
-        "cset x20, eq",
-        "tst x4, x4",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "sub x21, x20, #0x1 (1)",
+        "and x22, x21, x20",
+        "mov x4, x22",
+        "cmp x20, #0x0 (0)",
+        "cset x21, eq",
+        "tst x22, x22",
+        "mrs x20, nzcv",
+        "orr w22, w20, w21, lsl #29",
+        "msr nzcv, x22"
       ]
     },
     "blsmsk eax, ebx": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map group 17 0b010 32-bit"
       ],
       "ExpectedArm64ASM": [
-        "sub w20, w7, #0x1 (1)",
-        "eor w4, w20, w7",
-        "cmp x7, #0x0 (0)",
-        "cset x20, eq",
-        "tst w4, w4",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "sub w21, w20, #0x1 (1)",
+        "eor w22, w21, w20",
+        "mov x4, x22",
+        "cmp x20, #0x0 (0)",
+        "cset x21, eq",
+        "tst w22, w22",
+        "mrs x20, nzcv",
+        "orr w22, w20, w21, lsl #29",
+        "msr nzcv, x22"
       ]
     },
     "blsmsk rax, rbx": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "Map group 17 0b010 64-bit"
       ],
       "ExpectedArm64ASM": [
-        "sub x20, x7, #0x1 (1)",
-        "eor x4, x20, x7",
-        "cmp x7, #0x0 (0)",
-        "cset x20, eq",
-        "tst x4, x4",
-        "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "msr nzcv, x20"
+        "mov x20, x7",
+        "sub x21, x20, #0x1 (1)",
+        "eor x22, x21, x20",
+        "mov x4, x22",
+        "cmp x20, #0x0 (0)",
+        "cset x21, eq",
+        "tst x22, x22",
+        "mrs x20, nzcv",
+        "orr w22, w20, w21, lsl #29",
+        "msr nzcv, x22"
       ]
     },
     "blsi eax, ebx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map group 17 0b011 32-bit"
       ],
       "ExpectedArm64ASM": [
-        "neg w20, w7",
-        "and w4, w7, w20",
-        "tst w4, w4",
+        "mov x20, x7",
+        "neg w21, w20",
+        "and w22, w20, w21",
+        "mov x4, x22",
+        "tst w22, w22",
         "cset w20, ne",
         "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "msr nzcv, x20"
+        "orr w22, w21, w20, lsl #29",
+        "msr nzcv, x22"
       ]
     },
     "blsi rax, rbx": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "Map group 17 0b011 64-bit"
       ],
       "ExpectedArm64ASM": [
-        "neg x20, x7",
-        "and x4, x7, x20",
-        "tst x4, x4",
+        "mov x20, x7",
+        "neg x21, x20",
+        "and x22, x20, x21",
+        "mov x4, x22",
+        "tst x22, x22",
         "cset w20, ne",
         "mrs x21, nzcv",
-        "orr w20, w21, w20, lsl #29",
-        "msr nzcv, x20"
+        "orr w22, w21, w20, lsl #29",
+        "msr nzcv, x22"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/x87.json b/unittests/InstructionCountCI/x87.json
index 501a424251..1547052002 100644
--- a/unittests/InstructionCountCI/x87.json
+++ b/unittests/InstructionCountCI/x87.json
@@ -13,13 +13,14 @@
   },
   "Instructions": {
     "fadd dword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xd8 !11b /0"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
+        "mov x21, x4",
+        "ldr s2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -44,11 +45,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "ldr q3, [x0, #768]",
+        "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -61,10 +62,10 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
-        "mov x3, v2.d[0]",
-        "umov w4, v2.h[4]",
+        "mov x1, v2.d[0]",
+        "umov w2, v2.h[4]",
+        "mov x3, v3.d[0]",
+        "umov w4, v3.h[4]",
         "ldr x5, [x28, #1408]",
         "blr x5",
         "ldr w4, [x28, #728]",
@@ -76,21 +77,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fmul dword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xd8 !11b /1"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
+        "mov x21, x4",
+        "ldr s2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -115,11 +117,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "ldr q3, [x0, #768]",
+        "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -132,10 +134,10 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
-        "mov x3, v2.d[0]",
-        "umov w4, v2.h[4]",
+        "mov x1, v2.d[0]",
+        "umov w2, v2.h[4]",
+        "mov x3, v3.d[0]",
+        "umov w4, v3.h[4]",
         "ldr x5, [x28, #1424]",
         "blr x5",
         "ldr w4, [x28, #728]",
@@ -147,21 +149,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fcom dword [rax]": {
-      "ExpectedInstructionCount": 69,
+      "ExpectedInstructionCount": 70,
       "Comment": [
         "0xd8 !11b /2"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
+        "mov x21, x4",
+        "ldr s2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -186,11 +189,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "ldr q3, [x0, #768]",
+        "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -203,10 +206,10 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
-        "mov x3, v2.d[0]",
-        "umov w4, v2.h[4]",
+        "mov x1, v2.d[0]",
+        "umov w2, v2.h[4]",
+        "mov x3, v3.d[0]",
+        "umov w4, v3.h[4]",
         "ldr x5, [x28, #1304]",
         "blr x5",
         "ldr w4, [x28, #728]",
@@ -221,24 +224,25 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "fcomp dword [rax]": {
-      "ExpectedInstructionCount": 77,
+      "ExpectedInstructionCount": 78,
       "Comment": [
         "0xd8 !11b /3"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
+        "mov x21, x4",
+        "ldr s2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -263,11 +267,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "ldr q3, [x0, #768]",
+        "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -280,10 +284,10 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
-        "mov x3, v2.d[0]",
-        "umov w4, v2.h[4]",
+        "mov x1, v2.d[0]",
+        "umov w2, v2.h[4]",
+        "mov x3, v3.d[0]",
+        "umov w4, v3.h[4]",
         "ldr x5, [x28, #1304]",
         "blr x5",
         "ldr w4, [x28, #728]",
@@ -298,32 +302,33 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
-        "mov w22, #0x0",
-        "strb w22, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
+        "mov w21, #0x0",
+        "strb w21, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fsub dword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xd8 !11b /4"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
+        "mov x21, x4",
+        "ldr s2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -348,11 +353,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "ldr q3, [x0, #768]",
+        "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -365,10 +370,10 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
-        "mov x3, v2.d[0]",
-        "umov w4, v2.h[4]",
+        "mov x1, v2.d[0]",
+        "umov w2, v2.h[4]",
+        "mov x3, v3.d[0]",
+        "umov w4, v3.h[4]",
         "ldr x5, [x28, #1416]",
         "blr x5",
         "ldr w4, [x28, #728]",
@@ -380,21 +385,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr dword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xd8 !11b /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
+        "mov x21, x4",
+        "ldr s2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -419,11 +425,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "ldr q3, [x0, #768]",
+        "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -436,10 +442,10 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v2.d[0]",
-        "umov w2, v2.h[4]",
-        "mov x3, v3.d[0]",
-        "umov w4, v3.h[4]",
+        "mov x1, v3.d[0]",
+        "umov w2, v3.h[4]",
+        "mov x3, v2.d[0]",
+        "umov w4, v2.h[4]",
         "ldr x5, [x28, #1416]",
         "blr x5",
         "ldr w4, [x28, #728]",
@@ -451,21 +457,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv dword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xd8 !11b /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
+        "mov x21, x4",
+        "ldr s2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -490,11 +497,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "ldr q3, [x0, #768]",
+        "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -507,10 +514,10 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
-        "mov x3, v2.d[0]",
-        "umov w4, v2.h[4]",
+        "mov x1, v2.d[0]",
+        "umov w2, v2.h[4]",
+        "mov x3, v3.d[0]",
+        "umov w4, v3.h[4]",
         "ldr x5, [x28, #1432]",
         "blr x5",
         "ldr w4, [x28, #728]",
@@ -522,21 +529,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr dword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xd8 !11b /7"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
+        "mov x21, x4",
+        "ldr s2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -561,11 +569,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "ldr q3, [x0, #768]",
+        "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -578,10 +586,10 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v2.d[0]",
-        "umov w2, v2.h[4]",
-        "mov x3, v3.d[0]",
-        "umov w4, v3.h[4]",
+        "mov x1, v3.d[0]",
+        "umov w2, v3.h[4]",
+        "mov x3, v2.d[0]",
+        "umov w4, v2.h[4]",
         "ldr x5, [x28, #1432]",
         "blr x5",
         "ldr w4, [x28, #728]",
@@ -593,11 +601,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fadd st0, st0": {
@@ -608,8 +616,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -640,11 +648,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fadd st0, st1": {
@@ -655,8 +663,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -687,11 +695,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fadd st0, st2": {
@@ -702,8 +710,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -734,11 +742,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fadd st0, st3": {
@@ -749,8 +757,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -781,11 +789,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fadd st0, st4": {
@@ -796,8 +804,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -828,11 +836,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fadd st0, st5": {
@@ -843,8 +851,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -875,11 +883,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fadd st0, st6": {
@@ -890,8 +898,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -922,11 +930,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fadd st0, st7": {
@@ -937,8 +945,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -969,11 +977,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fmul st0, st0": {
@@ -984,8 +992,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1016,11 +1024,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fmul st0, st1": {
@@ -1031,8 +1039,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1063,11 +1071,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fmul st0, st2": {
@@ -1078,8 +1086,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1110,11 +1118,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fmul st0, st3": {
@@ -1125,8 +1133,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1157,11 +1165,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fmul st0, st4": {
@@ -1172,8 +1180,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1204,11 +1212,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fmul st0, st5": {
@@ -1219,8 +1227,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1251,11 +1259,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fmul st0, st6": {
@@ -1266,8 +1274,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1298,11 +1306,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fmul st0, st7": {
@@ -1313,8 +1321,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1345,11 +1353,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fcom st0, st0": {
@@ -1361,8 +1369,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x0",
         "add w22, w20, #0x0 (0)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1396,13 +1404,13 @@
         "mov x20, x0",
         "ubfx x22, x20, #1, #1",
         "ubfx x23, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w22, w22, w20",
-        "orr w23, w23, w20",
-        "strb w22, [x28, #744]",
+        "ubfx x24, x20, #2, #1",
+        "orr w20, w22, w24",
+        "orr w22, w23, w24",
+        "strb w20, [x28, #744]",
         "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w23, [x28, #750]"
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]"
       ]
     },
     "fcom st0, st1": {
@@ -1413,8 +1421,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1448,14 +1456,14 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "fcom st0, st2": {
@@ -1466,8 +1474,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1501,14 +1509,14 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "fcom st0, st3": {
@@ -1519,8 +1527,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1554,14 +1562,14 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "fcom st0, st4": {
@@ -1572,8 +1580,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1607,14 +1615,14 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "fcom st0, st5": {
@@ -1625,8 +1633,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1660,14 +1668,14 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "fcom st0, st6": {
@@ -1678,8 +1686,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1713,14 +1721,14 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "fcom st0, st7": {
@@ -1731,8 +1739,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1766,14 +1774,14 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "fcomp st0, st0": {
@@ -1785,8 +1793,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x0",
         "add w22, w20, #0x0 (0)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1820,20 +1828,20 @@
         "mov x22, x0",
         "ubfx x23, x22, #1, #1",
         "ubfx x24, x22, #0, #1",
-        "ubfx x22, x22, #2, #1",
-        "orr w23, w23, w22",
-        "orr w24, w24, w22",
-        "strb w23, [x28, #744]",
+        "ubfx x25, x22, #2, #1",
+        "orr w22, w23, w25",
+        "orr w23, w24, w25",
+        "strb w22, [x28, #744]",
         "strb w21, [x28, #745]",
-        "strb w22, [x28, #746]",
-        "strb w24, [x28, #750]",
+        "strb w25, [x28, #746]",
+        "strb w23, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -1846,8 +1854,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1881,20 +1889,20 @@
         "mov x22, x0",
         "ubfx x23, x22, #1, #1",
         "ubfx x24, x22, #0, #1",
-        "ubfx x22, x22, #2, #1",
-        "orr w23, w23, w22",
-        "orr w24, w24, w22",
-        "strb w23, [x28, #744]",
-        "mov w23, #0x0",
-        "strb w23, [x28, #745]",
-        "strb w22, [x28, #746]",
-        "strb w24, [x28, #750]",
+        "ubfx x25, x22, #2, #1",
+        "orr w22, w23, w25",
+        "orr w23, w24, w25",
+        "strb w22, [x28, #744]",
+        "mov w22, #0x0",
+        "strb w22, [x28, #745]",
+        "strb w25, [x28, #746]",
+        "strb w23, [x28, #750]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -1906,8 +1914,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -1941,21 +1949,21 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
-        "mov w22, #0x0",
-        "strb w22, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
+        "mov w21, #0x0",
+        "strb w21, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -1967,8 +1975,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2002,21 +2010,21 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
-        "mov w22, #0x0",
-        "strb w22, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
+        "mov w21, #0x0",
+        "strb w21, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -2028,8 +2036,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2063,21 +2071,21 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
-        "mov w22, #0x0",
-        "strb w22, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
+        "mov w21, #0x0",
+        "strb w21, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -2089,8 +2097,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2124,21 +2132,21 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
-        "mov w22, #0x0",
-        "strb w22, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
+        "mov w21, #0x0",
+        "strb w21, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -2150,8 +2158,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2185,21 +2193,21 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
-        "mov w22, #0x0",
-        "strb w22, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
+        "mov w21, #0x0",
+        "strb w21, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -2211,8 +2219,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2246,21 +2254,21 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
-        "mov w22, #0x0",
-        "strb w22, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
+        "mov w21, #0x0",
+        "strb w21, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -2272,8 +2280,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2304,11 +2312,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsub st0, st1": {
@@ -2319,8 +2327,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2351,11 +2359,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fsub st0, st2": {
@@ -2366,8 +2374,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2398,11 +2406,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fsub st0, st3": {
@@ -2413,8 +2421,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2445,11 +2453,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fsub st0, st4": {
@@ -2460,8 +2468,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2492,11 +2500,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fsub st0, st5": {
@@ -2507,8 +2515,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2539,11 +2547,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fsub st0, st6": {
@@ -2554,8 +2562,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2586,11 +2594,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fsub st0, st7": {
@@ -2601,8 +2609,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2633,11 +2641,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr st0, st0": {
@@ -2648,8 +2656,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2680,11 +2688,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr st0, st1": {
@@ -2695,8 +2703,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2727,11 +2735,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr st0, st2": {
@@ -2742,8 +2750,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2774,11 +2782,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr st0, st3": {
@@ -2789,8 +2797,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2821,11 +2829,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr st0, st4": {
@@ -2836,8 +2844,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2868,11 +2876,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr st0, st5": {
@@ -2883,8 +2891,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2915,11 +2923,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr st0, st6": {
@@ -2930,8 +2938,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -2962,11 +2970,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr st0, st7": {
@@ -2977,8 +2985,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3009,11 +3017,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv st0, st0": {
@@ -3024,8 +3032,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3056,11 +3064,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv st0, st1": {
@@ -3071,8 +3079,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3103,11 +3111,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv st0, st2": {
@@ -3118,8 +3126,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3150,11 +3158,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv st0, st3": {
@@ -3165,8 +3173,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3197,11 +3205,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv st0, st4": {
@@ -3212,8 +3220,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3244,11 +3252,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv st0, st5": {
@@ -3259,8 +3267,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3291,11 +3299,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv st0, st6": {
@@ -3306,8 +3314,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3338,11 +3346,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv st0, st7": {
@@ -3353,8 +3361,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3385,11 +3393,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr st0, st0": {
@@ -3400,8 +3408,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3432,11 +3440,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr st0, st1": {
@@ -3447,8 +3455,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3479,11 +3487,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr st0, st2": {
@@ -3494,8 +3502,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3526,11 +3534,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr st0, st3": {
@@ -3541,8 +3549,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3573,11 +3581,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr st0, st4": {
@@ -3588,8 +3596,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3620,11 +3628,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr st0, st5": {
@@ -3635,8 +3643,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3667,11 +3675,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr st0, st6": {
@@ -3682,8 +3690,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3714,11 +3722,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr st0, st7": {
@@ -3729,8 +3737,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -3761,21 +3769,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fld dword [rax]": {
-      "ExpectedInstructionCount": 39,
+      "ExpectedInstructionCount": 40,
       "Comment": [
         "0xd9 !11b /0"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
+        "mov x21, x4",
+        "ldr s2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -3800,23 +3809,23 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q3, [x0, #768]"
       ]
     },
     "fst dword [rax]": {
-      "ExpectedInstructionCount": 30,
+      "ExpectedInstructionCount": 31,
       "Comment": [
         "0xd9 !11b /2"
       ],
@@ -3849,12 +3858,13 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "fmov s2, s0",
-        "str s2, [x4]"
+        "fmov s3, s0",
+        "mov x20, x4",
+        "str s3, [x20]"
       ]
     },
     "fstp dword [rax]": {
-      "ExpectedInstructionCount": 38,
+      "ExpectedInstructionCount": 39,
       "Comment": [
         "0xd9 !11b /3"
       ],
@@ -3887,82 +3897,85 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "fmov s2, s0",
-        "str s2, [x4]",
+        "fmov s3, s0",
+        "mov x21, x4",
+        "str s3, [x21]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fldenv [rax]": {
-      "ExpectedInstructionCount": 48,
+      "ExpectedInstructionCount": 49,
       "Comment": [
         "0xd9 !11b /4"
       ],
       "ExpectedArm64ASM": [
-        "ldrh w20, [x4]",
-        "strh w20, [x28, #1024]",
-        "ldr w20, [x4, #4]",
-        "ubfx w21, w20, #11, #3",
-        "strb w21, [x28, #747]",
-        "ubfx w21, w20, #8, #1",
-        "ubfx w22, w20, #9, #1",
-        "ubfx w23, w20, #10, #1",
-        "ubfx w20, w20, #14, #1",
-        "strb w21, [x28, #744]",
-        "strb w22, [x28, #745]",
-        "strb w23, [x28, #746]",
-        "strb w20, [x28, #750]",
-        "ldr w20, [x4, #8]",
-        "ubfx w21, w20, #0, #2",
+        "mov x20, x4",
+        "ldrh w21, [x20]",
+        "strh w21, [x28, #1024]",
+        "ldr w21, [x20, #4]",
+        "ubfx w22, w21, #11, #3",
+        "strb w22, [x28, #747]",
+        "ubfx w22, w21, #8, #1",
+        "ubfx w23, w21, #9, #1",
+        "ubfx w24, w21, #10, #1",
+        "ubfx w25, w21, #14, #1",
+        "strb w22, [x28, #744]",
+        "strb w23, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w25, [x28, #750]",
+        "ldr w21, [x20, #8]",
+        "ubfx w20, w21, #0, #2",
         "mrs x22, nzcv",
-        "cmp x21, #0x3 (3)",
-        "cset x21, ne",
-        "ubfx w23, w20, #2, #2",
-        "cmp x23, #0x3 (3)",
-        "cset x23, ne",
-        "orr w21, w21, w23, lsl #1",
-        "ubfx w23, w20, #4, #2",
-        "cmp x23, #0x3 (3)",
-        "cset x23, ne",
-        "orr w21, w21, w23, lsl #2",
-        "ubfx w23, w20, #6, #2",
-        "cmp x23, #0x3 (3)",
+        "cmp x20, #0x3 (3)",
         "cset x23, ne",
-        "orr w21, w21, w23, lsl #3",
-        "ubfx w23, w20, #8, #2",
+        "ubfx w20, w21, #2, #2",
+        "cmp x20, #0x3 (3)",
+        "cset x24, ne",
+        "orr w20, w23, w24, lsl #1",
+        "ubfx w23, w21, #4, #2",
         "cmp x23, #0x3 (3)",
-        "cset x23, ne",
-        "orr w21, w21, w23, lsl #4",
-        "ubfx w23, w20, #10, #2",
+        "cset x24, ne",
+        "orr w23, w20, w24, lsl #2",
+        "ubfx w20, w21, #6, #2",
+        "cmp x20, #0x3 (3)",
+        "cset x24, ne",
+        "orr w20, w23, w24, lsl #3",
+        "ubfx w23, w21, #8, #2",
         "cmp x23, #0x3 (3)",
-        "cset x23, ne",
-        "orr w21, w21, w23, lsl #5",
-        "ubfx w23, w20, #12, #2",
+        "cset x24, ne",
+        "orr w23, w20, w24, lsl #4",
+        "ubfx w20, w21, #10, #2",
+        "cmp x20, #0x3 (3)",
+        "cset x24, ne",
+        "orr w20, w23, w24, lsl #5",
+        "ubfx w23, w21, #12, #2",
         "cmp x23, #0x3 (3)",
-        "cset x23, ne",
-        "orr w21, w21, w23, lsl #6",
-        "ubfx w20, w20, #14, #2",
+        "cset x24, ne",
+        "orr w23, w20, w24, lsl #6",
+        "ubfx w20, w21, #14, #2",
         "cmp x20, #0x3 (3)",
-        "cset x20, ne",
-        "orr w20, w21, w20, lsl #7",
+        "cset x21, ne",
+        "orr w20, w23, w21, lsl #7",
         "strb w20, [x28, #1026]",
         "msr nzcv, x22"
       ]
     },
     "fldcw [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "0xd9 !11b /5"
       ],
       "ExpectedArm64ASM": [
-        "ldrh w20, [x4]",
-        "strh w20, [x28, #1024]"
+        "mov x20, x4",
+        "ldrh w21, [x20]",
+        "strh w21, [x28, #1024]"
       ]
     },
     "fnstenv [rax]": {
@@ -3971,80 +3984,81 @@
         "0xd9 !11b /6"
       ],
       "ExpectedArm64ASM": [
-        "ldrh w20, [x28, #1024]",
-        "str w20, [x4]",
-        "mov w20, #0x0",
-        "ldrb w21, [x28, #747]",
-        "mov x0, x20",
-        "bfi x0, x21, #11, #3",
-        "mov x21, x0",
+        "mov x20, x4",
+        "ldrh w21, [x28, #1024]",
+        "str w21, [x20]",
+        "mov w21, #0x0",
+        "ldrb w22, [x28, #747]",
+        "mov x23, x21",
+        "bfi x23, x22, #11, #3",
         "ldrb w22, [x28, #744]",
-        "ldrb w23, [x28, #745]",
-        "ldrb w24, [x28, #746]",
-        "ldrb w25, [x28, #750]",
-        "orr x21, x21, x22, lsl #8",
-        "orr x21, x21, x23, lsl #9",
-        "orr x21, x21, x24, lsl #10",
-        "orr x21, x21, x25, lsl #14",
-        "str w21, [x4, #4]",
-        "ldrb w21, [x28, #1026]",
-        "and w22, w21, #0x1",
-        "mov w23, #0x3",
-        "mrs x24, nzcv",
+        "ldrb w24, [x28, #745]",
+        "ldrb w25, [x28, #746]",
+        "ldrb w30, [x28, #750]",
+        "orr x18, x23, x22, lsl #8",
+        "orr x22, x18, x24, lsl #9",
+        "orr x23, x22, x25, lsl #10",
+        "orr x22, x23, x30, lsl #14",
+        "str w22, [x20, #4]",
+        "ldrb w22, [x28, #1026]",
+        "and w23, w22, #0x1",
+        "mov w24, #0x3",
+        "mrs x25, nzcv",
+        "cmp x23, #0x0 (0)",
+        "csel x30, x24, x21, eq",
+        "orr w23, w21, w30",
+        "lsr w30, w22, #1",
+        "and w18, w30, #0x1",
+        "cmp x18, #0x0 (0)",
+        "csel x30, x24, x21, eq",
+        "orr w18, w23, w30, lsl #2",
+        "lsr w23, w22, #2",
+        "and w30, w23, #0x1",
+        "cmp x30, #0x0 (0)",
+        "csel x23, x24, x21, eq",
+        "orr w30, w18, w23, lsl #4",
+        "lsr w23, w22, #3",
+        "and w18, w23, #0x1",
+        "cmp x18, #0x0 (0)",
+        "csel x23, x24, x21, eq",
+        "orr w18, w30, w23, lsl #6",
+        "lsr w23, w22, #4",
+        "and w30, w23, #0x1",
+        "cmp x30, #0x0 (0)",
+        "csel x23, x24, x21, eq",
+        "orr w30, w18, w23, lsl #8",
+        "lsr w23, w22, #5",
+        "and w18, w23, #0x1",
+        "cmp x18, #0x0 (0)",
+        "csel x23, x24, x21, eq",
+        "orr w18, w30, w23, lsl #10",
+        "lsr w23, w22, #6",
+        "and w30, w23, #0x1",
+        "cmp x30, #0x0 (0)",
+        "csel x23, x24, x21, eq",
+        "orr w30, w18, w23, lsl #12",
+        "lsr w23, w22, #7",
+        "and w22, w23, #0x1",
         "cmp x22, #0x0 (0)",
-        "csel x22, x23, x20, eq",
-        "orr w22, w20, w22",
-        "lsr w25, w21, #1",
-        "and w25, w25, #0x1",
-        "cmp x25, #0x0 (0)",
-        "csel x25, x23, x20, eq",
-        "orr w22, w22, w25, lsl #2",
-        "lsr w25, w21, #2",
-        "and w25, w25, #0x1",
-        "cmp x25, #0x0 (0)",
-        "csel x25, x23, x20, eq",
-        "orr w22, w22, w25, lsl #4",
-        "lsr w25, w21, #3",
-        "and w25, w25, #0x1",
-        "cmp x25, #0x0 (0)",
-        "csel x25, x23, x20, eq",
-        "orr w22, w22, w25, lsl #6",
-        "lsr w25, w21, #4",
-        "and w25, w25, #0x1",
-        "cmp x25, #0x0 (0)",
-        "csel x25, x23, x20, eq",
-        "orr w22, w22, w25, lsl #8",
-        "lsr w25, w21, #5",
-        "and w25, w25, #0x1",
-        "cmp x25, #0x0 (0)",
-        "csel x25, x23, x20, eq",
-        "orr w22, w22, w25, lsl #10",
-        "lsr w25, w21, #6",
-        "and w25, w25, #0x1",
-        "cmp x25, #0x0 (0)",
-        "csel x25, x23, x20, eq",
-        "orr w22, w22, w25, lsl #12",
-        "lsr w21, w21, #7",
-        "and w21, w21, #0x1",
-        "cmp x21, #0x0 (0)",
-        "csel x21, x23, x20, eq",
-        "orr w21, w22, w21, lsl #14",
-        "str w21, [x4, #8]",
-        "str w20, [x4, #12]",
-        "str w20, [x4, #16]",
-        "str w20, [x4, #20]",
-        "str w20, [x4, #24]",
-        "msr nzcv, x24"
+        "csel x23, x24, x21, eq",
+        "orr w22, w30, w23, lsl #14",
+        "str w22, [x20, #8]",
+        "str w21, [x20, #12]",
+        "str w21, [x20, #16]",
+        "str w21, [x20, #20]",
+        "str w21, [x20, #24]",
+        "msr nzcv, x25"
       ]
     },
     "fnstcw [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "0xd9 !11b /7"
       ],
       "ExpectedArm64ASM": [
         "ldrh w20, [x28, #1024]",
-        "strh w20, [x4]"
+        "mov x21, x4",
+        "strh w20, [x21]"
       ]
     },
     "fld st0": {
@@ -4055,15 +4069,15 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -4079,14 +4093,14 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -4101,15 +4115,15 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -4124,15 +4138,15 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -4147,15 +4161,15 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -4170,15 +4184,15 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -4193,15 +4207,15 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -4216,15 +4230,15 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -4239,14 +4253,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -4258,14 +4272,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -4277,14 +4291,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -4296,14 +4310,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -4315,14 +4329,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -4334,14 +4348,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -4353,14 +4367,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -4372,14 +4386,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -4391,7 +4405,7 @@
       "ExpectedArm64ASM": []
     },
     "fchs": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "0xd9 11b 0xe0 /4"
       ],
@@ -4402,14 +4416,15 @@
         "mov w21, #0x0",
         "mov w22, #0x8000",
         "fmov d3, x21",
-        "mov v3.d[1], x22",
-        "eor v2.16b, v2.16b, v3.16b",
+        "mov v4.16b, v3.16b",
+        "mov v4.d[1], x22",
+        "eor v3.16b, v2.16b, v4.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q3, [x0, #768]"
       ]
     },
     "fabs": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "0xd9 11b 0xe1 /4"
       ],
@@ -4420,10 +4435,11 @@
         "mov x21, #0xffffffffffffffff",
         "mov w22, #0x7fff",
         "fmov d3, x21",
-        "mov v3.d[1], x22",
-        "and v2.16b, v2.16b, v3.16b",
+        "mov v4.16b, v3.16b",
+        "mov v4.d[1], x22",
+        "and v3.16b, v2.16b, v4.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q3, [x0, #768]"
       ]
     },
     "ftst": {
@@ -4467,13 +4483,13 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
         "strb w20, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]"
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]"
       ]
     },
     "fxam": {
@@ -4486,11 +4502,11 @@
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
         "mov x21, v2.d[1]",
-        "ubfx x21, x21, #15, #1",
-        "strb w21, [x28, #745]",
+        "ubfx x22, x21, #15, #1",
+        "strb w22, [x28, #745]",
         "ldrb w21, [x28, #1026]",
-        "lsr w20, w21, w20",
-        "and w20, w20, #0x1",
+        "lsr w22, w21, w20",
+        "and w20, w22, #0x1",
         "mrs x21, nzcv",
         "cmp x20, #0x1 (1)",
         "cset x22, ne",
@@ -4508,11 +4524,11 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "ldr q2, [x28, #2304]",
@@ -4528,11 +4544,11 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "ldr q2, [x28, #2320]",
@@ -4548,11 +4564,11 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "ldr q2, [x28, #2336]",
@@ -4568,11 +4584,11 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "ldr q2, [x28, #2352]",
@@ -4588,11 +4604,11 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "ldr q2, [x28, #2368]",
@@ -4608,11 +4624,11 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "ldr q2, [x28, #2384]",
@@ -4628,11 +4644,11 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "movi v2.2d, #0x0",
@@ -4674,11 +4690,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q3, [x0, #768]"
       ]
     },
     "fyl2x": {
@@ -4690,15 +4706,15 @@
         "ldrb w20, [x28, #747]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "strb w21, [x28, #747]",
+        "and w22, w21, #0x7",
+        "strb w22, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
@@ -4727,11 +4743,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fptan": {
@@ -4743,12 +4759,12 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "sub w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w22",
-        "orr w21, w23, w21",
+        "and w23, w22, #0x7",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w23",
+        "orr w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "strb w22, [x28, #747]",
+        "strb w23, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
@@ -4776,16 +4792,16 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldr q3, [x28, #2304]",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "ldr q2, [x28, #2304]",
         "mov w21, #0x0",
         "strb w21, [x28, #746]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
-        "str q3, [x0, #768]"
+        "str q3, [x0, #768]",
+        "add x0, x28, x23, lsl #4",
+        "str q2, [x0, #768]"
       ]
     },
     "fpatan": {
@@ -4797,15 +4813,15 @@
         "ldrb w20, [x28, #747]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "strb w21, [x28, #747]",
+        "and w22, w21, #0x7",
+        "strb w22, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
@@ -4834,11 +4850,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fxtract": {
@@ -4850,12 +4866,12 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "sub w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w22",
-        "orr w21, w23, w21",
+        "and w23, w22, #0x7",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w23",
+        "orr w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "strb w22, [x28, #747]",
+        "strb w23, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
@@ -4911,13 +4927,13 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x23, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fprem1": {
@@ -4928,10 +4944,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
@@ -4960,13 +4976,13 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "mov w21, #0x0",
         "strb w21, [x28, #746]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdecstp": {
@@ -4976,8 +4992,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -4988,8 +5004,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -5001,10 +5017,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
@@ -5033,13 +5049,13 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "mov w21, #0x0",
         "strb w21, [x28, #746]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fyl2xp1": {
@@ -5051,15 +5067,15 @@
         "ldrb w20, [x28, #747]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "strb w21, [x28, #747]",
+        "and w22, w21, #0x7",
+        "strb w22, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "ldr q4, [x28, #2304]",
         "mrs x0, nzcv",
@@ -5089,9 +5105,9 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v5.16b, v5.16b, v5.16b",
+        "mov v5.d[0], x0",
+        "mov v5.h[4], w1",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -5104,8 +5120,8 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v2.d[0]",
-        "umov w2, v2.h[4]",
+        "mov x1, v5.d[0]",
+        "umov w2, v5.h[4]",
         "mov x3, v3.d[0]",
         "umov w4, v3.h[4]",
         "ldr x5, [x28, #1440]",
@@ -5122,7 +5138,7 @@
         "eor v2.16b, v2.16b, v2.16b",
         "mov v2.d[0], x0",
         "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -5160,11 +5176,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q3, [x0, #768]"
       ]
     },
     "fsincos": {
@@ -5176,12 +5192,12 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "sub w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w22",
-        "orr w21, w23, w21",
+        "and w23, w22, #0x7",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w23",
+        "orr w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "strb w22, [x28, #747]",
+        "strb w23, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
@@ -5237,15 +5253,15 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "mov w21, #0x0",
         "strb w21, [x28, #746]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x23, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "frndint": {
@@ -5282,11 +5298,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q3, [x0, #768]"
       ]
     },
     "fscale": {
@@ -5297,10 +5313,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
@@ -5329,11 +5345,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fsin": {
@@ -5370,13 +5386,13 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "mov w21, #0x0",
         "strb w21, [x28, #746]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q3, [x0, #768]"
       ]
     },
     "fcos": {
@@ -5413,23 +5429,24 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "mov w21, #0x0",
         "strb w21, [x28, #746]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q3, [x0, #768]"
       ]
     },
     "fiadd dword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xda !11b /0"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr w21, [x4]",
+        "mov x21, x4",
+        "ldr w22, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -5441,7 +5458,7 @@
         "st1 {v2.2d, v3.2d}, [x0], #32",
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
-        "mov w1, w21",
+        "mov w1, w22",
         "ldrh w0, [x28, #1024]",
         "ldr x2, [x28, #1320]",
         "blr x2",
@@ -5486,21 +5503,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fimul dword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xda !11b /1"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr w21, [x4]",
+        "mov x21, x4",
+        "ldr w22, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -5512,7 +5530,7 @@
         "st1 {v2.2d, v3.2d}, [x0], #32",
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
-        "mov w1, w21",
+        "mov w1, w22",
         "ldrh w0, [x28, #1024]",
         "ldr x2, [x28, #1320]",
         "blr x2",
@@ -5557,21 +5575,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "ficom dword [rax]": {
-      "ExpectedInstructionCount": 69,
+      "ExpectedInstructionCount": 70,
       "Comment": [
         "0xda !11b /2"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr w21, [x4]",
+        "mov x21, x4",
+        "ldr w22, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -5583,7 +5602,7 @@
         "st1 {v2.2d, v3.2d}, [x0], #32",
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
-        "mov w1, w21",
+        "mov w1, w22",
         "ldrh w0, [x28, #1024]",
         "ldr x2, [x28, #1320]",
         "blr x2",
@@ -5631,24 +5650,25 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "ficomp dword [rax]": {
-      "ExpectedInstructionCount": 77,
+      "ExpectedInstructionCount": 78,
       "Comment": [
         "0xda !11b /3"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr w21, [x4]",
+        "mov x21, x4",
+        "ldr w22, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -5660,7 +5680,7 @@
         "st1 {v2.2d, v3.2d}, [x0], #32",
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
-        "mov w1, w21",
+        "mov w1, w22",
         "ldrh w0, [x28, #1024]",
         "ldr x2, [x28, #1320]",
         "blr x2",
@@ -5708,32 +5728,33 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
-        "mov w22, #0x0",
-        "strb w22, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
+        "mov w21, #0x0",
+        "strb w21, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fisub dword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xda !11b /4"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr w21, [x4]",
+        "mov x21, x4",
+        "ldr w22, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -5745,7 +5766,7 @@
         "st1 {v2.2d, v3.2d}, [x0], #32",
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
-        "mov w1, w21",
+        "mov w1, w22",
         "ldrh w0, [x28, #1024]",
         "ldr x2, [x28, #1320]",
         "blr x2",
@@ -5790,21 +5811,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fisubr dword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xda !11b /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr w21, [x4]",
+        "mov x21, x4",
+        "ldr w22, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -5816,7 +5838,7 @@
         "st1 {v2.2d, v3.2d}, [x0], #32",
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
-        "mov w1, w21",
+        "mov w1, w22",
         "ldrh w0, [x28, #1024]",
         "ldr x2, [x28, #1320]",
         "blr x2",
@@ -5861,21 +5883,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fidiv dword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xda !11b /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr w21, [x4]",
+        "mov x21, x4",
+        "ldr w22, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -5887,7 +5910,7 @@
         "st1 {v2.2d, v3.2d}, [x0], #32",
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
-        "mov w1, w21",
+        "mov w1, w22",
         "ldrh w0, [x28, #1024]",
         "ldr x2, [x28, #1320]",
         "blr x2",
@@ -5932,21 +5955,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fidivr dword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xda !11b /7"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr w21, [x4]",
+        "mov x21, x4",
+        "ldr w22, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -5958,7 +5982,7 @@
         "st1 {v2.2d, v3.2d}, [x0], #32",
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
-        "mov w1, w21",
+        "mov w1, w22",
         "ldrh w0, [x28, #1024]",
         "ldr x2, [x28, #1320]",
         "blr x2",
@@ -6003,15 +6027,15 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fcmovb st0, st0": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc0 /0"
       ],
@@ -6020,18 +6044,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovb st0, st1": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc1 /0"
       ],
@@ -6040,18 +6065,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovb st0, st2": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc2 /0"
       ],
@@ -6060,18 +6086,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovb st0, st3": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc3 /0"
       ],
@@ -6080,18 +6107,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovb st0, st4": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc4 /0"
       ],
@@ -6100,18 +6128,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovb st0, st5": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc5 /0"
       ],
@@ -6120,18 +6149,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovb st0, st6": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc6 /0"
       ],
@@ -6140,18 +6170,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovb st0, st7": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc7 /0"
       ],
@@ -6160,18 +6191,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmove st0, st0": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc8 /1"
       ],
@@ -6180,18 +6212,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmove st0, st1": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc9 /1"
       ],
@@ -6200,18 +6233,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmove st0, st2": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xca /1"
       ],
@@ -6220,18 +6254,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmove st0, st3": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xcb /1"
       ],
@@ -6240,18 +6275,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmove st0, st4": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xcc /1"
       ],
@@ -6260,18 +6296,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmove st0, st5": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xcd /1"
       ],
@@ -6280,18 +6317,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmove st0, st6": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xce /1"
       ],
@@ -6300,18 +6338,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmove st0, st7": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xcf /1"
       ],
@@ -6320,398 +6359,423 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovbe st0, st0": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xda 11b 0xd0 /0"
       ],
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "csetm x21, hs",
-        "csel x20, x20, x21, eq",
-        "dup v2.2d, x20",
+        "csel x22, x20, x21, eq",
+        "dup v2.2d, x22",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovbe st0, st1": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xda 11b 0xd1 /0"
       ],
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "csetm x21, hs",
-        "csel x20, x20, x21, eq",
-        "dup v2.2d, x20",
+        "csel x22, x20, x21, eq",
+        "dup v2.2d, x22",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovbe st0, st2": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xda 11b 0xd2 /0"
       ],
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "csetm x21, hs",
-        "csel x20, x20, x21, eq",
-        "dup v2.2d, x20",
+        "csel x22, x20, x21, eq",
+        "dup v2.2d, x22",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovbe st0, st3": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xda 11b 0xd3 /0"
       ],
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "csetm x21, hs",
-        "csel x20, x20, x21, eq",
-        "dup v2.2d, x20",
+        "csel x22, x20, x21, eq",
+        "dup v2.2d, x22",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovbe st0, st4": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xda 11b 0xd4 /0"
       ],
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "csetm x21, hs",
-        "csel x20, x20, x21, eq",
-        "dup v2.2d, x20",
+        "csel x22, x20, x21, eq",
+        "dup v2.2d, x22",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovbe st0, st5": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xda 11b 0xd5 /0"
       ],
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "csetm x21, hs",
-        "csel x20, x20, x21, eq",
-        "dup v2.2d, x20",
+        "csel x22, x20, x21, eq",
+        "dup v2.2d, x22",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovbe st0, st6": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xda 11b 0xd6 /0"
       ],
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "csetm x21, hs",
-        "csel x20, x20, x21, eq",
-        "dup v2.2d, x20",
+        "csel x22, x20, x21, eq",
+        "dup v2.2d, x22",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovbe st0, st7": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xda 11b 0xd7 /0"
       ],
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "csetm x21, hs",
-        "csel x20, x20, x21, eq",
-        "dup v2.2d, x20",
+        "csel x22, x20, x21, eq",
+        "dup v2.2d, x22",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovu st0, st0": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xda 11b 0xd8 /1"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x0 (0)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x0 (0)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovu st0, st1": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xda 11b 0xd9 /1"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x1 (1)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovu st0, st2": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xda 11b 0xda /1"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x2 (2)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x2 (2)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovu st0, st3": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xda 11b 0xdb /1"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x3 (3)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x3 (3)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovu st0, st4": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xda 11b 0xdc /1"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x4 (4)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x4 (4)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovu st0, st5": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xda 11b 0xdd /1"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x5 (5)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x5 (5)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovu st0, st6": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xda 11b 0xde /1"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x6 (6)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x6 (6)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovu st0, st7": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xda 11b 0xdf /1"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x7 (7)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x7 (7)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fucompp": {
@@ -6723,8 +6787,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -6758,73 +6822,78 @@
         "mov x22, x0",
         "ubfx x23, x22, #1, #1",
         "ubfx x24, x22, #0, #1",
-        "ubfx x22, x22, #2, #1",
-        "orr w23, w23, w22",
-        "orr w24, w24, w22",
-        "strb w23, [x28, #744]",
-        "mov w23, #0x0",
-        "strb w23, [x28, #745]",
-        "strb w22, [x28, #746]",
-        "strb w24, [x28, #750]",
+        "ubfx x25, x22, #2, #1",
+        "orr w22, w23, w25",
+        "orr w23, w24, w25",
+        "strb w22, [x28, #744]",
+        "mov w22, #0x0",
+        "strb w22, [x28, #745]",
+        "strb w25, [x28, #746]",
+        "strb w23, [x28, #750]",
         "ldrb w22, [x28, #1026]",
         "lsl w23, w21, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "bic w24, w22, w23",
+        "strb w24, [x28, #1026]",
+        "add w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
+        "lsl w22, w21, w20",
+        "bic w21, w24, w22",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fild dword [rax]": {
-      "ExpectedInstructionCount": 35,
+      "ExpectedInstructionCount": 40,
       "Comment": [
         "0xdf !11b /5"
       ],
       "ExpectedArm64ASM": [
+        "sub sp, sp, #0x20 (32)",
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
-        "ldr w21, [x4]",
-        "mov w22, #0x0",
-        "sxtw x21, w21",
+        "mov x21, x4",
+        "ldr w22, [x21]",
+        "mov w21, #0x0",
+        "sxtw x23, w22",
         "mrs x22, nzcv",
-        "cmp x21, #0x0 (0)",
-        "mov w23, #0x8000",
-        "csel x23, x23, xzr, lt",
-        "cneg x21, x21, mi",
-        "mov w24, #0x3f",
+        "cmp x23, #0x0 (0)",
+        "mov w24, #0x8000",
+        "csel x25, x24, xzr, lt",
+        "cneg x24, x23, mi",
+        "mov w23, #0x3f",
         "mov x0, #0x3f",
-        "clz x25, x21",
-        "sub x25, x0, x25",
-        "sub x24, x24, x25",
-        "lsl x25, x21, x24",
+        "clz x30, x24",
+        "sub x30, x0, x30",
+        "sub x18, x23, x30",
+        "lsl x23, x24, x18",
         "mov w30, #0x403e",
-        "sub x24, x30, x24",
-        "mov w30, #0x0",
-        "cmp x21, #0x0 (0)",
-        "csel x21, x30, x24, eq",
-        "orr x21, x23, x21",
-        "fmov d2, x25",
+        "str w22, [sp]",
+        "sub x22, x30, x18",
+        "cmp x24, #0x0 (0)",
+        "csel x30, x21, x22, eq",
+        "orr x21, x25, x30",
+        "fmov d2, x23",
         "fmov d3, x21",
-        "mov v2.d[1], v3.d[0]",
+        "mov v4.16b, v2.16b",
+        "mov v4.d[1], v3.d[0]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x22"
+        "str q4, [x0, #768]",
+        "ldr w20, [sp]",
+        "msr nzcv, x20",
+        "add sp, sp, #0x20 (32)"
       ]
     },
     "fisttp dword [rax]": {
-      "ExpectedInstructionCount": 38,
+      "ExpectedInstructionCount": 39,
       "Comment": [
         "0xdb !11b /1"
       ],
@@ -6858,19 +6927,20 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
         "mov w21, w0",
-        "str w21, [x4]",
+        "mov x22, x4",
+        "str w21, [x22]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fist dword [rax]": {
-      "ExpectedInstructionCount": 30,
+      "ExpectedInstructionCount": 31,
       "Comment": [
         "0xdb !11b /2"
       ],
@@ -6904,11 +6974,12 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
         "mov w20, w0",
-        "str w20, [x4]"
+        "mov x21, x4",
+        "str w20, [x21]"
       ]
     },
     "fistp dword [rax]": {
-      "ExpectedInstructionCount": 38,
+      "ExpectedInstructionCount": 39,
       "Comment": [
         "0xdf !11b /7"
       ],
@@ -6942,31 +7013,33 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
         "mov w21, w0",
-        "str w21, [x4]",
+        "mov x22, x4",
+        "str w21, [x22]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fld tword [rax]": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb !11b /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr q2, [x4]",
+        "mov x21, x4",
+        "ldr q2, [x21]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -6974,7 +7047,7 @@
       ]
     },
     "fstp tword [rax]": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xdb !11b /7"
       ],
@@ -6982,21 +7055,22 @@
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "str d2, [x4]",
-        "mov x21, v2.d[1]",
-        "strh w21, [x4, #8]",
+        "mov x21, x4",
+        "str d2, [x21]",
+        "mov x22, v2.d[1]",
+        "strh w22, [x21, #8]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fcmovnb st0, st0": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc0 /0"
       ],
@@ -7005,18 +7079,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnb st0, st1": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc1 /0"
       ],
@@ -7025,18 +7100,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnb st0, st2": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc2 /0"
       ],
@@ -7045,18 +7121,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnb st0, st3": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc3 /0"
       ],
@@ -7065,18 +7142,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnb st0, st4": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc4 /0"
       ],
@@ -7085,18 +7163,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnb st0, st5": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc5 /0"
       ],
@@ -7105,18 +7184,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnb st0, st6": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc6 /0"
       ],
@@ -7125,18 +7205,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnb st0, st7": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc7 /0"
       ],
@@ -7145,18 +7226,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovne st0, st0": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc8 /1"
       ],
@@ -7165,18 +7247,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovne st0, st1": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc9 /1"
       ],
@@ -7185,18 +7268,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovne st0, st2": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xca /1"
       ],
@@ -7205,18 +7289,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovne st0, st3": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xcb /1"
       ],
@@ -7225,18 +7310,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovne st0, st4": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xcc /1"
       ],
@@ -7245,18 +7331,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovne st0, st5": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xcd /1"
       ],
@@ -7265,18 +7352,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovne st0, st6": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xce /1"
       ],
@@ -7285,18 +7373,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovne st0, st7": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xcf /1"
       ],
@@ -7305,390 +7394,415 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnbe st0, st0": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb 11b 0xd0 /2"
       ],
       "ExpectedArm64ASM": [
         "csetm x20, lo",
-        "csel x20, x20, xzr, ne",
-        "dup v2.2d, x20",
+        "csel x21, x20, xzr, ne",
+        "dup v2.2d, x21",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnbe st0, st1": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb 11b 0xd1 /2"
       ],
       "ExpectedArm64ASM": [
         "csetm x20, lo",
-        "csel x20, x20, xzr, ne",
-        "dup v2.2d, x20",
+        "csel x21, x20, xzr, ne",
+        "dup v2.2d, x21",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnbe st0, st2": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb 11b 0xd2 /2"
       ],
       "ExpectedArm64ASM": [
         "csetm x20, lo",
-        "csel x20, x20, xzr, ne",
-        "dup v2.2d, x20",
+        "csel x21, x20, xzr, ne",
+        "dup v2.2d, x21",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnbe st0, st3": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb 11b 0xd3 /2"
       ],
       "ExpectedArm64ASM": [
         "csetm x20, lo",
-        "csel x20, x20, xzr, ne",
-        "dup v2.2d, x20",
+        "csel x21, x20, xzr, ne",
+        "dup v2.2d, x21",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnbe st0, st4": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb 11b 0xd4 /2"
       ],
       "ExpectedArm64ASM": [
         "csetm x20, lo",
-        "csel x20, x20, xzr, ne",
-        "dup v2.2d, x20",
+        "csel x21, x20, xzr, ne",
+        "dup v2.2d, x21",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnbe st0, st5": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb 11b 0xd5 /2"
       ],
       "ExpectedArm64ASM": [
         "csetm x20, lo",
-        "csel x20, x20, xzr, ne",
-        "dup v2.2d, x20",
+        "csel x21, x20, xzr, ne",
+        "dup v2.2d, x21",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnbe st0, st6": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb 11b 0xd6 /2"
       ],
       "ExpectedArm64ASM": [
         "csetm x20, lo",
-        "csel x20, x20, xzr, ne",
-        "dup v2.2d, x20",
+        "csel x21, x20, xzr, ne",
+        "dup v2.2d, x21",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnbe st0, st7": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb 11b 0xd7 /2"
       ],
       "ExpectedArm64ASM": [
         "csetm x20, lo",
-        "csel x20, x20, xzr, ne",
-        "dup v2.2d, x20",
+        "csel x21, x20, xzr, ne",
+        "dup v2.2d, x21",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnu st0, st0": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdb 11b 0xd8 /3"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x0 (0)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x0 (0)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovnu st0, st1": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdb 11b 0xd9 /3"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x1 (1)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovnu st0, st2": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdb 11b 0xda /3"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x2 (2)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x2 (2)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovnu st0, st3": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdb 11b 0xdb /3"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x3 (3)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x3 (3)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovnu st0, st4": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdb 11b 0xdc /3"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x4 (4)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x4 (4)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovnu st0, st5": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdb 11b 0xdd /3"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x5 (5)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x5 (5)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovnu st0, st6": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdb 11b 0xde /3"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x6 (6)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x6 (6)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovnu st0, st7": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdb 11b 0xdf /3"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x7 (7)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x7 (7)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fnclex": {
@@ -7716,15 +7830,15 @@
       ]
     },
     "fucomi st0, st0": {
-      "ExpectedInstructionCount": 44,
+      "ExpectedInstructionCount": 45,
       "Comment": [
         "0xdb 11b 0xe8 /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -7758,25 +7872,26 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "lsl x21, x21, #29",
-        "orr w21, w21, w22, lsl #30",
-        "eor w26, w20, #0x1",
-        "msr nzcv, x21"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "lsl x22, x20, #29",
+        "orr w20, w22, w21, lsl #30",
+        "eor w21, w23, #0x1",
+        "mov x26, x21",
+        "msr nzcv, x20"
       ]
     },
     "fucomi st0, st1": {
-      "ExpectedInstructionCount": 44,
+      "ExpectedInstructionCount": 45,
       "Comment": [
         "0xdb 11b 0xe9 /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -7810,25 +7925,26 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "lsl x21, x21, #29",
-        "orr w21, w21, w22, lsl #30",
-        "eor w26, w20, #0x1",
-        "msr nzcv, x21"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "lsl x22, x20, #29",
+        "orr w20, w22, w21, lsl #30",
+        "eor w21, w23, #0x1",
+        "mov x26, x21",
+        "msr nzcv, x20"
       ]
     },
     "fucomi st0, st2": {
-      "ExpectedInstructionCount": 44,
+      "ExpectedInstructionCount": 45,
       "Comment": [
         "0xdb 11b 0xea /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -7862,25 +7978,26 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "lsl x21, x21, #29",
-        "orr w21, w21, w22, lsl #30",
-        "eor w26, w20, #0x1",
-        "msr nzcv, x21"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "lsl x22, x20, #29",
+        "orr w20, w22, w21, lsl #30",
+        "eor w21, w23, #0x1",
+        "mov x26, x21",
+        "msr nzcv, x20"
       ]
     },
     "fucomi st0, st3": {
-      "ExpectedInstructionCount": 44,
+      "ExpectedInstructionCount": 45,
       "Comment": [
         "0xdb 11b 0xeb /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -7914,25 +8031,26 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "lsl x21, x21, #29",
-        "orr w21, w21, w22, lsl #30",
-        "eor w26, w20, #0x1",
-        "msr nzcv, x21"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "lsl x22, x20, #29",
+        "orr w20, w22, w21, lsl #30",
+        "eor w21, w23, #0x1",
+        "mov x26, x21",
+        "msr nzcv, x20"
       ]
     },
     "fucomi st0, st4": {
-      "ExpectedInstructionCount": 44,
+      "ExpectedInstructionCount": 45,
       "Comment": [
         "0xdb 11b 0xec /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -7966,25 +8084,26 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "lsl x21, x21, #29",
-        "orr w21, w21, w22, lsl #30",
-        "eor w26, w20, #0x1",
-        "msr nzcv, x21"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "lsl x22, x20, #29",
+        "orr w20, w22, w21, lsl #30",
+        "eor w21, w23, #0x1",
+        "mov x26, x21",
+        "msr nzcv, x20"
       ]
     },
     "fucomi st0, st5": {
-      "ExpectedInstructionCount": 44,
+      "ExpectedInstructionCount": 45,
       "Comment": [
         "0xdb 11b 0xed /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -8018,25 +8137,26 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "lsl x21, x21, #29",
-        "orr w21, w21, w22, lsl #30",
-        "eor w26, w20, #0x1",
-        "msr nzcv, x21"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "lsl x22, x20, #29",
+        "orr w20, w22, w21, lsl #30",
+        "eor w21, w23, #0x1",
+        "mov x26, x21",
+        "msr nzcv, x20"
       ]
     },
     "fucomi st0, st6": {
-      "ExpectedInstructionCount": 44,
+      "ExpectedInstructionCount": 45,
       "Comment": [
         "0xdb 11b 0xee /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -8070,25 +8190,26 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "lsl x21, x21, #29",
-        "orr w21, w21, w22, lsl #30",
-        "eor w26, w20, #0x1",
-        "msr nzcv, x21"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "lsl x22, x20, #29",
+        "orr w20, w22, w21, lsl #30",
+        "eor w21, w23, #0x1",
+        "mov x26, x21",
+        "msr nzcv, x20"
       ]
     },
     "fucomi st0, st7": {
-      "ExpectedInstructionCount": 44,
+      "ExpectedInstructionCount": 45,
       "Comment": [
         "0xdb 11b 0xef /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -8122,25 +8243,26 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "lsl x21, x21, #29",
-        "orr w21, w21, w22, lsl #30",
-        "eor w26, w20, #0x1",
-        "msr nzcv, x21"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "lsl x22, x20, #29",
+        "orr w20, w22, w21, lsl #30",
+        "eor w21, w23, #0x1",
+        "mov x26, x21",
+        "msr nzcv, x20"
       ]
     },
     "fcomi st0, st0": {
-      "ExpectedInstructionCount": 44,
+      "ExpectedInstructionCount": 45,
       "Comment": [
         "0xdb 11b 0xf0 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -8174,25 +8296,26 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "lsl x21, x21, #29",
-        "orr w21, w21, w22, lsl #30",
-        "eor w26, w20, #0x1",
-        "msr nzcv, x21"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "lsl x22, x20, #29",
+        "orr w20, w22, w21, lsl #30",
+        "eor w21, w23, #0x1",
+        "mov x26, x21",
+        "msr nzcv, x20"
       ]
     },
     "fcomi st0, st1": {
-      "ExpectedInstructionCount": 44,
+      "ExpectedInstructionCount": 45,
       "Comment": [
         "0xdb 11b 0xf1 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -8226,25 +8349,26 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "lsl x21, x21, #29",
-        "orr w21, w21, w22, lsl #30",
-        "eor w26, w20, #0x1",
-        "msr nzcv, x21"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "lsl x22, x20, #29",
+        "orr w20, w22, w21, lsl #30",
+        "eor w21, w23, #0x1",
+        "mov x26, x21",
+        "msr nzcv, x20"
       ]
     },
     "fcomi st0, st2": {
-      "ExpectedInstructionCount": 44,
+      "ExpectedInstructionCount": 45,
       "Comment": [
         "0xdb 11b 0xf2 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -8278,25 +8402,26 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "lsl x21, x21, #29",
-        "orr w21, w21, w22, lsl #30",
-        "eor w26, w20, #0x1",
-        "msr nzcv, x21"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "lsl x22, x20, #29",
+        "orr w20, w22, w21, lsl #30",
+        "eor w21, w23, #0x1",
+        "mov x26, x21",
+        "msr nzcv, x20"
       ]
     },
     "fcomi st0, st3": {
-      "ExpectedInstructionCount": 44,
+      "ExpectedInstructionCount": 45,
       "Comment": [
         "0xdb 11b 0xf3 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -8330,25 +8455,26 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "lsl x21, x21, #29",
-        "orr w21, w21, w22, lsl #30",
-        "eor w26, w20, #0x1",
-        "msr nzcv, x21"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "lsl x22, x20, #29",
+        "orr w20, w22, w21, lsl #30",
+        "eor w21, w23, #0x1",
+        "mov x26, x21",
+        "msr nzcv, x20"
       ]
     },
     "fcomi st0, st4": {
-      "ExpectedInstructionCount": 44,
+      "ExpectedInstructionCount": 45,
       "Comment": [
         "0xdb 11b 0xf4 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -8382,25 +8508,26 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "lsl x21, x21, #29",
-        "orr w21, w21, w22, lsl #30",
-        "eor w26, w20, #0x1",
-        "msr nzcv, x21"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "lsl x22, x20, #29",
+        "orr w20, w22, w21, lsl #30",
+        "eor w21, w23, #0x1",
+        "mov x26, x21",
+        "msr nzcv, x20"
       ]
     },
     "fcomi st0, st5": {
-      "ExpectedInstructionCount": 44,
+      "ExpectedInstructionCount": 45,
       "Comment": [
         "0xdb 11b 0xf5 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -8434,25 +8561,26 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "lsl x21, x21, #29",
-        "orr w21, w21, w22, lsl #30",
-        "eor w26, w20, #0x1",
-        "msr nzcv, x21"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "lsl x22, x20, #29",
+        "orr w20, w22, w21, lsl #30",
+        "eor w21, w23, #0x1",
+        "mov x26, x21",
+        "msr nzcv, x20"
       ]
     },
     "fcomi st0, st6": {
-      "ExpectedInstructionCount": 44,
+      "ExpectedInstructionCount": 45,
       "Comment": [
         "0xdb 11b 0xf6 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -8486,25 +8614,26 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "lsl x21, x21, #29",
-        "orr w21, w21, w22, lsl #30",
-        "eor w26, w20, #0x1",
-        "msr nzcv, x21"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "lsl x22, x20, #29",
+        "orr w20, w22, w21, lsl #30",
+        "eor w21, w23, #0x1",
+        "mov x26, x21",
+        "msr nzcv, x20"
       ]
     },
     "fcomi st0, st7": {
-      "ExpectedInstructionCount": 44,
+      "ExpectedInstructionCount": 45,
       "Comment": [
         "0xdb 11b 0xf7 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -8538,23 +8667,25 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "lsl x21, x21, #29",
-        "orr w21, w21, w22, lsl #30",
-        "eor w26, w20, #0x1",
-        "msr nzcv, x21"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "lsl x22, x20, #29",
+        "orr w20, w22, w21, lsl #30",
+        "eor w21, w23, #0x1",
+        "mov x26, x21",
+        "msr nzcv, x20"
       ]
     },
     "fadd qword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xdc !11b /0"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -8579,11 +8710,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "ldr q3, [x0, #768]",
+        "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -8596,10 +8727,10 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
-        "mov x3, v2.d[0]",
-        "umov w4, v2.h[4]",
+        "mov x1, v2.d[0]",
+        "umov w2, v2.h[4]",
+        "mov x3, v3.d[0]",
+        "umov w4, v3.h[4]",
         "ldr x5, [x28, #1408]",
         "blr x5",
         "ldr w4, [x28, #728]",
@@ -8611,21 +8742,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fmul qword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xdc !11b /1"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -8650,11 +8782,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "ldr q3, [x0, #768]",
+        "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -8667,10 +8799,10 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
-        "mov x3, v2.d[0]",
-        "umov w4, v2.h[4]",
+        "mov x1, v2.d[0]",
+        "umov w2, v2.h[4]",
+        "mov x3, v3.d[0]",
+        "umov w4, v3.h[4]",
         "ldr x5, [x28, #1424]",
         "blr x5",
         "ldr w4, [x28, #728]",
@@ -8682,21 +8814,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fcom qword [rax]": {
-      "ExpectedInstructionCount": 69,
+      "ExpectedInstructionCount": 70,
       "Comment": [
         "0xdc !11b /2"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -8721,11 +8854,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "ldr q3, [x0, #768]",
+        "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -8738,10 +8871,10 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
-        "mov x3, v2.d[0]",
-        "umov w4, v2.h[4]",
+        "mov x1, v2.d[0]",
+        "umov w2, v2.h[4]",
+        "mov x3, v3.d[0]",
+        "umov w4, v3.h[4]",
         "ldr x5, [x28, #1304]",
         "blr x5",
         "ldr w4, [x28, #728]",
@@ -8756,24 +8889,25 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "fcomp qword [rax]": {
-      "ExpectedInstructionCount": 77,
+      "ExpectedInstructionCount": 78,
       "Comment": [
         "0xdc !11b /3"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -8798,11 +8932,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "ldr q3, [x0, #768]",
+        "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -8815,10 +8949,10 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
-        "mov x3, v2.d[0]",
-        "umov w4, v2.h[4]",
+        "mov x1, v2.d[0]",
+        "umov w2, v2.h[4]",
+        "mov x3, v3.d[0]",
+        "umov w4, v3.h[4]",
         "ldr x5, [x28, #1304]",
         "blr x5",
         "ldr w4, [x28, #728]",
@@ -8833,32 +8967,33 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
-        "mov w22, #0x0",
-        "strb w22, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
+        "mov w21, #0x0",
+        "strb w21, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fsub qword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xdc !11b /4"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -8883,11 +9018,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "ldr q3, [x0, #768]",
+        "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -8900,10 +9035,10 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
-        "mov x3, v2.d[0]",
-        "umov w4, v2.h[4]",
+        "mov x1, v2.d[0]",
+        "umov w2, v2.h[4]",
+        "mov x3, v3.d[0]",
+        "umov w4, v3.h[4]",
         "ldr x5, [x28, #1416]",
         "blr x5",
         "ldr w4, [x28, #728]",
@@ -8915,21 +9050,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr qword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xdc !11b /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -8954,11 +9090,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "ldr q3, [x0, #768]",
+        "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -8971,10 +9107,10 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v2.d[0]",
-        "umov w2, v2.h[4]",
-        "mov x3, v3.d[0]",
-        "umov w4, v3.h[4]",
+        "mov x1, v3.d[0]",
+        "umov w2, v3.h[4]",
+        "mov x3, v2.d[0]",
+        "umov w4, v2.h[4]",
         "ldr x5, [x28, #1416]",
         "blr x5",
         "ldr w4, [x28, #728]",
@@ -8986,21 +9122,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv qword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xdc !11b /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -9025,11 +9162,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "ldr q3, [x0, #768]",
+        "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -9042,10 +9179,10 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
-        "mov x3, v2.d[0]",
-        "umov w4, v2.h[4]",
+        "mov x1, v2.d[0]",
+        "umov w2, v2.h[4]",
+        "mov x3, v3.d[0]",
+        "umov w4, v3.h[4]",
         "ldr x5, [x28, #1432]",
         "blr x5",
         "ldr w4, [x28, #728]",
@@ -9057,21 +9194,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr qword [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xdc !11b /7"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -9096,11 +9234,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "ldr q3, [x0, #768]",
+        "ldr q2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -9113,10 +9251,10 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v2.d[0]",
-        "umov w2, v2.h[4]",
-        "mov x3, v3.d[0]",
-        "umov w4, v3.h[4]",
+        "mov x1, v3.d[0]",
+        "umov w2, v3.h[4]",
+        "mov x3, v2.d[0]",
+        "umov w4, v2.h[4]",
         "ldr x5, [x28, #1432]",
         "blr x5",
         "ldr w4, [x28, #728]",
@@ -9128,11 +9266,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "db 0xdc, 0xc0": {
@@ -9145,8 +9283,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9177,11 +9315,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fadd st1, st0": {
@@ -9192,8 +9330,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9224,11 +9362,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fadd st2, st0": {
@@ -9239,8 +9377,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9271,11 +9409,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fadd st3, st0": {
@@ -9286,8 +9424,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9318,11 +9456,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fadd st4, st0": {
@@ -9333,8 +9471,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9365,11 +9503,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fadd st5, st0": {
@@ -9380,8 +9518,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9412,11 +9550,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fadd st6, st0": {
@@ -9427,8 +9565,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9459,11 +9597,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fadd st7, st0": {
@@ -9474,8 +9612,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9506,11 +9644,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "db 0xdc, 0xc8": {
@@ -9523,8 +9661,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9555,11 +9693,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fmul st1, st0": {
@@ -9570,8 +9708,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9602,11 +9740,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fmul st2, st0": {
@@ -9617,8 +9755,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9649,11 +9787,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fmul st3, st0": {
@@ -9664,8 +9802,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9696,11 +9834,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fmul st4, st0": {
@@ -9711,8 +9849,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9743,11 +9881,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fmul st5, st0": {
@@ -9758,8 +9896,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9790,11 +9928,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fmul st6, st0": {
@@ -9805,8 +9943,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9837,11 +9975,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fmul st7, st0": {
@@ -9852,8 +9990,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9884,11 +10022,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "db 0xdc, 0xe0": {
@@ -9901,8 +10039,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9933,11 +10071,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr st1, st0": {
@@ -9948,8 +10086,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -9980,11 +10118,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr st2, st0": {
@@ -9995,8 +10133,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10027,11 +10165,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr st3, st0": {
@@ -10042,8 +10180,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10074,11 +10212,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr st4, st0": {
@@ -10089,8 +10227,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10121,11 +10259,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr st5, st0": {
@@ -10136,8 +10274,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10168,11 +10306,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr st6, st0": {
@@ -10183,8 +10321,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10215,11 +10353,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubr st7, st0": {
@@ -10230,8 +10368,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10262,11 +10400,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "db 0xdc, 0xe8": {
@@ -10279,8 +10417,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10311,11 +10449,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsub st1, st0": {
@@ -10326,8 +10464,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10358,11 +10496,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsub st2, st0": {
@@ -10373,8 +10511,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10405,11 +10543,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsub st3, st0": {
@@ -10420,8 +10558,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10452,11 +10590,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsub st4, st0": {
@@ -10467,8 +10605,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10499,11 +10637,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsub st5, st0": {
@@ -10514,8 +10652,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10546,11 +10684,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsub st6, st0": {
@@ -10561,8 +10699,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10593,11 +10731,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsub st7, st0": {
@@ -10608,8 +10746,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10640,11 +10778,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "db 0xdc, 0xf0": {
@@ -10657,8 +10795,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10689,11 +10827,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr st1, st0": {
@@ -10704,8 +10842,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10736,11 +10874,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr st2, st0": {
@@ -10751,8 +10889,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10783,11 +10921,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr st3, st0": {
@@ -10798,8 +10936,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10830,11 +10968,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr st4, st0": {
@@ -10845,8 +10983,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10877,11 +11015,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr st5, st0": {
@@ -10892,8 +11030,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10924,11 +11062,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr st6, st0": {
@@ -10939,8 +11077,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -10971,11 +11109,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivr st7, st0": {
@@ -10986,8 +11124,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -11018,11 +11156,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "db 0xdc, 0xf8": {
@@ -11035,8 +11173,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -11067,11 +11205,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv st1, st0": {
@@ -11082,8 +11220,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -11114,11 +11252,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv st2, st0": {
@@ -11129,8 +11267,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -11159,13 +11297,13 @@
         "ldr x8, [x28, #40]",
         "ldp x16, x17, [x28, #104]",
         "ld1 {v2.2d, v3.2d}, [sp], #32",
-        "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
-        "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
+        "ldr x30, [sp], #16",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv st3, st0": {
@@ -11176,8 +11314,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -11208,11 +11346,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv st4, st0": {
@@ -11223,8 +11361,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -11255,11 +11393,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv st5, st0": {
@@ -11270,8 +11408,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -11302,11 +11440,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv st6, st0": {
@@ -11317,8 +11455,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -11349,11 +11487,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdiv st7, st0": {
@@ -11364,8 +11502,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -11396,21 +11534,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fld qword [rax]": {
-      "ExpectedInstructionCount": 39,
+      "ExpectedInstructionCount": 40,
       "Comment": [
         "0xdd !11b /0"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -11435,23 +11574,23 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q3, [x0, #768]"
       ]
     },
     "fisttp qword [rax]": {
-      "ExpectedInstructionCount": 38,
+      "ExpectedInstructionCount": 39,
       "Comment": [
         "0xdd !11b /1"
       ],
@@ -11485,19 +11624,20 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
         "mov x21, x0",
-        "str x21, [x4]",
+        "mov x22, x4",
+        "str x21, [x22]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fst qword [rax]": {
-      "ExpectedInstructionCount": 30,
+      "ExpectedInstructionCount": 31,
       "Comment": [
         "0xdd !11b /2"
       ],
@@ -11530,12 +11670,13 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "mov v2.8b, v0.8b",
-        "str d2, [x4]"
+        "mov v3.8b, v0.8b",
+        "mov x20, x4",
+        "str d3, [x20]"
       ]
     },
     "fstp qword [rax]": {
-      "ExpectedInstructionCount": 38,
+      "ExpectedInstructionCount": 39,
       "Comment": [
         "0xdd !11b /3"
       ],
@@ -11568,278 +11709,289 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "mov v2.8b, v0.8b",
-        "str d2, [x4]",
+        "mov v3.8b, v0.8b",
+        "mov x21, x4",
+        "str d3, [x21]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "frstor [rax]": {
-      "ExpectedInstructionCount": 107,
+      "ExpectedInstructionCount": 110,
       "Comment": [
         "0xdd !11b /4"
       ],
       "ExpectedArm64ASM": [
-        "ldrh w20, [x4]",
-        "strh w20, [x28, #1024]",
-        "ldr w20, [x4, #4]",
-        "ubfx w21, w20, #11, #3",
-        "strb w21, [x28, #747]",
-        "ubfx w22, w20, #8, #1",
-        "ubfx w23, w20, #9, #1",
-        "ubfx w24, w20, #10, #1",
-        "ubfx w20, w20, #14, #1",
-        "strb w22, [x28, #744]",
-        "strb w23, [x28, #745]",
-        "strb w24, [x28, #746]",
-        "strb w20, [x28, #750]",
-        "ldr w20, [x4, #8]",
-        "ubfx w22, w20, #0, #2",
-        "mrs x23, nzcv",
-        "cmp x22, #0x3 (3)",
-        "cset x22, ne",
-        "ubfx w24, w20, #2, #2",
-        "cmp x24, #0x3 (3)",
-        "cset x24, ne",
-        "orr w22, w22, w24, lsl #1",
-        "ubfx w24, w20, #4, #2",
-        "cmp x24, #0x3 (3)",
-        "cset x24, ne",
-        "orr w22, w22, w24, lsl #2",
-        "ubfx w24, w20, #6, #2",
-        "cmp x24, #0x3 (3)",
-        "cset x24, ne",
-        "orr w22, w22, w24, lsl #3",
-        "ubfx w24, w20, #8, #2",
-        "cmp x24, #0x3 (3)",
-        "cset x24, ne",
-        "orr w22, w22, w24, lsl #4",
-        "ubfx w24, w20, #10, #2",
-        "cmp x24, #0x3 (3)",
-        "cset x24, ne",
-        "orr w22, w22, w24, lsl #5",
-        "ubfx w24, w20, #12, #2",
-        "cmp x24, #0x3 (3)",
-        "cset x24, ne",
-        "orr w22, w22, w24, lsl #6",
-        "ubfx w20, w20, #14, #2",
-        "cmp x20, #0x3 (3)",
-        "cset x20, ne",
-        "orr w20, w22, w20, lsl #7",
-        "strb w20, [x28, #1026]",
-        "add x20, x4, #0x1c (28)",
-        "mov x22, #0xffffffffffffffff",
-        "mov w24, #0xffff",
-        "fmov d2, x22",
-        "mov v2.d[1], x24",
-        "ldur q3, [x4, #28]",
-        "and v3.16b, v3.16b, v2.16b",
-        "add x0, x28, x21, lsl #4",
-        "str q3, [x0, #768]",
-        "add x22, x20, #0xa (10)",
-        "add w21, w21, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "ldur q3, [x20, #10]",
-        "and v3.16b, v3.16b, v2.16b",
-        "add x0, x28, x21, lsl #4",
-        "str q3, [x0, #768]",
-        "add x20, x22, #0xa (10)",
-        "add w21, w21, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "ldur q3, [x22, #10]",
-        "and v3.16b, v3.16b, v2.16b",
-        "add x0, x28, x21, lsl #4",
-        "str q3, [x0, #768]",
-        "add x22, x20, #0xa (10)",
-        "add w21, w21, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "ldur q3, [x20, #10]",
-        "and v3.16b, v3.16b, v2.16b",
-        "add x0, x28, x21, lsl #4",
-        "str q3, [x0, #768]",
-        "add x20, x22, #0xa (10)",
-        "add w21, w21, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "ldur q3, [x22, #10]",
-        "and v3.16b, v3.16b, v2.16b",
-        "add x0, x28, x21, lsl #4",
-        "str q3, [x0, #768]",
-        "add x22, x20, #0xa (10)",
-        "add w21, w21, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "ldur q3, [x20, #10]",
-        "and v3.16b, v3.16b, v2.16b",
-        "add x0, x28, x21, lsl #4",
-        "str q3, [x0, #768]",
-        "add x20, x22, #0xa (10)",
-        "add w21, w21, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "ldur q3, [x22, #10]",
-        "and v2.16b, v3.16b, v2.16b",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]",
-        "add x22, x20, #0xa (10)",
-        "add w21, w21, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "ldur d2, [x20, #10]",
-        "ldr h3, [x22, #8]",
-        "mov v2.h[4], v3.h[0]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x23"
+        "mov x20, x4",
+        "ldrh w21, [x20]",
+        "strh w21, [x28, #1024]",
+        "ldr w21, [x20, #4]",
+        "ubfx w22, w21, #11, #3",
+        "strb w22, [x28, #747]",
+        "ubfx w23, w21, #8, #1",
+        "ubfx w24, w21, #9, #1",
+        "ubfx w25, w21, #10, #1",
+        "ubfx w30, w21, #14, #1",
+        "strb w23, [x28, #744]",
+        "strb w24, [x28, #745]",
+        "strb w25, [x28, #746]",
+        "strb w30, [x28, #750]",
+        "ldr w21, [x20, #8]",
+        "ubfx w23, w21, #0, #2",
+        "mrs x24, nzcv",
+        "cmp x23, #0x3 (3)",
+        "cset x25, ne",
+        "ubfx w23, w21, #2, #2",
+        "cmp x23, #0x3 (3)",
+        "cset x30, ne",
+        "orr w23, w25, w30, lsl #1",
+        "ubfx w25, w21, #4, #2",
+        "cmp x25, #0x3 (3)",
+        "cset x30, ne",
+        "orr w25, w23, w30, lsl #2",
+        "ubfx w23, w21, #6, #2",
+        "cmp x23, #0x3 (3)",
+        "cset x30, ne",
+        "orr w23, w25, w30, lsl #3",
+        "ubfx w25, w21, #8, #2",
+        "cmp x25, #0x3 (3)",
+        "cset x30, ne",
+        "orr w25, w23, w30, lsl #4",
+        "ubfx w23, w21, #10, #2",
+        "cmp x23, #0x3 (3)",
+        "cset x30, ne",
+        "orr w23, w25, w30, lsl #5",
+        "ubfx w25, w21, #12, #2",
+        "cmp x25, #0x3 (3)",
+        "cset x30, ne",
+        "orr w25, w23, w30, lsl #6",
+        "ubfx w23, w21, #14, #2",
+        "cmp x23, #0x3 (3)",
+        "cset x21, ne",
+        "orr w23, w25, w21, lsl #7",
+        "strb w23, [x28, #1026]",
+        "add x21, x20, #0x1c (28)",
+        "mov x23, #0xffffffffffffffff",
+        "mov w25, #0xffff",
+        "fmov d2, x23",
+        "mov v3.16b, v2.16b",
+        "mov v3.d[1], x25",
+        "ldur q2, [x20, #28]",
+        "and v4.16b, v2.16b, v3.16b",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]",
+        "add x20, x21, #0xa (10)",
+        "add w23, w22, #0x1 (1)",
+        "and w22, w23, #0x7",
+        "ldur q2, [x21, #10]",
+        "and v4.16b, v2.16b, v3.16b",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]",
+        "add x21, x20, #0xa (10)",
+        "add w23, w22, #0x1 (1)",
+        "and w22, w23, #0x7",
+        "ldur q2, [x20, #10]",
+        "and v4.16b, v2.16b, v3.16b",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]",
+        "add x20, x21, #0xa (10)",
+        "add w23, w22, #0x1 (1)",
+        "and w22, w23, #0x7",
+        "ldur q2, [x21, #10]",
+        "and v4.16b, v2.16b, v3.16b",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]",
+        "add x21, x20, #0xa (10)",
+        "add w23, w22, #0x1 (1)",
+        "and w22, w23, #0x7",
+        "ldur q2, [x20, #10]",
+        "and v4.16b, v2.16b, v3.16b",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]",
+        "add x20, x21, #0xa (10)",
+        "add w23, w22, #0x1 (1)",
+        "and w22, w23, #0x7",
+        "ldur q2, [x21, #10]",
+        "and v4.16b, v2.16b, v3.16b",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]",
+        "add x21, x20, #0xa (10)",
+        "add w23, w22, #0x1 (1)",
+        "and w22, w23, #0x7",
+        "ldur q2, [x20, #10]",
+        "and v4.16b, v2.16b, v3.16b",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]",
+        "add x20, x21, #0xa (10)",
+        "add w23, w22, #0x1 (1)",
+        "and w22, w23, #0x7",
+        "ldur d2, [x21, #10]",
+        "ldr h3, [x20, #8]",
+        "mov v4.16b, v2.16b",
+        "mov v4.h[4], v3.h[0]",
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]",
+        "msr nzcv, x24"
       ]
     },
     "fnsave [rax]": {
-      "ExpectedInstructionCount": 119,
+      "ExpectedInstructionCount": 124,
       "Comment": [
         "0xdd !11b /6"
       ],
       "ExpectedArm64ASM": [
-        "ldrb w20, [x28, #747]",
-        "ldrh w21, [x28, #1024]",
-        "str w21, [x4]",
-        "mov w21, #0x0",
-        "mov x22, x21",
-        "bfi x22, x20, #11, #3",
-        "ldrb w23, [x28, #744]",
-        "ldrb w24, [x28, #745]",
-        "ldrb w25, [x28, #746]",
-        "ldrb w30, [x28, #750]",
-        "orr x22, x22, x23, lsl #8",
-        "orr x22, x22, x24, lsl #9",
-        "orr x22, x22, x25, lsl #10",
-        "orr x22, x22, x30, lsl #14",
-        "str w22, [x4, #4]",
-        "ldrb w22, [x28, #1026]",
-        "and w23, w22, #0x1",
+        "sub sp, sp, #0x20 (32)",
+        "mov x20, x4",
+        "ldrb w21, [x28, #747]",
+        "ldrh w22, [x28, #1024]",
+        "str w22, [x20]",
+        "mov w22, #0x0",
+        "mov x23, x22",
+        "bfi x23, x21, #11, #3",
+        "ldrb w24, [x28, #744]",
+        "ldrb w25, [x28, #745]",
+        "ldrb w30, [x28, #746]",
+        "ldrb w18, [x28, #750]",
+        "strb w21, [sp]",
+        "orr x21, x23, x24, lsl #8",
+        "orr x23, x21, x25, lsl #9",
+        "orr x21, x23, x30, lsl #10",
+        "orr x23, x21, x18, lsl #14",
+        "str w23, [x20, #4]",
+        "ldrb w21, [x28, #1026]",
+        "and w23, w21, #0x1",
         "mov w24, #0x3",
         "mrs x25, nzcv",
         "cmp x23, #0x0 (0)",
-        "csel x23, x24, x21, eq",
-        "orr w23, w21, w23",
-        "lsr w30, w22, #1",
-        "and w30, w30, #0x1",
-        "cmp x30, #0x0 (0)",
-        "csel x30, x24, x21, eq",
-        "orr w23, w23, w30, lsl #2",
-        "lsr w30, w22, #2",
-        "and w30, w30, #0x1",
+        "csel x30, x24, x22, eq",
+        "orr w23, w22, w30",
+        "lsr w30, w21, #1",
+        "and w18, w30, #0x1",
+        "cmp x18, #0x0 (0)",
+        "csel x30, x24, x22, eq",
+        "orr w18, w23, w30, lsl #2",
+        "lsr w23, w21, #2",
+        "and w30, w23, #0x1",
         "cmp x30, #0x0 (0)",
-        "csel x30, x24, x21, eq",
-        "orr w23, w23, w30, lsl #4",
-        "lsr w30, w22, #3",
-        "and w30, w30, #0x1",
-        "cmp x30, #0x0 (0)",
-        "csel x30, x24, x21, eq",
-        "orr w23, w23, w30, lsl #6",
-        "lsr w30, w22, #4",
-        "and w30, w30, #0x1",
-        "cmp x30, #0x0 (0)",
-        "csel x30, x24, x21, eq",
-        "orr w23, w23, w30, lsl #8",
-        "lsr w30, w22, #5",
-        "and w30, w30, #0x1",
+        "csel x23, x24, x22, eq",
+        "orr w30, w18, w23, lsl #4",
+        "lsr w23, w21, #3",
+        "and w18, w23, #0x1",
+        "cmp x18, #0x0 (0)",
+        "csel x23, x24, x22, eq",
+        "orr w18, w30, w23, lsl #6",
+        "lsr w23, w21, #4",
+        "and w30, w23, #0x1",
         "cmp x30, #0x0 (0)",
-        "csel x30, x24, x21, eq",
-        "orr w23, w23, w30, lsl #10",
-        "lsr w30, w22, #6",
-        "and w30, w30, #0x1",
+        "csel x23, x24, x22, eq",
+        "orr w30, w18, w23, lsl #8",
+        "lsr w23, w21, #5",
+        "and w18, w23, #0x1",
+        "cmp x18, #0x0 (0)",
+        "csel x23, x24, x22, eq",
+        "orr w18, w30, w23, lsl #10",
+        "lsr w23, w21, #6",
+        "and w30, w23, #0x1",
         "cmp x30, #0x0 (0)",
-        "csel x30, x24, x21, eq",
-        "orr w23, w23, w30, lsl #12",
-        "lsr w22, w22, #7",
-        "and w22, w22, #0x1",
-        "cmp x22, #0x0 (0)",
-        "csel x22, x24, x21, eq",
-        "orr w22, w23, w22, lsl #14",
-        "str w22, [x4, #8]",
-        "str w21, [x4, #12]",
-        "str w21, [x4, #16]",
-        "str w21, [x4, #20]",
-        "str w21, [x4, #24]",
-        "add x22, x4, #0x1c (28)",
-        "add x0, x28, x20, lsl #4",
-        "ldr q2, [x0, #768]",
-        "stur q2, [x4, #28]",
-        "add x23, x22, #0xa (10)",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "add x0, x28, x20, lsl #4",
-        "ldr q2, [x0, #768]",
-        "stur q2, [x22, #10]",
-        "add x22, x23, #0xa (10)",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "add x0, x28, x20, lsl #4",
-        "ldr q2, [x0, #768]",
-        "stur q2, [x23, #10]",
-        "add x23, x22, #0xa (10)",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "add x0, x28, x20, lsl #4",
-        "ldr q2, [x0, #768]",
-        "stur q2, [x22, #10]",
-        "add x22, x23, #0xa (10)",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "add x0, x28, x20, lsl #4",
-        "ldr q2, [x0, #768]",
-        "stur q2, [x23, #10]",
-        "add x23, x22, #0xa (10)",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "add x0, x28, x20, lsl #4",
-        "ldr q2, [x0, #768]",
-        "stur q2, [x22, #10]",
-        "add x22, x23, #0xa (10)",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "add x0, x28, x20, lsl #4",
-        "ldr q2, [x0, #768]",
-        "stur q2, [x23, #10]",
-        "add x23, x22, #0xa (10)",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "add x0, x28, x20, lsl #4",
-        "ldr q2, [x0, #768]",
-        "stur d2, [x22, #10]",
-        "dup v2.8h, v2.h[4]",
-        "str h2, [x23, #8]",
+        "csel x23, x24, x22, eq",
+        "orr w30, w18, w23, lsl #12",
+        "lsr w23, w21, #7",
+        "and w21, w23, #0x1",
+        "cmp x21, #0x0 (0)",
+        "csel x23, x24, x22, eq",
+        "orr w21, w30, w23, lsl #14",
+        "str w21, [x20, #8]",
+        "str w22, [x20, #12]",
+        "str w22, [x20, #16]",
+        "str w22, [x20, #20]",
+        "str w22, [x20, #24]",
+        "add x21, x20, #0x1c (28)",
+        "ldrb w23, [sp]",
+        "add x0, x28, x23, lsl #4",
+        "ldr q2, [x0, #768]",
+        "stur q2, [x20, #28]",
+        "add x20, x21, #0xa (10)",
+        "add w24, w23, #0x1 (1)",
+        "and w23, w24, #0x7",
+        "add x0, x28, x23, lsl #4",
+        "ldr q2, [x0, #768]",
+        "stur q2, [x21, #10]",
+        "add x21, x20, #0xa (10)",
+        "add w24, w23, #0x1 (1)",
+        "and w23, w24, #0x7",
+        "add x0, x28, x23, lsl #4",
+        "ldr q2, [x0, #768]",
+        "stur q2, [x20, #10]",
+        "add x20, x21, #0xa (10)",
+        "add w24, w23, #0x1 (1)",
+        "and w23, w24, #0x7",
+        "add x0, x28, x23, lsl #4",
+        "ldr q2, [x0, #768]",
+        "stur q2, [x21, #10]",
+        "add x21, x20, #0xa (10)",
+        "add w24, w23, #0x1 (1)",
+        "and w23, w24, #0x7",
+        "add x0, x28, x23, lsl #4",
+        "ldr q2, [x0, #768]",
+        "stur q2, [x20, #10]",
+        "add x20, x21, #0xa (10)",
+        "add w24, w23, #0x1 (1)",
+        "and w23, w24, #0x7",
+        "add x0, x28, x23, lsl #4",
+        "ldr q2, [x0, #768]",
+        "stur q2, [x21, #10]",
+        "add x21, x20, #0xa (10)",
+        "add w24, w23, #0x1 (1)",
+        "and w23, w24, #0x7",
+        "add x0, x28, x23, lsl #4",
+        "ldr q2, [x0, #768]",
+        "stur q2, [x20, #10]",
+        "add x20, x21, #0xa (10)",
+        "add w24, w23, #0x1 (1)",
+        "and w23, w24, #0x7",
+        "add x0, x28, x23, lsl #4",
+        "ldr q2, [x0, #768]",
+        "stur d2, [x21, #10]",
+        "dup v3.8h, v2.h[4]",
+        "str h3, [x20, #8]",
         "mov w20, #0x37f",
         "strh w20, [x28, #1024]",
-        "strb w21, [x28, #747]",
-        "strb w21, [x28, #744]",
-        "strb w21, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w21, [x28, #750]",
-        "strb w21, [x28, #1026]",
-        "msr nzcv, x25"
+        "strb w22, [x28, #747]",
+        "strb w22, [x28, #744]",
+        "strb w22, [x28, #745]",
+        "strb w22, [x28, #746]",
+        "strb w22, [x28, #750]",
+        "strb w22, [x28, #1026]",
+        "msr nzcv, x25",
+        "add sp, sp, #0x20 (32)"
       ]
     },
     "fnstsw [rax]": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdd !11b /7"
       ],
       "ExpectedArm64ASM": [
         "mov w20, #0x0",
         "ldrb w21, [x28, #747]",
-        "bfi x20, x21, #11, #3",
-        "ldrb w21, [x28, #744]",
-        "ldrb w22, [x28, #745]",
+        "mov x22, x20",
+        "bfi x22, x21, #11, #3",
+        "ldrb w20, [x28, #744]",
+        "ldrb w21, [x28, #745]",
         "ldrb w23, [x28, #746]",
         "ldrb w24, [x28, #750]",
-        "orr x20, x20, x21, lsl #8",
-        "orr x20, x20, x22, lsl #9",
-        "orr x20, x20, x23, lsl #10",
-        "orr x20, x20, x24, lsl #14",
-        "strh w20, [x4]"
+        "orr x25, x22, x20, lsl #8",
+        "orr x20, x25, x21, lsl #9",
+        "orr x21, x20, x23, lsl #10",
+        "orr x20, x21, x24, lsl #14",
+        "mov x21, x4",
+        "strh w20, [x21]"
       ]
     },
     "ffree st0": {
@@ -11849,12 +12001,12 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x0 (0)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x0 (0)",
+        "and w20, w21, #0x7",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w20, w22, w20",
-        "bic w20, w21, w20",
+        "lsl w23, w22, w20",
+        "bic w20, w21, w23",
         "strb w20, [x28, #1026]"
       ]
     },
@@ -11866,11 +12018,11 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w20, w21, w20",
-        "bic w20, w22, w20",
+        "lsl w23, w21, w20",
+        "bic w20, w22, w23",
         "strb w20, [x28, #1026]"
       ]
     },
@@ -11881,12 +12033,12 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x2 (2)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x2 (2)",
+        "and w20, w21, #0x7",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w20, w22, w20",
-        "bic w20, w21, w20",
+        "lsl w23, w22, w20",
+        "bic w20, w21, w23",
         "strb w20, [x28, #1026]"
       ]
     },
@@ -11897,12 +12049,12 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x3 (3)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x3 (3)",
+        "and w20, w21, #0x7",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w20, w22, w20",
-        "bic w20, w21, w20",
+        "lsl w23, w22, w20",
+        "bic w20, w21, w23",
         "strb w20, [x28, #1026]"
       ]
     },
@@ -11913,12 +12065,12 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x4 (4)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x4 (4)",
+        "and w20, w21, #0x7",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w20, w22, w20",
-        "bic w20, w21, w20",
+        "lsl w23, w22, w20",
+        "bic w20, w21, w23",
         "strb w20, [x28, #1026]"
       ]
     },
@@ -11929,12 +12081,12 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x5 (5)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x5 (5)",
+        "and w20, w21, #0x7",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w20, w22, w20",
-        "bic w20, w21, w20",
+        "lsl w23, w22, w20",
+        "bic w20, w21, w23",
         "strb w20, [x28, #1026]"
       ]
     },
@@ -11945,12 +12097,12 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x6 (6)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x6 (6)",
+        "and w20, w21, #0x7",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w20, w22, w20",
-        "bic w20, w21, w20",
+        "lsl w23, w22, w20",
+        "bic w20, w21, w23",
         "strb w20, [x28, #1026]"
       ]
     },
@@ -11961,12 +12113,12 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x7 (7)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x7 (7)",
+        "and w20, w21, #0x7",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w20, w22, w20",
-        "bic w20, w21, w20",
+        "lsl w23, w22, w20",
+        "bic w20, w21, w23",
         "strb w20, [x28, #1026]"
       ]
     },
@@ -11978,10 +12130,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -11993,10 +12145,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -12008,10 +12160,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -12023,10 +12175,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -12038,10 +12190,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -12053,10 +12205,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -12068,10 +12220,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -12083,10 +12235,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -12098,18 +12250,18 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -12122,17 +12274,17 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
+        "and w23, w22, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "str q2, [x0, #768]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -12144,18 +12296,18 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -12167,18 +12319,18 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -12190,18 +12342,18 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -12213,18 +12365,18 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -12236,18 +12388,18 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -12259,18 +12411,18 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -12283,8 +12435,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x0",
         "add w22, w20, #0x0 (0)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -12318,13 +12470,13 @@
         "mov x20, x0",
         "ubfx x22, x20, #1, #1",
         "ubfx x23, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w22, w22, w20",
-        "orr w23, w23, w20",
-        "strb w22, [x28, #744]",
+        "ubfx x24, x20, #2, #1",
+        "orr w20, w22, w24",
+        "orr w22, w23, w24",
+        "strb w20, [x28, #744]",
         "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w23, [x28, #750]"
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]"
       ]
     },
     "fucom st1": {
@@ -12335,8 +12487,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -12370,14 +12522,14 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "fucom st2": {
@@ -12388,8 +12540,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -12420,17 +12572,17 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "mov x20, x0",
-        "ubfx x21, x20, #1, #1",
-        "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "mov x20, x0",
+        "ubfx x21, x20, #1, #1",
+        "ubfx x22, x20, #0, #1",
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "fucom st3": {
@@ -12441,8 +12593,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -12476,14 +12628,14 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "fucom st4": {
@@ -12494,8 +12646,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -12529,14 +12681,14 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "fucom st5": {
@@ -12547,8 +12699,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -12582,14 +12734,14 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "fucom st6": {
@@ -12600,8 +12752,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -12635,14 +12787,14 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "fucom st7": {
@@ -12653,8 +12805,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -12688,14 +12840,14 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "fucomp st0": {
@@ -12707,8 +12859,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x0",
         "add w22, w20, #0x0 (0)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -12742,20 +12894,20 @@
         "mov x22, x0",
         "ubfx x23, x22, #1, #1",
         "ubfx x24, x22, #0, #1",
-        "ubfx x22, x22, #2, #1",
-        "orr w23, w23, w22",
-        "orr w24, w24, w22",
-        "strb w23, [x28, #744]",
+        "ubfx x25, x22, #2, #1",
+        "orr w22, w23, w25",
+        "orr w23, w24, w25",
+        "strb w22, [x28, #744]",
         "strb w21, [x28, #745]",
-        "strb w22, [x28, #746]",
-        "strb w24, [x28, #750]",
+        "strb w25, [x28, #746]",
+        "strb w23, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -12768,8 +12920,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -12803,20 +12955,20 @@
         "mov x22, x0",
         "ubfx x23, x22, #1, #1",
         "ubfx x24, x22, #0, #1",
-        "ubfx x22, x22, #2, #1",
-        "orr w23, w23, w22",
-        "orr w24, w24, w22",
-        "strb w23, [x28, #744]",
-        "mov w23, #0x0",
-        "strb w23, [x28, #745]",
-        "strb w22, [x28, #746]",
-        "strb w24, [x28, #750]",
+        "ubfx x25, x22, #2, #1",
+        "orr w22, w23, w25",
+        "orr w23, w24, w25",
+        "strb w22, [x28, #744]",
+        "mov w22, #0x0",
+        "strb w22, [x28, #745]",
+        "strb w25, [x28, #746]",
+        "strb w23, [x28, #750]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -12828,8 +12980,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -12863,21 +13015,21 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
-        "mov w22, #0x0",
-        "strb w22, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
+        "mov w21, #0x0",
+        "strb w21, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -12889,8 +13041,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -12924,21 +13076,21 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
-        "mov w22, #0x0",
-        "strb w22, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
+        "mov w21, #0x0",
+        "strb w21, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -12950,8 +13102,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -12985,21 +13137,21 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
-        "mov w22, #0x0",
-        "strb w22, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
+        "mov w21, #0x0",
+        "strb w21, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -13011,8 +13163,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -13046,21 +13198,21 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
-        "mov w22, #0x0",
-        "strb w22, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
+        "mov w21, #0x0",
+        "strb w21, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -13072,8 +13224,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -13107,21 +13259,21 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
-        "mov w22, #0x0",
-        "strb w22, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
+        "mov w21, #0x0",
+        "strb w21, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -13133,8 +13285,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -13168,32 +13320,33 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
-        "mov w22, #0x0",
-        "strb w22, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
+        "mov w21, #0x0",
+        "strb w21, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fiadd word [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xde !11b /0"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldrh w21, [x4]",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -13205,7 +13358,7 @@
         "st1 {v2.2d, v3.2d}, [x0], #32",
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
-        "sxth w1, w21",
+        "sxth w1, w22",
         "ldrh w0, [x28, #1024]",
         "ldr x2, [x28, #1312]",
         "blr x2",
@@ -13250,21 +13403,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fimul word [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xde !11b /1"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldrh w21, [x4]",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -13276,7 +13430,7 @@
         "st1 {v2.2d, v3.2d}, [x0], #32",
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
-        "sxth w1, w21",
+        "sxth w1, w22",
         "ldrh w0, [x28, #1024]",
         "ldr x2, [x28, #1312]",
         "blr x2",
@@ -13321,21 +13475,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "ficom word [rax]": {
-      "ExpectedInstructionCount": 69,
+      "ExpectedInstructionCount": 70,
       "Comment": [
         "0xde !11b /2"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldrh w21, [x4]",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -13347,7 +13502,7 @@
         "st1 {v2.2d, v3.2d}, [x0], #32",
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
-        "sxth w1, w21",
+        "sxth w1, w22",
         "ldrh w0, [x28, #1024]",
         "ldr x2, [x28, #1312]",
         "blr x2",
@@ -13395,24 +13550,25 @@
         "mov x20, x0",
         "ubfx x21, x20, #1, #1",
         "ubfx x22, x20, #0, #1",
-        "ubfx x20, x20, #2, #1",
-        "orr w21, w21, w20",
-        "orr w22, w22, w20",
-        "strb w21, [x28, #744]",
-        "mov w21, #0x0",
-        "strb w21, [x28, #745]",
-        "strb w20, [x28, #746]",
-        "strb w22, [x28, #750]"
+        "ubfx x23, x20, #2, #1",
+        "orr w20, w21, w23",
+        "orr w21, w22, w23",
+        "strb w20, [x28, #744]",
+        "mov w20, #0x0",
+        "strb w20, [x28, #745]",
+        "strb w23, [x28, #746]",
+        "strb w21, [x28, #750]"
       ]
     },
     "ficomp word [rax]": {
-      "ExpectedInstructionCount": 77,
+      "ExpectedInstructionCount": 78,
       "Comment": [
         "0xde !11b /3"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldrh w21, [x4]",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -13424,7 +13580,7 @@
         "st1 {v2.2d, v3.2d}, [x0], #32",
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
-        "sxth w1, w21",
+        "sxth w1, w22",
         "ldrh w0, [x28, #1024]",
         "ldr x2, [x28, #1312]",
         "blr x2",
@@ -13472,32 +13628,33 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "strb w22, [x28, #744]",
-        "mov w22, #0x0",
-        "strb w22, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w23, [x28, #750]",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "strb w21, [x28, #744]",
+        "mov w21, #0x0",
+        "strb w21, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w22, [x28, #750]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fisub word [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xde !11b /4"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldrh w21, [x4]",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -13509,7 +13666,7 @@
         "st1 {v2.2d, v3.2d}, [x0], #32",
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
-        "sxth w1, w21",
+        "sxth w1, w22",
         "ldrh w0, [x28, #1024]",
         "ldr x2, [x28, #1312]",
         "blr x2",
@@ -13554,21 +13711,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fisubr word [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xde !11b /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldrh w21, [x4]",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -13580,7 +13738,7 @@
         "st1 {v2.2d, v3.2d}, [x0], #32",
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
-        "sxth w1, w21",
+        "sxth w1, w22",
         "ldrh w0, [x28, #1024]",
         "ldr x2, [x28, #1312]",
         "blr x2",
@@ -13625,21 +13783,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fidiv word [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xde !11b /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldrh w21, [x4]",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -13651,7 +13810,7 @@
         "st1 {v2.2d, v3.2d}, [x0], #32",
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
-        "sxth w1, w21",
+        "sxth w1, w22",
         "ldrh w0, [x28, #1024]",
         "ldr x2, [x28, #1312]",
         "blr x2",
@@ -13696,21 +13855,22 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "fidivr word [rax]": {
-      "ExpectedInstructionCount": 63,
+      "ExpectedInstructionCount": 64,
       "Comment": [
         "0xde !11b /7"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldrh w21, [x4]",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -13722,7 +13882,7 @@
         "st1 {v2.2d, v3.2d}, [x0], #32",
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
-        "sxth w1, w21",
+        "sxth w1, w22",
         "ldrh w0, [x28, #1024]",
         "ldr x2, [x28, #1312]",
         "blr x2",
@@ -13767,11 +13927,11 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q4, [x0, #768]"
       ]
     },
     "faddp st0": {
@@ -13782,8 +13942,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -13814,19 +13974,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "faddp st1": {
@@ -13838,8 +13998,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -13870,18 +14030,18 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w20",
+        "bic w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x22, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x23, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "faddp st2": {
@@ -13892,8 +14052,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -13924,19 +14084,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "faddp st3": {
@@ -13947,8 +14107,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -13979,19 +14139,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "faddp st4": {
@@ -14002,8 +14162,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14034,19 +14194,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "faddp st5": {
@@ -14057,8 +14217,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14089,19 +14249,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "faddp st6": {
@@ -14112,8 +14272,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14144,19 +14304,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "faddp st7": {
@@ -14167,8 +14327,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14199,19 +14359,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fmulp st0": {
@@ -14222,8 +14382,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14254,19 +14414,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fmulp st1": {
@@ -14278,8 +14438,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14310,18 +14470,18 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w20",
+        "bic w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x22, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x23, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fmulp st2": {
@@ -14332,8 +14492,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14364,19 +14524,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fmulp st3": {
@@ -14387,8 +14547,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14419,19 +14579,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fmulp st4": {
@@ -14442,8 +14602,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14474,19 +14634,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fmulp st5": {
@@ -14497,8 +14657,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14529,19 +14689,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fmulp st6": {
@@ -14552,8 +14712,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14584,19 +14744,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fmulp st7": {
@@ -14607,8 +14767,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14639,19 +14799,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fcompp": {
@@ -14663,8 +14823,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14698,25 +14858,25 @@
         "mov x22, x0",
         "ubfx x23, x22, #1, #1",
         "ubfx x24, x22, #0, #1",
-        "ubfx x22, x22, #2, #1",
-        "orr w23, w23, w22",
-        "orr w24, w24, w22",
-        "strb w23, [x28, #744]",
-        "mov w23, #0x0",
-        "strb w23, [x28, #745]",
-        "strb w22, [x28, #746]",
-        "strb w24, [x28, #750]",
+        "ubfx x25, x22, #2, #1",
+        "orr w22, w23, w25",
+        "orr w23, w24, w25",
+        "strb w22, [x28, #744]",
+        "mov w22, #0x0",
+        "strb w22, [x28, #745]",
+        "strb w25, [x28, #746]",
+        "strb w23, [x28, #750]",
         "ldrb w22, [x28, #1026]",
         "lsl w23, w21, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "bic w24, w22, w23",
+        "strb w24, [x28, #1026]",
+        "add w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
+        "lsl w22, w21, w20",
+        "bic w21, w24, w22",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -14730,8 +14890,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14762,19 +14922,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubrp st1, st0": {
@@ -14786,8 +14946,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14818,18 +14978,18 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w20",
+        "bic w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x22, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x23, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubrp st2, st0": {
@@ -14840,8 +15000,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14872,19 +15032,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubrp st3, st0": {
@@ -14895,8 +15055,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14927,19 +15087,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubrp st4, st0": {
@@ -14950,8 +15110,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -14982,19 +15142,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubrp st5, st0": {
@@ -15005,8 +15165,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15037,19 +15197,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubrp st6, st0": {
@@ -15060,8 +15220,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15092,19 +15252,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubrp st7, st0": {
@@ -15115,8 +15275,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15147,19 +15307,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "db 0xde, 0xe8": {
@@ -15172,8 +15332,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15204,19 +15364,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubp st1, st0": {
@@ -15228,8 +15388,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15260,18 +15420,18 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w20",
+        "bic w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x22, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x23, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubp st2, st0": {
@@ -15282,8 +15442,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15314,19 +15474,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubp st3, st0": {
@@ -15337,8 +15497,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15369,19 +15529,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubp st4, st0": {
@@ -15392,8 +15552,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15424,19 +15584,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubp st5, st0": {
@@ -15447,8 +15607,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15479,19 +15639,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubp st6, st0": {
@@ -15502,8 +15662,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15534,19 +15694,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fsubp st7, st0": {
@@ -15557,8 +15717,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15589,19 +15749,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "db 0xde, 0xf0": {
@@ -15614,8 +15774,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15646,19 +15806,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivrp st1, st0": {
@@ -15670,8 +15830,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15702,18 +15862,18 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w20",
+        "bic w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x22, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x23, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivrp st2, st0": {
@@ -15724,8 +15884,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15756,19 +15916,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivrp st3, st0": {
@@ -15779,8 +15939,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15811,19 +15971,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivrp st4, st0": {
@@ -15834,8 +15994,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15866,19 +16026,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivrp st5, st0": {
@@ -15889,8 +16049,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15921,19 +16081,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivrp st6, st0": {
@@ -15944,8 +16104,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -15976,19 +16136,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivrp st7, st0": {
@@ -15999,8 +16159,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -16031,19 +16191,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "db 0xde, 0xf8": {
@@ -16056,8 +16216,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -16088,19 +16248,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivp st1, st0": {
@@ -16112,8 +16272,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -16144,18 +16304,18 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w20",
+        "bic w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x22, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x23, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivp st2, st0": {
@@ -16166,8 +16326,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -16198,19 +16358,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivp st3, st0": {
@@ -16221,8 +16381,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -16253,19 +16413,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivp st4, st0": {
@@ -16276,8 +16436,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -16308,19 +16468,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivp st5, st0": {
@@ -16331,8 +16491,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -16363,19 +16523,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivp st6, st0": {
@@ -16386,8 +16546,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -16418,19 +16578,19 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fdivp st7, st0": {
@@ -16441,8 +16601,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -16473,66 +16633,71 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "ldrb w22, [x28, #1026]",
+        "eor v4.16b, v4.16b, v4.16b",
+        "mov v4.d[0], x0",
+        "mov v4.h[4], w1",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str q2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str q4, [x0, #768]"
       ]
     },
     "fild word [rax]": {
-      "ExpectedInstructionCount": 35,
+      "ExpectedInstructionCount": 40,
       "Comment": [
         "0xdf !11b /0"
       ],
       "ExpectedArm64ASM": [
+        "sub sp, sp, #0x20 (32)",
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
-        "ldrh w21, [x4]",
-        "mov w22, #0x0",
-        "sxth x21, w21",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
+        "mov w21, #0x0",
+        "sxth x23, w22",
         "mrs x22, nzcv",
-        "cmp x21, #0x0 (0)",
-        "mov w23, #0x8000",
-        "csel x23, x23, xzr, lt",
-        "cneg x21, x21, mi",
-        "mov w24, #0x3f",
+        "cmp x23, #0x0 (0)",
+        "mov w24, #0x8000",
+        "csel x25, x24, xzr, lt",
+        "cneg x24, x23, mi",
+        "mov w23, #0x3f",
         "mov x0, #0x3f",
-        "clz x25, x21",
-        "sub x25, x0, x25",
-        "sub x24, x24, x25",
-        "lsl x25, x21, x24",
+        "clz x30, x24",
+        "sub x30, x0, x30",
+        "sub x18, x23, x30",
+        "lsl x23, x24, x18",
         "mov w30, #0x403e",
-        "sub x24, x30, x24",
-        "mov w30, #0x0",
-        "cmp x21, #0x0 (0)",
-        "csel x21, x30, x24, eq",
-        "orr x21, x23, x21",
-        "fmov d2, x25",
+        "str w22, [sp]",
+        "sub x22, x30, x18",
+        "cmp x24, #0x0 (0)",
+        "csel x30, x21, x22, eq",
+        "orr x21, x25, x30",
+        "fmov d2, x23",
         "fmov d3, x21",
-        "mov v2.d[1], v3.d[0]",
+        "mov v4.16b, v2.16b",
+        "mov v4.d[1], v3.d[0]",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x22"
+        "str q4, [x0, #768]",
+        "ldr w20, [sp]",
+        "msr nzcv, x20",
+        "add sp, sp, #0x20 (32)"
       ]
     },
     "fisttp word [rax]": {
-      "ExpectedInstructionCount": 38,
+      "ExpectedInstructionCount": 39,
       "Comment": [
         "0xdf !11b /1"
       ],
@@ -16566,19 +16731,20 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
         "sxth x21, w0",
-        "strh w21, [x4]",
+        "mov x22, x4",
+        "strh w21, [x22]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fist word [rax]": {
-      "ExpectedInstructionCount": 30,
+      "ExpectedInstructionCount": 31,
       "Comment": [
         "0xdf !11b /2"
       ],
@@ -16612,11 +16778,12 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
         "sxth x20, w0",
-        "strh w20, [x4]"
+        "mov x21, x4",
+        "strh w20, [x21]"
       ]
     },
     "fistp word [rax]": {
-      "ExpectedInstructionCount": 38,
+      "ExpectedInstructionCount": 39,
       "Comment": [
         "0xdf !11b /3"
       ],
@@ -16650,33 +16817,35 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
         "sxth x21, w0",
-        "strh w21, [x4]",
+        "mov x22, x4",
+        "strh w21, [x22]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fbld tword [rax]": {
-      "ExpectedInstructionCount": 40,
+      "ExpectedInstructionCount": 41,
       "Comment": [
         "0xdf !11b /4"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
-        "ldr q2, [x4]",
+        "mov x21, x4",
+        "ldr q2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -16702,15 +16871,15 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q3, [x0, #768]"
       ]
     },
     "fbstp tword [rax]": {
-      "ExpectedInstructionCount": 42,
+      "ExpectedInstructionCount": 43,
       "Comment": [
         "0xdf !11b /6"
       ],
@@ -16743,19 +16912,20 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "str d2, [x4]",
-        "mov x21, v2.d[1]",
-        "strh w21, [x4, #8]",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "mov x21, x4",
+        "str d3, [x21]",
+        "mov x22, v3.d[1]",
+        "strh w22, [x21, #8]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -16766,8 +16936,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -16778,8 +16948,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -16790,8 +16960,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -16802,8 +16972,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -16814,8 +16984,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -16826,8 +16996,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -16838,8 +17008,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -16850,41 +17020,45 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fnstsw ax": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 16,
       "Comment": [
         "0xdf 11b 0xe0 /4"
       ],
       "ExpectedArm64ASM": [
         "mov w20, #0x0",
         "ldrb w21, [x28, #747]",
-        "bfi x20, x21, #11, #3",
-        "ldrb w21, [x28, #744]",
-        "ldrb w22, [x28, #745]",
+        "mov x22, x20",
+        "bfi x22, x21, #11, #3",
+        "ldrb w20, [x28, #744]",
+        "ldrb w21, [x28, #745]",
         "ldrb w23, [x28, #746]",
         "ldrb w24, [x28, #750]",
-        "orr x20, x20, x21, lsl #8",
-        "orr x20, x20, x22, lsl #9",
-        "orr x20, x20, x23, lsl #10",
-        "orr x20, x20, x24, lsl #14",
-        "bfxil x4, x20, #0, #16"
+        "orr x25, x22, x20, lsl #8",
+        "orr x20, x25, x21, lsl #9",
+        "orr x21, x20, x23, lsl #10",
+        "orr x20, x21, x24, lsl #14",
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x4, x22"
       ]
     },
     "fucomip st0": {
-      "ExpectedInstructionCount": 52,
+      "ExpectedInstructionCount": 53,
       "Comment": [
         "0xdf 11b 0xe8 /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -16918,25 +17092,26 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "lsl x22, x22, #29",
-        "orr w22, w22, w23, lsl #30",
-        "mov w23, #0x1",
-        "eor w26, w21, #0x1",
-        "ldrb w21, [x28, #1026]",
-        "lsl w23, w23, w20",
-        "bic w21, w21, w23",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "lsl x23, x21, #29",
+        "orr w21, w23, w22, lsl #30",
+        "mov w22, #0x1",
+        "eor w23, w24, #0x1",
+        "mov x26, x23",
+        "ldrb w23, [x28, #1026]",
+        "lsl w24, w22, w20",
+        "bic w22, w23, w24",
+        "strb w22, [x28, #1026]",
+        "add w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "strb w20, [x28, #747]",
-        "msr nzcv, x22"
+        "msr nzcv, x21"
       ]
     },
     "fucomip st1": {
-      "ExpectedInstructionCount": 52,
+      "ExpectedInstructionCount": 53,
       "Comment": [
         "0xdf 11b 0xe9 /5"
       ],
@@ -16944,8 +17119,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -16979,32 +17154,33 @@
         "mov x22, x0",
         "ubfx x23, x22, #1, #1",
         "ubfx x24, x22, #0, #1",
-        "ubfx x22, x22, #2, #1",
-        "orr w23, w23, w22",
-        "orr w24, w24, w22",
-        "lsl x23, x23, #29",
-        "orr w23, w23, w24, lsl #30",
-        "eor w26, w22, #0x1",
-        "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "ubfx x25, x22, #2, #1",
+        "orr w22, w23, w25",
+        "orr w23, w24, w25",
+        "lsl x24, x22, #29",
+        "orr w22, w24, w23, lsl #30",
+        "eor w23, w25, #0x1",
+        "mov x26, x23",
+        "ldrb w23, [x28, #1026]",
+        "lsl w24, w21, w20",
+        "bic w21, w23, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "msr nzcv, x23"
+        "msr nzcv, x22"
       ]
     },
     "fucomip st2": {
-      "ExpectedInstructionCount": 52,
+      "ExpectedInstructionCount": 53,
       "Comment": [
         "0xdf 11b 0xea /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -17038,33 +17214,34 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "lsl x22, x22, #29",
-        "orr w22, w22, w23, lsl #30",
-        "mov w23, #0x1",
-        "eor w26, w21, #0x1",
-        "ldrb w21, [x28, #1026]",
-        "lsl w23, w23, w20",
-        "bic w21, w21, w23",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "lsl x23, x21, #29",
+        "orr w21, w23, w22, lsl #30",
+        "mov w22, #0x1",
+        "eor w23, w24, #0x1",
+        "mov x26, x23",
+        "ldrb w23, [x28, #1026]",
+        "lsl w24, w22, w20",
+        "bic w22, w23, w24",
+        "strb w22, [x28, #1026]",
+        "add w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "strb w20, [x28, #747]",
-        "msr nzcv, x22"
+        "msr nzcv, x21"
       ]
     },
     "fucomip st3": {
-      "ExpectedInstructionCount": 52,
+      "ExpectedInstructionCount": 53,
       "Comment": [
         "0xdf 11b 0xeb /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -17098,33 +17275,34 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "lsl x22, x22, #29",
-        "orr w22, w22, w23, lsl #30",
-        "mov w23, #0x1",
-        "eor w26, w21, #0x1",
-        "ldrb w21, [x28, #1026]",
-        "lsl w23, w23, w20",
-        "bic w21, w21, w23",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "lsl x23, x21, #29",
+        "orr w21, w23, w22, lsl #30",
+        "mov w22, #0x1",
+        "eor w23, w24, #0x1",
+        "mov x26, x23",
+        "ldrb w23, [x28, #1026]",
+        "lsl w24, w22, w20",
+        "bic w22, w23, w24",
+        "strb w22, [x28, #1026]",
+        "add w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "strb w20, [x28, #747]",
-        "msr nzcv, x22"
+        "msr nzcv, x21"
       ]
     },
     "fucomip st4": {
-      "ExpectedInstructionCount": 52,
+      "ExpectedInstructionCount": 53,
       "Comment": [
         "0xdf 11b 0xec /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -17158,33 +17336,34 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "lsl x22, x22, #29",
-        "orr w22, w22, w23, lsl #30",
-        "mov w23, #0x1",
-        "eor w26, w21, #0x1",
-        "ldrb w21, [x28, #1026]",
-        "lsl w23, w23, w20",
-        "bic w21, w21, w23",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "lsl x23, x21, #29",
+        "orr w21, w23, w22, lsl #30",
+        "mov w22, #0x1",
+        "eor w23, w24, #0x1",
+        "mov x26, x23",
+        "ldrb w23, [x28, #1026]",
+        "lsl w24, w22, w20",
+        "bic w22, w23, w24",
+        "strb w22, [x28, #1026]",
+        "add w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "strb w20, [x28, #747]",
-        "msr nzcv, x22"
+        "msr nzcv, x21"
       ]
     },
     "fucomip st5": {
-      "ExpectedInstructionCount": 52,
+      "ExpectedInstructionCount": 53,
       "Comment": [
         "0xdf 11b 0xed /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -17218,33 +17397,34 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "lsl x22, x22, #29",
-        "orr w22, w22, w23, lsl #30",
-        "mov w23, #0x1",
-        "eor w26, w21, #0x1",
-        "ldrb w21, [x28, #1026]",
-        "lsl w23, w23, w20",
-        "bic w21, w21, w23",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "lsl x23, x21, #29",
+        "orr w21, w23, w22, lsl #30",
+        "mov w22, #0x1",
+        "eor w23, w24, #0x1",
+        "mov x26, x23",
+        "ldrb w23, [x28, #1026]",
+        "lsl w24, w22, w20",
+        "bic w22, w23, w24",
+        "strb w22, [x28, #1026]",
+        "add w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "strb w20, [x28, #747]",
-        "msr nzcv, x22"
+        "msr nzcv, x21"
       ]
     },
     "fucomip st6": {
-      "ExpectedInstructionCount": 52,
+      "ExpectedInstructionCount": 53,
       "Comment": [
         "0xdf 11b 0xee /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -17278,33 +17458,34 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "lsl x22, x22, #29",
-        "orr w22, w22, w23, lsl #30",
-        "mov w23, #0x1",
-        "eor w26, w21, #0x1",
-        "ldrb w21, [x28, #1026]",
-        "lsl w23, w23, w20",
-        "bic w21, w21, w23",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "lsl x23, x21, #29",
+        "orr w21, w23, w22, lsl #30",
+        "mov w22, #0x1",
+        "eor w23, w24, #0x1",
+        "mov x26, x23",
+        "ldrb w23, [x28, #1026]",
+        "lsl w24, w22, w20",
+        "bic w22, w23, w24",
+        "strb w22, [x28, #1026]",
+        "add w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "strb w20, [x28, #747]",
-        "msr nzcv, x22"
+        "msr nzcv, x21"
       ]
     },
     "fucomip st7": {
-      "ExpectedInstructionCount": 52,
+      "ExpectedInstructionCount": 53,
       "Comment": [
         "0xdf 11b 0xef /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -17338,33 +17519,34 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "lsl x22, x22, #29",
-        "orr w22, w22, w23, lsl #30",
-        "mov w23, #0x1",
-        "eor w26, w21, #0x1",
-        "ldrb w21, [x28, #1026]",
-        "lsl w23, w23, w20",
-        "bic w21, w21, w23",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "lsl x23, x21, #29",
+        "orr w21, w23, w22, lsl #30",
+        "mov w22, #0x1",
+        "eor w23, w24, #0x1",
+        "mov x26, x23",
+        "ldrb w23, [x28, #1026]",
+        "lsl w24, w22, w20",
+        "bic w22, w23, w24",
+        "strb w22, [x28, #1026]",
+        "add w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "strb w20, [x28, #747]",
-        "msr nzcv, x22"
+        "msr nzcv, x21"
       ]
     },
     "fcomip st0": {
-      "ExpectedInstructionCount": 52,
+      "ExpectedInstructionCount": 53,
       "Comment": [
         "0xdf 11b 0xf0 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -17398,25 +17580,26 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "lsl x22, x22, #29",
-        "orr w22, w22, w23, lsl #30",
-        "mov w23, #0x1",
-        "eor w26, w21, #0x1",
-        "ldrb w21, [x28, #1026]",
-        "lsl w23, w23, w20",
-        "bic w21, w21, w23",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "lsl x23, x21, #29",
+        "orr w21, w23, w22, lsl #30",
+        "mov w22, #0x1",
+        "eor w23, w24, #0x1",
+        "mov x26, x23",
+        "ldrb w23, [x28, #1026]",
+        "lsl w24, w22, w20",
+        "bic w22, w23, w24",
+        "strb w22, [x28, #1026]",
+        "add w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "strb w20, [x28, #747]",
-        "msr nzcv, x22"
+        "msr nzcv, x21"
       ]
     },
     "fcomip st1": {
-      "ExpectedInstructionCount": 52,
+      "ExpectedInstructionCount": 53,
       "Comment": [
         "0xdf 11b 0xf1 /6"
       ],
@@ -17424,8 +17607,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -17459,32 +17642,33 @@
         "mov x22, x0",
         "ubfx x23, x22, #1, #1",
         "ubfx x24, x22, #0, #1",
-        "ubfx x22, x22, #2, #1",
-        "orr w23, w23, w22",
-        "orr w24, w24, w22",
-        "lsl x23, x23, #29",
-        "orr w23, w23, w24, lsl #30",
-        "eor w26, w22, #0x1",
-        "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "ubfx x25, x22, #2, #1",
+        "orr w22, w23, w25",
+        "orr w23, w24, w25",
+        "lsl x24, x22, #29",
+        "orr w22, w24, w23, lsl #30",
+        "eor w23, w25, #0x1",
+        "mov x26, x23",
+        "ldrb w23, [x28, #1026]",
+        "lsl w24, w21, w20",
+        "bic w21, w23, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "msr nzcv, x23"
+        "msr nzcv, x22"
       ]
     },
     "fcomip st2": {
-      "ExpectedInstructionCount": 52,
+      "ExpectedInstructionCount": 53,
       "Comment": [
         "0xdf 11b 0xf2 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -17518,33 +17702,34 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "lsl x22, x22, #29",
-        "orr w22, w22, w23, lsl #30",
-        "mov w23, #0x1",
-        "eor w26, w21, #0x1",
-        "ldrb w21, [x28, #1026]",
-        "lsl w23, w23, w20",
-        "bic w21, w21, w23",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "lsl x23, x21, #29",
+        "orr w21, w23, w22, lsl #30",
+        "mov w22, #0x1",
+        "eor w23, w24, #0x1",
+        "mov x26, x23",
+        "ldrb w23, [x28, #1026]",
+        "lsl w24, w22, w20",
+        "bic w22, w23, w24",
+        "strb w22, [x28, #1026]",
+        "add w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "strb w20, [x28, #747]",
-        "msr nzcv, x22"
+        "msr nzcv, x21"
       ]
     },
     "fcomip st3": {
-      "ExpectedInstructionCount": 52,
+      "ExpectedInstructionCount": 53,
       "Comment": [
         "0xdf 11b 0xf3 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -17578,33 +17763,34 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "lsl x22, x22, #29",
-        "orr w22, w22, w23, lsl #30",
-        "mov w23, #0x1",
-        "eor w26, w21, #0x1",
-        "ldrb w21, [x28, #1026]",
-        "lsl w23, w23, w20",
-        "bic w21, w21, w23",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "lsl x23, x21, #29",
+        "orr w21, w23, w22, lsl #30",
+        "mov w22, #0x1",
+        "eor w23, w24, #0x1",
+        "mov x26, x23",
+        "ldrb w23, [x28, #1026]",
+        "lsl w24, w22, w20",
+        "bic w22, w23, w24",
+        "strb w22, [x28, #1026]",
+        "add w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "strb w20, [x28, #747]",
-        "msr nzcv, x22"
+        "msr nzcv, x21"
       ]
     },
     "fcomip st4": {
-      "ExpectedInstructionCount": 52,
+      "ExpectedInstructionCount": 53,
       "Comment": [
         "0xdf 11b 0xf4 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -17638,33 +17824,34 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "lsl x22, x22, #29",
-        "orr w22, w22, w23, lsl #30",
-        "mov w23, #0x1",
-        "eor w26, w21, #0x1",
-        "ldrb w21, [x28, #1026]",
-        "lsl w23, w23, w20",
-        "bic w21, w21, w23",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "lsl x23, x21, #29",
+        "orr w21, w23, w22, lsl #30",
+        "mov w22, #0x1",
+        "eor w23, w24, #0x1",
+        "mov x26, x23",
+        "ldrb w23, [x28, #1026]",
+        "lsl w24, w22, w20",
+        "bic w22, w23, w24",
+        "strb w22, [x28, #1026]",
+        "add w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "strb w20, [x28, #747]",
-        "msr nzcv, x22"
+        "msr nzcv, x21"
       ]
     },
     "fcomip st5": {
-      "ExpectedInstructionCount": 52,
+      "ExpectedInstructionCount": 53,
       "Comment": [
         "0xdf 11b 0xf5 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -17698,33 +17885,34 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "lsl x22, x22, #29",
-        "orr w22, w22, w23, lsl #30",
-        "mov w23, #0x1",
-        "eor w26, w21, #0x1",
-        "ldrb w21, [x28, #1026]",
-        "lsl w23, w23, w20",
-        "bic w21, w21, w23",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "lsl x23, x21, #29",
+        "orr w21, w23, w22, lsl #30",
+        "mov w22, #0x1",
+        "eor w23, w24, #0x1",
+        "mov x26, x23",
+        "ldrb w23, [x28, #1026]",
+        "lsl w24, w22, w20",
+        "bic w22, w23, w24",
+        "strb w22, [x28, #1026]",
+        "add w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "strb w20, [x28, #747]",
-        "msr nzcv, x22"
+        "msr nzcv, x21"
       ]
     },
     "fcomip st6": {
-      "ExpectedInstructionCount": 52,
+      "ExpectedInstructionCount": 53,
       "Comment": [
         "0xdf 11b 0xf6 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -17758,33 +17946,34 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "lsl x22, x22, #29",
-        "orr w22, w22, w23, lsl #30",
-        "mov w23, #0x1",
-        "eor w26, w21, #0x1",
-        "ldrb w21, [x28, #1026]",
-        "lsl w23, w23, w20",
-        "bic w21, w21, w23",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "lsl x23, x21, #29",
+        "orr w21, w23, w22, lsl #30",
+        "mov w22, #0x1",
+        "eor w23, w24, #0x1",
+        "mov x26, x23",
+        "ldrb w23, [x28, #1026]",
+        "lsl w24, w22, w20",
+        "bic w22, w23, w24",
+        "strb w22, [x28, #1026]",
+        "add w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "strb w20, [x28, #747]",
-        "msr nzcv, x22"
+        "msr nzcv, x21"
       ]
     },
     "fcomip st7": {
-      "ExpectedInstructionCount": 52,
+      "ExpectedInstructionCount": 53,
       "Comment": [
         "0xdf 11b 0xf7 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr q2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
@@ -17818,21 +18007,22 @@
         "mov x21, x0",
         "ubfx x22, x21, #1, #1",
         "ubfx x23, x21, #0, #1",
-        "ubfx x21, x21, #2, #1",
-        "orr w22, w22, w21",
-        "orr w23, w23, w21",
-        "lsl x22, x22, #29",
-        "orr w22, w22, w23, lsl #30",
-        "mov w23, #0x1",
-        "eor w26, w21, #0x1",
-        "ldrb w21, [x28, #1026]",
-        "lsl w23, w23, w20",
-        "bic w21, w21, w23",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "ubfx x24, x21, #2, #1",
+        "orr w21, w22, w24",
+        "orr w22, w23, w24",
+        "lsl x23, x21, #29",
+        "orr w21, w23, w22, lsl #30",
+        "mov w22, #0x1",
+        "eor w23, w24, #0x1",
+        "mov x26, x23",
+        "ldrb w23, [x28, #1026]",
+        "lsl w24, w22, w20",
+        "bic w22, w23, w24",
+        "strb w22, [x28, #1026]",
+        "add w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "strb w20, [x28, #747]",
-        "msr nzcv, x22"
+        "msr nzcv, x21"
       ]
     }
   }
diff --git a/unittests/InstructionCountCI/x87_f64.json b/unittests/InstructionCountCI/x87_f64.json
index 5dbd134b69..791ce6fc35 100644
--- a/unittests/InstructionCountCI/x87_f64.json
+++ b/unittests/InstructionCountCI/x87_f64.json
@@ -15,155 +15,163 @@
   },
   "Instructions": {
     "fadd dword [rax]": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xd8 !11b /0"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
-        "fcvt d2, s2",
+        "mov x21, x4",
+        "ldr s2, [x21]",
+        "fcvt d3, s2",
         "add x0, x28, x20, lsl #4",
-        "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
+        "ldr d2, [x0, #768]",
+        "fadd d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fmul dword [rax]": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xd8 !11b /1"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
-        "fcvt d2, s2",
+        "mov x21, x4",
+        "ldr s2, [x21]",
+        "fcvt d3, s2",
         "add x0, x28, x20, lsl #4",
-        "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
+        "ldr d2, [x0, #768]",
+        "fmul d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fcom dword [rax]": {
-      "ExpectedInstructionCount": 16,
+      "ExpectedInstructionCount": 17,
       "Comment": [
         "0xd8 !11b /2"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
-        "fcvt d2, s2",
+        "mov x21, x4",
+        "ldr s2, [x21]",
+        "fcvt d3, s2",
         "add x0, x28, x20, lsl #4",
-        "ldr d3, [x0, #768]",
-        "fcmp d3, d2",
+        "ldr d2, [x0, #768]",
+        "fcmp d2, d3",
         "mov w20, #0x0",
         "cset w21, vs",
         "cset w22, eq",
         "cset w23, mi",
-        "orr w23, w23, w21",
-        "strb w23, [x28, #744]",
-        "orr w22, w22, w21",
-        "strb w22, [x28, #750]",
+        "orr w24, w23, w21",
+        "strb w24, [x28, #744]",
+        "orr w23, w22, w21",
+        "strb w23, [x28, #750]",
         "strb w20, [x28, #745]",
         "strb w21, [x28, #746]"
       ]
     },
     "fcomp dword [rax]": {
-      "ExpectedInstructionCount": 24,
+      "ExpectedInstructionCount": 25,
       "Comment": [
         "0xd8 !11b /3"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
-        "fcvt d2, s2",
+        "mov x21, x4",
+        "ldr s2, [x21]",
+        "fcvt d3, s2",
         "add x0, x28, x20, lsl #4",
-        "ldr d3, [x0, #768]",
-        "fcmp d3, d2",
+        "ldr d2, [x0, #768]",
+        "fcmp d2, d3",
         "mov w21, #0x1",
         "mov w22, #0x0",
         "cset w23, vs",
         "cset w24, eq",
         "cset w25, mi",
-        "orr w25, w25, w23",
-        "strb w25, [x28, #744]",
-        "orr w24, w24, w23",
-        "strb w24, [x28, #750]",
+        "orr w30, w25, w23",
+        "strb w30, [x28, #744]",
+        "orr w25, w24, w23",
+        "strb w25, [x28, #750]",
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fsub dword [rax]": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xd8 !11b /4"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
-        "fcvt d2, s2",
+        "mov x21, x4",
+        "ldr s2, [x21]",
+        "fcvt d3, s2",
         "add x0, x28, x20, lsl #4",
-        "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
+        "ldr d2, [x0, #768]",
+        "fsub d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr dword [rax]": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xd8 !11b /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
-        "fcvt d2, s2",
+        "mov x21, x4",
+        "ldr s2, [x21]",
+        "fcvt d3, s2",
         "add x0, x28, x20, lsl #4",
-        "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
+        "ldr d2, [x0, #768]",
+        "fsub d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv dword [rax]": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xd8 !11b /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
-        "fcvt d2, s2",
+        "mov x21, x4",
+        "ldr s2, [x21]",
+        "fcvt d3, s2",
         "add x0, x28, x20, lsl #4",
-        "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
+        "ldr d2, [x0, #768]",
+        "fdiv d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr dword [rax]": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xd8 !11b /7"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
-        "fcvt d2, s2",
+        "mov x21, x4",
+        "ldr s2, [x21]",
+        "fcvt d3, s2",
         "add x0, x28, x20, lsl #4",
-        "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
+        "ldr d2, [x0, #768]",
+        "fdiv d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fadd st0, st0": {
@@ -174,14 +182,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fadd d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fadd st0, st1": {
@@ -192,14 +200,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
+        "fadd d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fadd st0, st2": {
@@ -210,14 +218,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
+        "fadd d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fadd st0, st3": {
@@ -228,14 +236,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
+        "fadd d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fadd st0, st4": {
@@ -246,14 +254,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
+        "fadd d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fadd st0, st5": {
@@ -264,14 +272,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
+        "fadd d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fadd st0, st6": {
@@ -282,14 +290,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
+        "fadd d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fadd st0, st7": {
@@ -300,14 +308,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
+        "fadd d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fmul st0, st0": {
@@ -318,14 +326,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fmul d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fmul st0, st1": {
@@ -336,14 +344,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
+        "fmul d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fmul st0, st2": {
@@ -354,14 +362,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
+        "fmul d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fmul st0, st3": {
@@ -372,14 +380,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
+        "fmul d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fmul st0, st4": {
@@ -390,14 +398,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
+        "fmul d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fmul st0, st5": {
@@ -408,14 +416,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
+        "fmul d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fmul st0, st6": {
@@ -426,14 +434,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
+        "fmul d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fmul st0, st7": {
@@ -444,14 +452,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
+        "fmul d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fcom st0, st0": {
@@ -463,8 +471,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x0",
         "add w22, w20, #0x0 (0)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -472,10 +480,10 @@
         "cset w20, vs",
         "cset w22, eq",
         "cset w23, mi",
-        "orr w23, w23, w20",
-        "strb w23, [x28, #744]",
-        "orr w22, w22, w20",
-        "strb w22, [x28, #750]",
+        "orr w24, w23, w20",
+        "strb w24, [x28, #744]",
+        "orr w23, w22, w20",
+        "strb w23, [x28, #750]",
         "strb w21, [x28, #745]",
         "strb w20, [x28, #746]"
       ]
@@ -488,8 +496,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -498,10 +506,10 @@
         "cset w21, vs",
         "cset w22, eq",
         "cset w23, mi",
-        "orr w23, w23, w21",
-        "strb w23, [x28, #744]",
-        "orr w22, w22, w21",
-        "strb w22, [x28, #750]",
+        "orr w24, w23, w21",
+        "strb w24, [x28, #744]",
+        "orr w23, w22, w21",
+        "strb w23, [x28, #750]",
         "strb w20, [x28, #745]",
         "strb w21, [x28, #746]"
       ]
@@ -514,8 +522,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -524,10 +532,10 @@
         "cset w21, vs",
         "cset w22, eq",
         "cset w23, mi",
-        "orr w23, w23, w21",
-        "strb w23, [x28, #744]",
-        "orr w22, w22, w21",
-        "strb w22, [x28, #750]",
+        "orr w24, w23, w21",
+        "strb w24, [x28, #744]",
+        "orr w23, w22, w21",
+        "strb w23, [x28, #750]",
         "strb w20, [x28, #745]",
         "strb w21, [x28, #746]"
       ]
@@ -540,8 +548,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -550,10 +558,10 @@
         "cset w21, vs",
         "cset w22, eq",
         "cset w23, mi",
-        "orr w23, w23, w21",
-        "strb w23, [x28, #744]",
-        "orr w22, w22, w21",
-        "strb w22, [x28, #750]",
+        "orr w24, w23, w21",
+        "strb w24, [x28, #744]",
+        "orr w23, w22, w21",
+        "strb w23, [x28, #750]",
         "strb w20, [x28, #745]",
         "strb w21, [x28, #746]"
       ]
@@ -566,8 +574,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -576,10 +584,10 @@
         "cset w21, vs",
         "cset w22, eq",
         "cset w23, mi",
-        "orr w23, w23, w21",
-        "strb w23, [x28, #744]",
-        "orr w22, w22, w21",
-        "strb w22, [x28, #750]",
+        "orr w24, w23, w21",
+        "strb w24, [x28, #744]",
+        "orr w23, w22, w21",
+        "strb w23, [x28, #750]",
         "strb w20, [x28, #745]",
         "strb w21, [x28, #746]"
       ]
@@ -592,8 +600,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -602,10 +610,10 @@
         "cset w21, vs",
         "cset w22, eq",
         "cset w23, mi",
-        "orr w23, w23, w21",
-        "strb w23, [x28, #744]",
-        "orr w22, w22, w21",
-        "strb w22, [x28, #750]",
+        "orr w24, w23, w21",
+        "strb w24, [x28, #744]",
+        "orr w23, w22, w21",
+        "strb w23, [x28, #750]",
         "strb w20, [x28, #745]",
         "strb w21, [x28, #746]"
       ]
@@ -618,8 +626,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -628,10 +636,10 @@
         "cset w21, vs",
         "cset w22, eq",
         "cset w23, mi",
-        "orr w23, w23, w21",
-        "strb w23, [x28, #744]",
-        "orr w22, w22, w21",
-        "strb w22, [x28, #750]",
+        "orr w24, w23, w21",
+        "strb w24, [x28, #744]",
+        "orr w23, w22, w21",
+        "strb w23, [x28, #750]",
         "strb w20, [x28, #745]",
         "strb w21, [x28, #746]"
       ]
@@ -644,8 +652,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -654,10 +662,10 @@
         "cset w21, vs",
         "cset w22, eq",
         "cset w23, mi",
-        "orr w23, w23, w21",
-        "strb w23, [x28, #744]",
-        "orr w22, w22, w21",
-        "strb w22, [x28, #750]",
+        "orr w24, w23, w21",
+        "strb w24, [x28, #744]",
+        "orr w23, w22, w21",
+        "strb w23, [x28, #750]",
         "strb w20, [x28, #745]",
         "strb w21, [x28, #746]"
       ]
@@ -671,8 +679,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x0",
         "add w22, w20, #0x0 (0)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -681,18 +689,18 @@
         "cset w23, vs",
         "cset w24, eq",
         "cset w25, mi",
-        "orr w25, w25, w23",
-        "strb w25, [x28, #744]",
-        "orr w24, w24, w23",
-        "strb w24, [x28, #750]",
+        "orr w30, w25, w23",
+        "strb w30, [x28, #744]",
+        "orr w25, w24, w23",
+        "strb w25, [x28, #750]",
         "strb w21, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w21, [x28, #1026]",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -705,8 +713,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -715,18 +723,18 @@
         "cset w23, vs",
         "cset w24, eq",
         "cset w25, mi",
-        "orr w25, w25, w23",
-        "strb w25, [x28, #744]",
-        "orr w24, w24, w23",
-        "strb w24, [x28, #750]",
+        "orr w30, w25, w23",
+        "strb w30, [x28, #744]",
+        "orr w25, w24, w23",
+        "strb w25, [x28, #750]",
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -738,8 +746,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -749,18 +757,18 @@
         "cset w23, vs",
         "cset w24, eq",
         "cset w25, mi",
-        "orr w25, w25, w23",
-        "strb w25, [x28, #744]",
-        "orr w24, w24, w23",
-        "strb w24, [x28, #750]",
+        "orr w30, w25, w23",
+        "strb w30, [x28, #744]",
+        "orr w25, w24, w23",
+        "strb w25, [x28, #750]",
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -772,8 +780,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -783,18 +791,18 @@
         "cset w23, vs",
         "cset w24, eq",
         "cset w25, mi",
-        "orr w25, w25, w23",
-        "strb w25, [x28, #744]",
-        "orr w24, w24, w23",
-        "strb w24, [x28, #750]",
+        "orr w30, w25, w23",
+        "strb w30, [x28, #744]",
+        "orr w25, w24, w23",
+        "strb w25, [x28, #750]",
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -806,8 +814,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -817,18 +825,18 @@
         "cset w23, vs",
         "cset w24, eq",
         "cset w25, mi",
-        "orr w25, w25, w23",
-        "strb w25, [x28, #744]",
-        "orr w24, w24, w23",
-        "strb w24, [x28, #750]",
+        "orr w30, w25, w23",
+        "strb w30, [x28, #744]",
+        "orr w25, w24, w23",
+        "strb w25, [x28, #750]",
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -840,8 +848,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -851,18 +859,18 @@
         "cset w23, vs",
         "cset w24, eq",
         "cset w25, mi",
-        "orr w25, w25, w23",
-        "strb w25, [x28, #744]",
-        "orr w24, w24, w23",
-        "strb w24, [x28, #750]",
+        "orr w30, w25, w23",
+        "strb w30, [x28, #744]",
+        "orr w25, w24, w23",
+        "strb w25, [x28, #750]",
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -874,8 +882,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -885,18 +893,18 @@
         "cset w23, vs",
         "cset w24, eq",
         "cset w25, mi",
-        "orr w25, w25, w23",
-        "strb w25, [x28, #744]",
-        "orr w24, w24, w23",
-        "strb w24, [x28, #750]",
+        "orr w30, w25, w23",
+        "strb w30, [x28, #744]",
+        "orr w25, w24, w23",
+        "strb w25, [x28, #750]",
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -908,8 +916,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -919,18 +927,18 @@
         "cset w23, vs",
         "cset w24, eq",
         "cset w25, mi",
-        "orr w25, w25, w23",
-        "strb w25, [x28, #744]",
-        "orr w24, w24, w23",
-        "strb w24, [x28, #750]",
+        "orr w30, w25, w23",
+        "strb w30, [x28, #744]",
+        "orr w25, w24, w23",
+        "strb w25, [x28, #750]",
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -942,14 +950,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsub st0, st1": {
@@ -960,14 +968,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
+        "fsub d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fsub st0, st2": {
@@ -978,14 +986,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
+        "fsub d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fsub st0, st3": {
@@ -996,14 +1004,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
+        "fsub d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fsub st0, st4": {
@@ -1014,14 +1022,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
+        "fsub d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fsub st0, st5": {
@@ -1032,14 +1040,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
+        "fsub d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fsub st0, st6": {
@@ -1050,14 +1058,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
+        "fsub d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fsub st0, st7": {
@@ -1068,14 +1076,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
+        "fsub d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr st0, st0": {
@@ -1086,14 +1094,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr st0, st1": {
@@ -1104,14 +1112,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
+        "fsub d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr st0, st2": {
@@ -1122,14 +1130,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
+        "fsub d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr st0, st3": {
@@ -1140,14 +1148,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
+        "fsub d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr st0, st4": {
@@ -1158,14 +1166,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
+        "fsub d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr st0, st5": {
@@ -1176,14 +1184,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
+        "fsub d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr st0, st6": {
@@ -1194,14 +1202,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
+        "fsub d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr st0, st7": {
@@ -1212,14 +1220,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
+        "fsub d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv st0, st0": {
@@ -1230,14 +1238,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv st0, st1": {
@@ -1248,14 +1256,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
+        "fdiv d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv st0, st2": {
@@ -1266,14 +1274,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
+        "fdiv d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv st0, st3": {
@@ -1284,14 +1292,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
+        "fdiv d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv st0, st4": {
@@ -1302,14 +1310,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
+        "fdiv d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv st0, st5": {
@@ -1320,14 +1328,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
+        "fdiv d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv st0, st6": {
@@ -1338,14 +1346,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
+        "fdiv d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv st0, st7": {
@@ -1356,14 +1364,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
+        "fdiv d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr st0, st0": {
@@ -1374,14 +1382,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr st0, st1": {
@@ -1392,14 +1400,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
+        "fdiv d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr st0, st2": {
@@ -1410,14 +1418,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
+        "fdiv d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr st0, st3": {
@@ -1428,14 +1436,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
+        "fdiv d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr st0, st4": {
@@ -1446,14 +1454,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
+        "fdiv d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr st0, st5": {
@@ -1464,14 +1472,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
+        "fdiv d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr st0, st6": {
@@ -1482,14 +1490,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
+        "fdiv d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr st0, st7": {
@@ -1500,39 +1508,40 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
+        "fdiv d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fld dword [rax]": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xd9 !11b /0"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr s2, [x4]",
-        "fcvt d2, s2",
+        "mov x21, x4",
+        "ldr s2, [x21]",
+        "fcvt d3, s2",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d3, [x0, #768]"
       ]
     },
     "fst dword [rax]": {
-      "ExpectedInstructionCount": 5,
+      "ExpectedInstructionCount": 6,
       "Comment": [
         "0xd9 !11b /2"
       ],
@@ -1540,12 +1549,13 @@
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
-        "fcvt s2, d2",
-        "str s2, [x4]"
+        "fcvt s3, d2",
+        "mov x20, x4",
+        "str s3, [x20]"
       ]
     },
     "fstp dword [rax]": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xd9 !11b /3"
       ],
@@ -1553,98 +1563,101 @@
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
-        "fcvt s2, d2",
-        "str s2, [x4]",
+        "fcvt s3, d2",
+        "mov x21, x4",
+        "str s3, [x21]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fldenv [rax]": {
-      "ExpectedInstructionCount": 56,
+      "ExpectedInstructionCount": 57,
       "Comment": [
         "0xd9 !11b /4"
       ],
       "ExpectedArm64ASM": [
-        "ldrh w20, [x4]",
-        "ubfx w21, w20, #10, #3",
-        "rbit w1, w21",
+        "mov x20, x4",
+        "ldrh w21, [x20]",
+        "ubfx w22, w21, #10, #3",
+        "rbit w1, w22",
         "lsr w1, w1, #30",
         "mrs x0, fpcr",
         "bfi x0, x1, #22, #2",
-        "lsr x1, x21, #2",
+        "lsr x1, x22, #2",
         "bfi x0, x1, #24, #1",
         "msr fpcr, x0",
-        "strh w20, [x28, #1024]",
-        "ldr w20, [x4, #4]",
-        "ubfx w21, w20, #11, #3",
-        "strb w21, [x28, #747]",
-        "ubfx w21, w20, #8, #1",
-        "ubfx w22, w20, #9, #1",
-        "ubfx w23, w20, #10, #1",
-        "ubfx w20, w20, #14, #1",
-        "strb w21, [x28, #744]",
-        "strb w22, [x28, #745]",
-        "strb w23, [x28, #746]",
-        "strb w20, [x28, #750]",
-        "ldr w20, [x4, #8]",
-        "ubfx w21, w20, #0, #2",
+        "strh w21, [x28, #1024]",
+        "ldr w21, [x20, #4]",
+        "ubfx w22, w21, #11, #3",
+        "strb w22, [x28, #747]",
+        "ubfx w22, w21, #8, #1",
+        "ubfx w23, w21, #9, #1",
+        "ubfx w24, w21, #10, #1",
+        "ubfx w25, w21, #14, #1",
+        "strb w22, [x28, #744]",
+        "strb w23, [x28, #745]",
+        "strb w24, [x28, #746]",
+        "strb w25, [x28, #750]",
+        "ldr w21, [x20, #8]",
+        "ubfx w20, w21, #0, #2",
         "mrs x22, nzcv",
-        "cmp x21, #0x3 (3)",
-        "cset x21, ne",
-        "ubfx w23, w20, #2, #2",
-        "cmp x23, #0x3 (3)",
-        "cset x23, ne",
-        "orr w21, w21, w23, lsl #1",
-        "ubfx w23, w20, #4, #2",
-        "cmp x23, #0x3 (3)",
-        "cset x23, ne",
-        "orr w21, w21, w23, lsl #2",
-        "ubfx w23, w20, #6, #2",
-        "cmp x23, #0x3 (3)",
+        "cmp x20, #0x3 (3)",
         "cset x23, ne",
-        "orr w21, w21, w23, lsl #3",
-        "ubfx w23, w20, #8, #2",
+        "ubfx w20, w21, #2, #2",
+        "cmp x20, #0x3 (3)",
+        "cset x24, ne",
+        "orr w20, w23, w24, lsl #1",
+        "ubfx w23, w21, #4, #2",
         "cmp x23, #0x3 (3)",
-        "cset x23, ne",
-        "orr w21, w21, w23, lsl #4",
-        "ubfx w23, w20, #10, #2",
+        "cset x24, ne",
+        "orr w23, w20, w24, lsl #2",
+        "ubfx w20, w21, #6, #2",
+        "cmp x20, #0x3 (3)",
+        "cset x24, ne",
+        "orr w20, w23, w24, lsl #3",
+        "ubfx w23, w21, #8, #2",
         "cmp x23, #0x3 (3)",
-        "cset x23, ne",
-        "orr w21, w21, w23, lsl #5",
-        "ubfx w23, w20, #12, #2",
+        "cset x24, ne",
+        "orr w23, w20, w24, lsl #4",
+        "ubfx w20, w21, #10, #2",
+        "cmp x20, #0x3 (3)",
+        "cset x24, ne",
+        "orr w20, w23, w24, lsl #5",
+        "ubfx w23, w21, #12, #2",
         "cmp x23, #0x3 (3)",
-        "cset x23, ne",
-        "orr w21, w21, w23, lsl #6",
-        "ubfx w20, w20, #14, #2",
+        "cset x24, ne",
+        "orr w23, w20, w24, lsl #6",
+        "ubfx w20, w21, #14, #2",
         "cmp x20, #0x3 (3)",
-        "cset x20, ne",
-        "orr w20, w21, w20, lsl #7",
+        "cset x21, ne",
+        "orr w20, w23, w21, lsl #7",
         "strb w20, [x28, #1026]",
         "msr nzcv, x22"
       ]
     },
     "fldcw [rax]": {
-      "ExpectedInstructionCount": 10,
+      "ExpectedInstructionCount": 11,
       "Comment": [
         "0xd9 !11b /5"
       ],
       "ExpectedArm64ASM": [
-        "ldrh w20, [x4]",
-        "ubfx w21, w20, #10, #3",
-        "rbit w1, w21",
+        "mov x20, x4",
+        "ldrh w21, [x20]",
+        "ubfx w20, w21, #10, #3",
+        "rbit w1, w20",
         "lsr w1, w1, #30",
         "mrs x0, fpcr",
         "bfi x0, x1, #22, #2",
-        "lsr x1, x21, #2",
+        "lsr x1, x20, #2",
         "bfi x0, x1, #24, #1",
         "msr fpcr, x0",
-        "strh w20, [x28, #1024]"
+        "strh w21, [x28, #1024]"
       ]
     },
     "fnstenv [rax]": {
@@ -1653,80 +1666,81 @@
         "0xd9 !11b /6"
       ],
       "ExpectedArm64ASM": [
-        "ldrh w20, [x28, #1024]",
-        "str w20, [x4]",
-        "mov w20, #0x0",
-        "ldrb w21, [x28, #747]",
-        "mov x0, x20",
-        "bfi x0, x21, #11, #3",
-        "mov x21, x0",
+        "mov x20, x4",
+        "ldrh w21, [x28, #1024]",
+        "str w21, [x20]",
+        "mov w21, #0x0",
+        "ldrb w22, [x28, #747]",
+        "mov x23, x21",
+        "bfi x23, x22, #11, #3",
         "ldrb w22, [x28, #744]",
-        "ldrb w23, [x28, #745]",
-        "ldrb w24, [x28, #746]",
-        "ldrb w25, [x28, #750]",
-        "orr x21, x21, x22, lsl #8",
-        "orr x21, x21, x23, lsl #9",
-        "orr x21, x21, x24, lsl #10",
-        "orr x21, x21, x25, lsl #14",
-        "str w21, [x4, #4]",
-        "ldrb w21, [x28, #1026]",
-        "and w22, w21, #0x1",
-        "mov w23, #0x3",
-        "mrs x24, nzcv",
+        "ldrb w24, [x28, #745]",
+        "ldrb w25, [x28, #746]",
+        "ldrb w30, [x28, #750]",
+        "orr x18, x23, x22, lsl #8",
+        "orr x22, x18, x24, lsl #9",
+        "orr x23, x22, x25, lsl #10",
+        "orr x22, x23, x30, lsl #14",
+        "str w22, [x20, #4]",
+        "ldrb w22, [x28, #1026]",
+        "and w23, w22, #0x1",
+        "mov w24, #0x3",
+        "mrs x25, nzcv",
+        "cmp x23, #0x0 (0)",
+        "csel x30, x24, x21, eq",
+        "orr w23, w21, w30",
+        "lsr w30, w22, #1",
+        "and w18, w30, #0x1",
+        "cmp x18, #0x0 (0)",
+        "csel x30, x24, x21, eq",
+        "orr w18, w23, w30, lsl #2",
+        "lsr w23, w22, #2",
+        "and w30, w23, #0x1",
+        "cmp x30, #0x0 (0)",
+        "csel x23, x24, x21, eq",
+        "orr w30, w18, w23, lsl #4",
+        "lsr w23, w22, #3",
+        "and w18, w23, #0x1",
+        "cmp x18, #0x0 (0)",
+        "csel x23, x24, x21, eq",
+        "orr w18, w30, w23, lsl #6",
+        "lsr w23, w22, #4",
+        "and w30, w23, #0x1",
+        "cmp x30, #0x0 (0)",
+        "csel x23, x24, x21, eq",
+        "orr w30, w18, w23, lsl #8",
+        "lsr w23, w22, #5",
+        "and w18, w23, #0x1",
+        "cmp x18, #0x0 (0)",
+        "csel x23, x24, x21, eq",
+        "orr w18, w30, w23, lsl #10",
+        "lsr w23, w22, #6",
+        "and w30, w23, #0x1",
+        "cmp x30, #0x0 (0)",
+        "csel x23, x24, x21, eq",
+        "orr w30, w18, w23, lsl #12",
+        "lsr w23, w22, #7",
+        "and w22, w23, #0x1",
         "cmp x22, #0x0 (0)",
-        "csel x22, x23, x20, eq",
-        "orr w22, w20, w22",
-        "lsr w25, w21, #1",
-        "and w25, w25, #0x1",
-        "cmp x25, #0x0 (0)",
-        "csel x25, x23, x20, eq",
-        "orr w22, w22, w25, lsl #2",
-        "lsr w25, w21, #2",
-        "and w25, w25, #0x1",
-        "cmp x25, #0x0 (0)",
-        "csel x25, x23, x20, eq",
-        "orr w22, w22, w25, lsl #4",
-        "lsr w25, w21, #3",
-        "and w25, w25, #0x1",
-        "cmp x25, #0x0 (0)",
-        "csel x25, x23, x20, eq",
-        "orr w22, w22, w25, lsl #6",
-        "lsr w25, w21, #4",
-        "and w25, w25, #0x1",
-        "cmp x25, #0x0 (0)",
-        "csel x25, x23, x20, eq",
-        "orr w22, w22, w25, lsl #8",
-        "lsr w25, w21, #5",
-        "and w25, w25, #0x1",
-        "cmp x25, #0x0 (0)",
-        "csel x25, x23, x20, eq",
-        "orr w22, w22, w25, lsl #10",
-        "lsr w25, w21, #6",
-        "and w25, w25, #0x1",
-        "cmp x25, #0x0 (0)",
-        "csel x25, x23, x20, eq",
-        "orr w22, w22, w25, lsl #12",
-        "lsr w21, w21, #7",
-        "and w21, w21, #0x1",
-        "cmp x21, #0x0 (0)",
-        "csel x21, x23, x20, eq",
-        "orr w21, w22, w21, lsl #14",
-        "str w21, [x4, #8]",
-        "str w20, [x4, #12]",
-        "str w20, [x4, #16]",
-        "str w20, [x4, #20]",
-        "str w20, [x4, #24]",
-        "msr nzcv, x24"
+        "csel x23, x24, x21, eq",
+        "orr w22, w30, w23, lsl #14",
+        "str w22, [x20, #8]",
+        "str w21, [x20, #12]",
+        "str w21, [x20, #16]",
+        "str w21, [x20, #20]",
+        "str w21, [x20, #24]",
+        "msr nzcv, x25"
       ]
     },
     "fnstcw [rax]": {
-      "ExpectedInstructionCount": 2,
+      "ExpectedInstructionCount": 3,
       "Comment": [
         "0xd9 !11b /7"
       ],
       "ExpectedArm64ASM": [
         "ldrh w20, [x28, #1024]",
-        "strh w20, [x4]"
+        "mov x21, x4",
+        "strh w20, [x21]"
       ]
     },
     "fld st0": {
@@ -1737,15 +1751,15 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -1761,14 +1775,14 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -1783,15 +1797,15 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -1806,15 +1820,15 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -1829,15 +1843,15 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -1852,15 +1866,15 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -1875,15 +1889,15 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -1898,15 +1912,15 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -1921,14 +1935,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -1940,14 +1954,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -1959,14 +1973,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -1978,14 +1992,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -1997,14 +2011,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -2016,14 +2030,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -2035,14 +2049,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -2054,14 +2068,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q3, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "str q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -2081,9 +2095,9 @@
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
-        "fneg v2.2d, v2.2d",
+        "fneg v3.2d, v2.2d",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d3, [x0, #768]"
       ]
     },
     "fabs": {
@@ -2095,9 +2109,9 @@
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
-        "fabs d2, d2",
+        "fabs d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d3, [x0, #768]"
       ]
     },
     "ftst": {
@@ -2115,10 +2129,10 @@
         "cset w21, vs",
         "cset w22, eq",
         "cset w23, mi",
-        "orr w23, w23, w21",
-        "strb w23, [x28, #744]",
-        "orr w22, w22, w21",
-        "strb w22, [x28, #750]",
+        "orr w24, w23, w21",
+        "strb w24, [x28, #744]",
+        "orr w23, w22, w21",
+        "strb w23, [x28, #750]",
         "strb w20, [x28, #745]",
         "strb w21, [x28, #746]"
       ]
@@ -2133,19 +2147,19 @@
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
         "mov x21, v2.d[0]",
-        "lsr x21, x21, #63",
-        "strb w21, [x28, #745]",
+        "lsr x22, x21, #63",
+        "strb w22, [x28, #745]",
         "ldrb w21, [x28, #1026]",
-        "lsr w20, w21, w20",
-        "mov w21, #0x1",
-        "and w20, w20, #0x1",
+        "lsr w22, w21, w20",
+        "mov w20, #0x1",
+        "and w21, w22, #0x1",
         "mov w22, #0x0",
         "mrs x23, nzcv",
-        "cmp x20, #0x1 (1)",
-        "csel x21, x22, x21, eq",
-        "strb w21, [x28, #744]",
-        "strb w20, [x28, #746]",
-        "strb w21, [x28, #750]",
+        "cmp x21, #0x1 (1)",
+        "csel x24, x22, x20, eq",
+        "strb w24, [x28, #744]",
+        "strb w21, [x28, #746]",
+        "strb w24, [x28, #750]",
         "msr nzcv, x23"
       ]
     },
@@ -2157,11 +2171,11 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "mov x21, #0x3ff0000000000000",
@@ -2178,11 +2192,11 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "mov x21, #0xa372",
@@ -2202,11 +2216,11 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "mov x21, #0x82fe",
@@ -2226,11 +2240,11 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "mov x21, #0x2d18",
@@ -2250,11 +2264,11 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "mov x21, #0x79ff",
@@ -2274,11 +2288,11 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "mov x21, #0x39ef",
@@ -2298,11 +2312,11 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "mov w21, #0x0",
@@ -2370,9 +2384,9 @@
         "ldp x19, x29, [x28, #120]",
         "ldr x26, [x28, #752]",
         "ldr x27, [x28, #760]",
-        "mov v2.8b, v0.8b",
+        "mov v3.8b, v0.8b",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d3, [x0, #768]"
       ]
     },
     "fyl2x": {
@@ -2384,15 +2398,15 @@
         "ldrb w20, [x28, #747]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "strb w21, [x28, #747]",
+        "and w22, w21, #0x7",
+        "strb w22, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr d3, [x0, #768]",
         "mov v0.8b, v2.8b",
         "mov v1.8b, v3.8b",
@@ -2445,9 +2459,9 @@
         "ldp x19, x29, [x28, #120]",
         "ldr x26, [x28, #752]",
         "ldr x27, [x28, #760]",
-        "mov v2.8b, v0.8b",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "mov v4.8b, v0.8b",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fptan": {
@@ -2459,12 +2473,12 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "sub w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w22",
-        "orr w21, w23, w21",
+        "and w23, w22, #0x7",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w23",
+        "orr w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "strb w22, [x28, #747]",
+        "strb w23, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
         "mrs x0, nzcv",
@@ -2517,15 +2531,15 @@
         "ldp x19, x29, [x28, #120]",
         "ldr x26, [x28, #752]",
         "ldr x27, [x28, #760]",
-        "mov v2.8b, v0.8b",
+        "mov v3.8b, v0.8b",
         "mov x21, #0x3ff0000000000000",
-        "fmov d3, x21",
+        "fmov d2, x21",
         "mov w21, #0x0",
         "strb w21, [x28, #746]",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
-        "str d3, [x0, #768]"
+        "str d3, [x0, #768]",
+        "add x0, x28, x23, lsl #4",
+        "str d2, [x0, #768]"
       ]
     },
     "fpatan": {
@@ -2537,15 +2551,15 @@
         "ldrb w20, [x28, #747]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "strb w21, [x28, #747]",
+        "and w22, w21, #0x7",
+        "strb w22, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr d3, [x0, #768]",
         "mov v0.8b, v3.8b",
         "mov v1.8b, v2.8b",
@@ -2598,9 +2612,9 @@
         "ldp x19, x29, [x28, #120]",
         "ldr x26, [x28, #752]",
         "ldr x27, [x28, #760]",
-        "mov v2.8b, v0.8b",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "mov v4.8b, v0.8b",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fxtract": {
@@ -2612,25 +2626,25 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "sub w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w22",
-        "orr w21, w23, w21",
+        "and w23, w22, #0x7",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w23",
+        "orr w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "strb w22, [x28, #747]",
+        "strb w23, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
         "mov x21, v2.d[0]",
-        "and x23, x21, #0x7ff0000000000000",
-        "lsr x23, x23, #52",
-        "sub x23, x23, #0x3ff (1023)",
-        "scvtf d2, x23",
-        "and x21, x21, #0x800fffffffffffff",
-        "orr x21, x21, #0x3ff0000000000000",
+        "and x22, x21, #0x7ff0000000000000",
+        "lsr x24, x22, #52",
+        "sub x22, x24, #0x3ff (1023)",
+        "scvtf d2, x22",
+        "and x22, x21, #0x800fffffffffffff",
+        "orr x21, x22, #0x3ff0000000000000",
         "fmov d3, x21",
         "add x0, x28, x20, lsl #4",
         "str d2, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "str d3, [x0, #768]"
       ]
     },
@@ -2642,10 +2656,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr d3, [x0, #768]",
         "mov v0.8b, v2.8b",
         "mov v1.8b, v3.8b",
@@ -2698,11 +2712,11 @@
         "ldp x19, x29, [x28, #120]",
         "ldr x26, [x28, #752]",
         "ldr x27, [x28, #760]",
-        "mov v2.8b, v0.8b",
+        "mov v4.8b, v0.8b",
         "mov w21, #0x0",
         "strb w21, [x28, #746]",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdecstp": {
@@ -2712,8 +2726,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -2724,8 +2738,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -2737,10 +2751,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr d3, [x0, #768]",
         "mov v0.8b, v2.8b",
         "mov v1.8b, v3.8b",
@@ -2793,11 +2807,11 @@
         "ldp x19, x29, [x28, #120]",
         "ldr x26, [x28, #752]",
         "ldr x27, [x28, #760]",
-        "mov v2.8b, v0.8b",
+        "mov v4.8b, v0.8b",
         "mov w21, #0x0",
         "strb w21, [x28, #746]",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fyl2xp1": {
@@ -2809,20 +2823,20 @@
         "ldrb w20, [x28, #747]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "strb w21, [x28, #747]",
+        "and w22, w21, #0x7",
+        "strb w22, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr d3, [x0, #768]",
         "mov x20, #0x3ff0000000000000",
         "fmov d4, x20",
-        "fadd d2, d2, d4",
-        "mov v0.8b, v2.8b",
+        "fadd d5, d2, d4",
+        "mov v0.8b, v5.8b",
         "mov v1.8b, v3.8b",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
@@ -2874,7 +2888,7 @@
         "ldr x26, [x28, #752]",
         "ldr x27, [x28, #760]",
         "mov v2.8b, v0.8b",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str d2, [x0, #768]"
       ]
     },
@@ -2887,9 +2901,9 @@
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
-        "fsqrt d2, d2",
+        "fsqrt d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d3, [x0, #768]"
       ]
     },
     "fsincos": {
@@ -2901,12 +2915,12 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "sub w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w22",
-        "orr w21, w23, w21",
+        "and w23, w22, #0x7",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w23",
+        "orr w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "strb w22, [x28, #747]",
+        "strb w23, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
         "mrs x0, nzcv",
@@ -3010,13 +3024,13 @@
         "ldp x19, x29, [x28, #120]",
         "ldr x26, [x28, #752]",
         "ldr x27, [x28, #760]",
-        "mov v2.8b, v0.8b",
+        "mov v4.8b, v0.8b",
         "mov w21, #0x0",
         "strb w21, [x28, #746]",
         "add x0, x28, x20, lsl #4",
         "str d3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x23, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "frndint": {
@@ -3028,9 +3042,9 @@
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
-        "frinti d2, d2",
+        "frinti d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d3, [x0, #768]"
       ]
     },
     "fscale": {
@@ -3041,10 +3055,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr d3, [x0, #768]",
         "mov v0.8b, v2.8b",
         "mov v1.8b, v3.8b",
@@ -3097,9 +3111,9 @@
         "ldp x19, x29, [x28, #120]",
         "ldr x26, [x28, #752]",
         "ldr x27, [x28, #760]",
-        "mov v2.8b, v0.8b",
+        "mov v4.8b, v0.8b",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fsin": {
@@ -3161,11 +3175,11 @@
         "ldp x19, x29, [x28, #120]",
         "ldr x26, [x28, #752]",
         "ldr x27, [x28, #760]",
-        "mov v2.8b, v0.8b",
+        "mov v3.8b, v0.8b",
         "mov w21, #0x0",
         "strb w21, [x28, #746]",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d3, [x0, #768]"
       ]
     },
     "fcos": {
@@ -3227,54 +3241,57 @@
         "ldp x19, x29, [x28, #120]",
         "ldr x26, [x28, #752]",
         "ldr x27, [x28, #760]",
-        "mov v2.8b, v0.8b",
+        "mov v3.8b, v0.8b",
         "mov w21, #0x0",
         "strb w21, [x28, #746]",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d3, [x0, #768]"
       ]
     },
     "fiadd dword [rax]": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xda !11b /0"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr w21, [x4]",
-        "scvtf d2, w21",
+        "mov x21, x4",
+        "ldr w22, [x21]",
+        "scvtf d2, w22",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
+        "fadd d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fimul dword [rax]": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xda !11b /1"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr w21, [x4]",
-        "scvtf d2, w21",
+        "mov x21, x4",
+        "ldr w22, [x21]",
+        "scvtf d2, w22",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
+        "fmul d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "ficom dword [rax]": {
-      "ExpectedInstructionCount": 16,
+      "ExpectedInstructionCount": 17,
       "Comment": [
         "0xda !11b /2"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr w21, [x4]",
-        "scvtf d2, w21",
+        "mov x21, x4",
+        "ldr w22, [x21]",
+        "scvtf d2, w22",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
@@ -3282,23 +3299,24 @@
         "cset w21, vs",
         "cset w22, eq",
         "cset w23, mi",
-        "orr w23, w23, w21",
-        "strb w23, [x28, #744]",
-        "orr w22, w22, w21",
-        "strb w22, [x28, #750]",
+        "orr w24, w23, w21",
+        "strb w24, [x28, #744]",
+        "orr w23, w22, w21",
+        "strb w23, [x28, #750]",
         "strb w20, [x28, #745]",
         "strb w21, [x28, #746]"
       ]
     },
     "ficomp dword [rax]": {
-      "ExpectedInstructionCount": 24,
+      "ExpectedInstructionCount": 25,
       "Comment": [
         "0xda !11b /3"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr w21, [x4]",
-        "scvtf d2, w21",
+        "mov x21, x4",
+        "ldr w22, [x21]",
+        "scvtf d2, w22",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
@@ -3307,87 +3325,91 @@
         "cset w23, vs",
         "cset w24, eq",
         "cset w25, mi",
-        "orr w25, w25, w23",
-        "strb w25, [x28, #744]",
-        "orr w24, w24, w23",
-        "strb w24, [x28, #750]",
+        "orr w30, w25, w23",
+        "strb w30, [x28, #744]",
+        "orr w25, w24, w23",
+        "strb w25, [x28, #750]",
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fisub dword [rax]": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xda !11b /4"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr w21, [x4]",
-        "scvtf d2, w21",
+        "mov x21, x4",
+        "ldr w22, [x21]",
+        "scvtf d2, w22",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
+        "fsub d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fisubr dword [rax]": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xda !11b /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr w21, [x4]",
-        "scvtf d2, w21",
+        "mov x21, x4",
+        "ldr w22, [x21]",
+        "scvtf d2, w22",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
+        "fsub d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fidiv dword [rax]": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xda !11b /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr w21, [x4]",
-        "scvtf d2, w21",
+        "mov x21, x4",
+        "ldr w22, [x21]",
+        "scvtf d2, w22",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
+        "fdiv d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fidivr dword [rax]": {
-      "ExpectedInstructionCount": 8,
+      "ExpectedInstructionCount": 9,
       "Comment": [
         "0xda !11b /7"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr w21, [x4]",
-        "scvtf d2, w21",
+        "mov x21, x4",
+        "ldr w22, [x21]",
+        "scvtf d2, w22",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
+        "fdiv d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fcmovb st0, st0": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc0 /0"
       ],
@@ -3396,18 +3418,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovb st0, st1": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc1 /0"
       ],
@@ -3416,18 +3439,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovb st0, st2": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc2 /0"
       ],
@@ -3436,18 +3460,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovb st0, st3": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc3 /0"
       ],
@@ -3456,18 +3481,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovb st0, st4": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc4 /0"
       ],
@@ -3476,18 +3502,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovb st0, st5": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc5 /0"
       ],
@@ -3496,18 +3523,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovb st0, st6": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc6 /0"
       ],
@@ -3516,18 +3544,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovb st0, st7": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc7 /0"
       ],
@@ -3536,18 +3565,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmove st0, st0": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc8 /1"
       ],
@@ -3556,18 +3586,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmove st0, st1": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xc9 /1"
       ],
@@ -3576,18 +3607,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmove st0, st2": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xca /1"
       ],
@@ -3596,18 +3628,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmove st0, st3": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xcb /1"
       ],
@@ -3616,18 +3649,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmove st0, st4": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xcc /1"
       ],
@@ -3636,18 +3670,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmove st0, st5": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xcd /1"
       ],
@@ -3656,18 +3691,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmove st0, st6": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xce /1"
       ],
@@ -3676,18 +3712,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmove st0, st7": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xda 11b 0xcf /1"
       ],
@@ -3696,398 +3733,423 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovbe st0, st0": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xda 11b 0xd0 /0"
       ],
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "csetm x21, hs",
-        "csel x20, x20, x21, eq",
-        "dup v2.2d, x20",
+        "csel x22, x20, x21, eq",
+        "dup v2.2d, x22",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovbe st0, st1": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xda 11b 0xd1 /0"
       ],
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "csetm x21, hs",
-        "csel x20, x20, x21, eq",
-        "dup v2.2d, x20",
+        "csel x22, x20, x21, eq",
+        "dup v2.2d, x22",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovbe st0, st2": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xda 11b 0xd2 /0"
       ],
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "csetm x21, hs",
-        "csel x20, x20, x21, eq",
-        "dup v2.2d, x20",
+        "csel x22, x20, x21, eq",
+        "dup v2.2d, x22",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovbe st0, st3": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xda 11b 0xd3 /0"
       ],
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "csetm x21, hs",
-        "csel x20, x20, x21, eq",
-        "dup v2.2d, x20",
+        "csel x22, x20, x21, eq",
+        "dup v2.2d, x22",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovbe st0, st4": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xda 11b 0xd4 /0"
       ],
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "csetm x21, hs",
-        "csel x20, x20, x21, eq",
-        "dup v2.2d, x20",
+        "csel x22, x20, x21, eq",
+        "dup v2.2d, x22",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovbe st0, st5": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xda 11b 0xd5 /0"
       ],
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "csetm x21, hs",
-        "csel x20, x20, x21, eq",
-        "dup v2.2d, x20",
+        "csel x22, x20, x21, eq",
+        "dup v2.2d, x22",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovbe st0, st6": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xda 11b 0xd6 /0"
       ],
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "csetm x21, hs",
-        "csel x20, x20, x21, eq",
-        "dup v2.2d, x20",
+        "csel x22, x20, x21, eq",
+        "dup v2.2d, x22",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovbe st0, st7": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xda 11b 0xd7 /0"
       ],
       "ExpectedArm64ASM": [
         "mov x20, #0xffffffffffffffff",
         "csetm x21, hs",
-        "csel x20, x20, x21, eq",
-        "dup v2.2d, x20",
+        "csel x22, x20, x21, eq",
+        "dup v2.2d, x22",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovu st0, st0": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xda 11b 0xd8 /1"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x0 (0)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x0 (0)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovu st0, st1": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xda 11b 0xd9 /1"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x1 (1)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovu st0, st2": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xda 11b 0xda /1"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x2 (2)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x2 (2)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovu st0, st3": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xda 11b 0xdb /1"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x3 (3)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x3 (3)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovu st0, st4": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xda 11b 0xdc /1"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x4 (4)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x4 (4)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovu st0, st5": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xda 11b 0xdd /1"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x5 (5)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x5 (5)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovu st0, st6": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xda 11b 0xde /1"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x6 (6)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x6 (6)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovu st0, st7": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xda 11b 0xdf /1"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eon w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x7 (7)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eon w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x7 (7)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fucompp": {
@@ -4099,8 +4161,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -4109,49 +4171,50 @@
         "cset w23, vs",
         "cset w24, eq",
         "cset w25, mi",
-        "orr w25, w25, w23",
-        "strb w25, [x28, #744]",
-        "orr w24, w24, w23",
-        "strb w24, [x28, #750]",
+        "orr w30, w25, w23",
+        "strb w30, [x28, #744]",
+        "orr w25, w24, w23",
+        "strb w25, [x28, #750]",
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
         "lsl w23, w21, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "bic w24, w22, w23",
+        "strb w24, [x28, #1026]",
+        "add w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
+        "lsl w22, w21, w20",
+        "bic w21, w24, w22",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fild dword [rax]": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdf !11b /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
-        "ldr w21, [x4]",
-        "scvtf d2, w21",
+        "mov x21, x4",
+        "ldr w22, [x21]",
+        "scvtf d2, w22",
         "add x0, x28, x20, lsl #4",
         "str d2, [x0, #768]"
       ]
     },
     "fisttp dword [rax]": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb !11b /1"
       ],
@@ -4160,19 +4223,20 @@
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
         "fcvtzs w21, d2",
-        "str w21, [x4]",
+        "mov x22, x4",
+        "str w21, [x22]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fist dword [rax]": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0xdb !11b /2"
       ],
@@ -4182,11 +4246,12 @@
         "ldr d2, [x0, #768]",
         "frinti d0, d2",
         "fcvtzs w20, d0",
-        "str w20, [x4]"
+        "mov x21, x4",
+        "str w20, [x21]"
       ]
     },
     "fistp dword [rax]": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xdf !11b /7"
       ],
@@ -4196,25 +4261,27 @@
         "ldr d2, [x0, #768]",
         "frinti d0, d2",
         "fcvtzs w21, d0",
-        "str w21, [x4]",
+        "mov x22, x4",
+        "str w21, [x22]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fld tword [rax]": {
-      "ExpectedInstructionCount": 38,
+      "ExpectedInstructionCount": 39,
       "Comment": [
         "0xdb !11b /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr q2, [x4]",
+        "mov x21, x4",
+        "ldr q2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -4240,21 +4307,21 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "mov v2.8b, v0.8b",
+        "mov v3.8b, v0.8b",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d3, [x0, #768]"
       ]
     },
     "fstp tword [rax]": {
-      "ExpectedInstructionCount": 41,
+      "ExpectedInstructionCount": 42,
       "Comment": [
         "0xdb !11b /7"
       ],
@@ -4286,24 +4353,25 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "str d2, [x4]",
-        "mov x21, v2.d[1]",
-        "strh w21, [x4, #8]",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "mov x21, x4",
+        "str d3, [x21]",
+        "mov x22, v3.d[1]",
+        "strh w22, [x21, #8]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fcmovnb st0, st0": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc0 /0"
       ],
@@ -4312,18 +4380,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnb st0, st1": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc1 /0"
       ],
@@ -4332,18 +4401,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnb st0, st2": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc2 /0"
       ],
@@ -4352,18 +4422,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnb st0, st3": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc3 /0"
       ],
@@ -4372,18 +4443,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnb st0, st4": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc4 /0"
       ],
@@ -4392,18 +4464,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnb st0, st5": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc5 /0"
       ],
@@ -4412,18 +4485,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnb st0, st6": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc6 /0"
       ],
@@ -4432,18 +4506,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnb st0, st7": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc7 /0"
       ],
@@ -4452,18 +4527,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovne st0, st0": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc8 /1"
       ],
@@ -4472,18 +4548,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovne st0, st1": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xc9 /1"
       ],
@@ -4492,18 +4569,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovne st0, st2": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xca /1"
       ],
@@ -4512,18 +4590,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovne st0, st3": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xcb /1"
       ],
@@ -4532,18 +4611,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovne st0, st4": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xcc /1"
       ],
@@ -4552,18 +4632,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovne st0, st5": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xcd /1"
       ],
@@ -4572,18 +4653,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovne st0, st6": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xce /1"
       ],
@@ -4592,18 +4674,19 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovne st0, st7": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdb 11b 0xcf /1"
       ],
@@ -4612,390 +4695,415 @@
         "dup v2.2d, x20",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnbe st0, st0": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb 11b 0xd0 /2"
       ],
       "ExpectedArm64ASM": [
         "csetm x20, lo",
-        "csel x20, x20, xzr, ne",
-        "dup v2.2d, x20",
+        "csel x21, x20, xzr, ne",
+        "dup v2.2d, x21",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnbe st0, st1": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb 11b 0xd1 /2"
       ],
       "ExpectedArm64ASM": [
         "csetm x20, lo",
-        "csel x20, x20, xzr, ne",
-        "dup v2.2d, x20",
+        "csel x21, x20, xzr, ne",
+        "dup v2.2d, x21",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnbe st0, st2": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb 11b 0xd2 /2"
       ],
       "ExpectedArm64ASM": [
         "csetm x20, lo",
-        "csel x20, x20, xzr, ne",
-        "dup v2.2d, x20",
+        "csel x21, x20, xzr, ne",
+        "dup v2.2d, x21",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnbe st0, st3": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb 11b 0xd3 /2"
       ],
       "ExpectedArm64ASM": [
         "csetm x20, lo",
-        "csel x20, x20, xzr, ne",
-        "dup v2.2d, x20",
+        "csel x21, x20, xzr, ne",
+        "dup v2.2d, x21",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnbe st0, st4": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb 11b 0xd4 /2"
       ],
       "ExpectedArm64ASM": [
         "csetm x20, lo",
-        "csel x20, x20, xzr, ne",
-        "dup v2.2d, x20",
+        "csel x21, x20, xzr, ne",
+        "dup v2.2d, x21",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnbe st0, st5": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb 11b 0xd5 /2"
       ],
       "ExpectedArm64ASM": [
         "csetm x20, lo",
-        "csel x20, x20, xzr, ne",
-        "dup v2.2d, x20",
+        "csel x21, x20, xzr, ne",
+        "dup v2.2d, x21",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnbe st0, st6": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb 11b 0xd6 /2"
       ],
       "ExpectedArm64ASM": [
         "csetm x20, lo",
-        "csel x20, x20, xzr, ne",
-        "dup v2.2d, x20",
+        "csel x21, x20, xzr, ne",
+        "dup v2.2d, x21",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnbe st0, st7": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdb 11b 0xd7 /2"
       ],
       "ExpectedArm64ASM": [
         "csetm x20, lo",
-        "csel x20, x20, xzr, ne",
-        "dup v2.2d, x20",
+        "csel x21, x20, xzr, ne",
+        "dup v2.2d, x21",
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
         "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]"
+        "str q5, [x0, #768]"
       ]
     },
     "fcmovnu st0, st0": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdb 11b 0xd8 /3"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x0 (0)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x0 (0)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovnu st0, st1": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdb 11b 0xd9 /3"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x1 (1)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovnu st0, st2": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdb 11b 0xda /3"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x2 (2)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x2 (2)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovnu st0, st3": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdb 11b 0xdb /3"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x3 (3)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x3 (3)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovnu st0, st4": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdb 11b 0xdc /3"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x4 (4)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x4 (4)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovnu st0, st5": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdb 11b 0xdd /3"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x5 (5)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x5 (5)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovnu st0, st6": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdb 11b 0xde /3"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x6 (6)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x6 (6)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fcmovnu st0, st7": {
-      "ExpectedInstructionCount": 18,
+      "ExpectedInstructionCount": 20,
       "Comment": [
         "0xdb 11b 0xdf /3"
       ],
       "ExpectedArm64ASM": [
-        "eor w20, w26, w26, lsr #4",
-        "eor w20, w20, w20, lsr #2",
-        "eor w20, w20, w20, lsr #1",
-        "mrs x21, nzcv",
-        "tst w20, #0x1",
-        "csetm x20, ne",
-        "dup v2.2d, x20",
-        "ldrb w20, [x28, #747]",
-        "add w22, w20, #0x7 (7)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "mov x20, x26",
+        "eor w21, w20, w20, lsr #4",
+        "eor w20, w21, w21, lsr #2",
+        "eor w21, w20, w20, lsr #1",
+        "mrs x20, nzcv",
+        "tst w21, #0x1",
+        "csetm x21, ne",
+        "dup v2.2d, x21",
+        "ldrb w21, [x28, #747]",
+        "add w22, w21, #0x7 (7)",
+        "and w23, w22, #0x7",
+        "add x0, x28, x21, lsl #4",
         "ldr q3, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "ldr q4, [x0, #768]",
-        "bsl v2.16b, v4.16b, v3.16b",
-        "add x0, x28, x20, lsl #4",
-        "str q2, [x0, #768]",
-        "msr nzcv, x21"
+        "mov v5.16b, v2.16b",
+        "bsl v5.16b, v4.16b, v3.16b",
+        "add x0, x28, x21, lsl #4",
+        "str q5, [x0, #768]",
+        "msr nzcv, x20"
       ]
     },
     "fnclex": {
@@ -5030,15 +5138,15 @@
       ]
     },
     "fucomi st0, st0": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 18,
       "Comment": [
         "0xdb 11b 0xe8 /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -5046,24 +5154,25 @@
         "cset w20, eq",
         "cset w21, lo",
         "cset w22, vs",
-        "orr w21, w21, w22",
-        "lsl x21, x21, #29",
-        "orr w20, w20, w22",
-        "orr w20, w21, w20, lsl #30",
-        "eor w26, w22, #0x1",
+        "orr w23, w21, w22",
+        "lsl x21, x23, #29",
+        "orr w23, w20, w22",
+        "orr w20, w21, w23, lsl #30",
+        "eor w21, w22, #0x1",
+        "mov x26, x21",
         "msr nzcv, x20"
       ]
     },
     "fucomi st0, st1": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 18,
       "Comment": [
         "0xdb 11b 0xe9 /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -5071,24 +5180,25 @@
         "cset w20, eq",
         "cset w21, lo",
         "cset w22, vs",
-        "orr w21, w21, w22",
-        "lsl x21, x21, #29",
-        "orr w20, w20, w22",
-        "orr w20, w21, w20, lsl #30",
-        "eor w26, w22, #0x1",
+        "orr w23, w21, w22",
+        "lsl x21, x23, #29",
+        "orr w23, w20, w22",
+        "orr w20, w21, w23, lsl #30",
+        "eor w21, w22, #0x1",
+        "mov x26, x21",
         "msr nzcv, x20"
       ]
     },
     "fucomi st0, st2": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 18,
       "Comment": [
         "0xdb 11b 0xea /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -5096,24 +5206,25 @@
         "cset w20, eq",
         "cset w21, lo",
         "cset w22, vs",
-        "orr w21, w21, w22",
-        "lsl x21, x21, #29",
-        "orr w20, w20, w22",
-        "orr w20, w21, w20, lsl #30",
-        "eor w26, w22, #0x1",
+        "orr w23, w21, w22",
+        "lsl x21, x23, #29",
+        "orr w23, w20, w22",
+        "orr w20, w21, w23, lsl #30",
+        "eor w21, w22, #0x1",
+        "mov x26, x21",
         "msr nzcv, x20"
       ]
     },
     "fucomi st0, st3": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 18,
       "Comment": [
         "0xdb 11b 0xeb /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -5121,24 +5232,25 @@
         "cset w20, eq",
         "cset w21, lo",
         "cset w22, vs",
-        "orr w21, w21, w22",
-        "lsl x21, x21, #29",
-        "orr w20, w20, w22",
-        "orr w20, w21, w20, lsl #30",
-        "eor w26, w22, #0x1",
+        "orr w23, w21, w22",
+        "lsl x21, x23, #29",
+        "orr w23, w20, w22",
+        "orr w20, w21, w23, lsl #30",
+        "eor w21, w22, #0x1",
+        "mov x26, x21",
         "msr nzcv, x20"
       ]
     },
     "fucomi st0, st4": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 18,
       "Comment": [
         "0xdb 11b 0xec /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -5146,24 +5258,25 @@
         "cset w20, eq",
         "cset w21, lo",
         "cset w22, vs",
-        "orr w21, w21, w22",
-        "lsl x21, x21, #29",
-        "orr w20, w20, w22",
-        "orr w20, w21, w20, lsl #30",
-        "eor w26, w22, #0x1",
+        "orr w23, w21, w22",
+        "lsl x21, x23, #29",
+        "orr w23, w20, w22",
+        "orr w20, w21, w23, lsl #30",
+        "eor w21, w22, #0x1",
+        "mov x26, x21",
         "msr nzcv, x20"
       ]
     },
     "fucomi st0, st5": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 18,
       "Comment": [
         "0xdb 11b 0xed /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -5171,24 +5284,25 @@
         "cset w20, eq",
         "cset w21, lo",
         "cset w22, vs",
-        "orr w21, w21, w22",
-        "lsl x21, x21, #29",
-        "orr w20, w20, w22",
-        "orr w20, w21, w20, lsl #30",
-        "eor w26, w22, #0x1",
+        "orr w23, w21, w22",
+        "lsl x21, x23, #29",
+        "orr w23, w20, w22",
+        "orr w20, w21, w23, lsl #30",
+        "eor w21, w22, #0x1",
+        "mov x26, x21",
         "msr nzcv, x20"
       ]
     },
     "fucomi st0, st6": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 18,
       "Comment": [
         "0xdb 11b 0xee /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -5196,24 +5310,25 @@
         "cset w20, eq",
         "cset w21, lo",
         "cset w22, vs",
-        "orr w21, w21, w22",
-        "lsl x21, x21, #29",
-        "orr w20, w20, w22",
-        "orr w20, w21, w20, lsl #30",
-        "eor w26, w22, #0x1",
+        "orr w23, w21, w22",
+        "lsl x21, x23, #29",
+        "orr w23, w20, w22",
+        "orr w20, w21, w23, lsl #30",
+        "eor w21, w22, #0x1",
+        "mov x26, x21",
         "msr nzcv, x20"
       ]
     },
     "fucomi st0, st7": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 18,
       "Comment": [
         "0xdb 11b 0xef /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -5221,24 +5336,25 @@
         "cset w20, eq",
         "cset w21, lo",
         "cset w22, vs",
-        "orr w21, w21, w22",
-        "lsl x21, x21, #29",
-        "orr w20, w20, w22",
-        "orr w20, w21, w20, lsl #30",
-        "eor w26, w22, #0x1",
+        "orr w23, w21, w22",
+        "lsl x21, x23, #29",
+        "orr w23, w20, w22",
+        "orr w20, w21, w23, lsl #30",
+        "eor w21, w22, #0x1",
+        "mov x26, x21",
         "msr nzcv, x20"
       ]
     },
     "fcomi st0, st0": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 18,
       "Comment": [
         "0xdb 11b 0xf0 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -5246,24 +5362,25 @@
         "cset w20, eq",
         "cset w21, lo",
         "cset w22, vs",
-        "orr w21, w21, w22",
-        "lsl x21, x21, #29",
-        "orr w20, w20, w22",
-        "orr w20, w21, w20, lsl #30",
-        "eor w26, w22, #0x1",
+        "orr w23, w21, w22",
+        "lsl x21, x23, #29",
+        "orr w23, w20, w22",
+        "orr w20, w21, w23, lsl #30",
+        "eor w21, w22, #0x1",
+        "mov x26, x21",
         "msr nzcv, x20"
       ]
     },
     "fcomi st0, st1": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 18,
       "Comment": [
         "0xdb 11b 0xf1 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -5271,24 +5388,25 @@
         "cset w20, eq",
         "cset w21, lo",
         "cset w22, vs",
-        "orr w21, w21, w22",
-        "lsl x21, x21, #29",
-        "orr w20, w20, w22",
-        "orr w20, w21, w20, lsl #30",
-        "eor w26, w22, #0x1",
+        "orr w23, w21, w22",
+        "lsl x21, x23, #29",
+        "orr w23, w20, w22",
+        "orr w20, w21, w23, lsl #30",
+        "eor w21, w22, #0x1",
+        "mov x26, x21",
         "msr nzcv, x20"
       ]
     },
     "fcomi st0, st2": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 18,
       "Comment": [
         "0xdb 11b 0xf2 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -5296,24 +5414,25 @@
         "cset w20, eq",
         "cset w21, lo",
         "cset w22, vs",
-        "orr w21, w21, w22",
-        "lsl x21, x21, #29",
-        "orr w20, w20, w22",
-        "orr w20, w21, w20, lsl #30",
-        "eor w26, w22, #0x1",
+        "orr w23, w21, w22",
+        "lsl x21, x23, #29",
+        "orr w23, w20, w22",
+        "orr w20, w21, w23, lsl #30",
+        "eor w21, w22, #0x1",
+        "mov x26, x21",
         "msr nzcv, x20"
       ]
     },
     "fcomi st0, st3": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 18,
       "Comment": [
         "0xdb 11b 0xf3 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -5321,24 +5440,25 @@
         "cset w20, eq",
         "cset w21, lo",
         "cset w22, vs",
-        "orr w21, w21, w22",
-        "lsl x21, x21, #29",
-        "orr w20, w20, w22",
-        "orr w20, w21, w20, lsl #30",
-        "eor w26, w22, #0x1",
+        "orr w23, w21, w22",
+        "lsl x21, x23, #29",
+        "orr w23, w20, w22",
+        "orr w20, w21, w23, lsl #30",
+        "eor w21, w22, #0x1",
+        "mov x26, x21",
         "msr nzcv, x20"
       ]
     },
     "fcomi st0, st4": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 18,
       "Comment": [
         "0xdb 11b 0xf4 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -5346,24 +5466,25 @@
         "cset w20, eq",
         "cset w21, lo",
         "cset w22, vs",
-        "orr w21, w21, w22",
-        "lsl x21, x21, #29",
-        "orr w20, w20, w22",
-        "orr w20, w21, w20, lsl #30",
-        "eor w26, w22, #0x1",
+        "orr w23, w21, w22",
+        "lsl x21, x23, #29",
+        "orr w23, w20, w22",
+        "orr w20, w21, w23, lsl #30",
+        "eor w21, w22, #0x1",
+        "mov x26, x21",
         "msr nzcv, x20"
       ]
     },
     "fcomi st0, st5": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 18,
       "Comment": [
         "0xdb 11b 0xf5 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -5371,24 +5492,25 @@
         "cset w20, eq",
         "cset w21, lo",
         "cset w22, vs",
-        "orr w21, w21, w22",
-        "lsl x21, x21, #29",
-        "orr w20, w20, w22",
-        "orr w20, w21, w20, lsl #30",
-        "eor w26, w22, #0x1",
+        "orr w23, w21, w22",
+        "lsl x21, x23, #29",
+        "orr w23, w20, w22",
+        "orr w20, w21, w23, lsl #30",
+        "eor w21, w22, #0x1",
+        "mov x26, x21",
         "msr nzcv, x20"
       ]
     },
     "fcomi st0, st6": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 18,
       "Comment": [
         "0xdb 11b 0xf6 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -5396,24 +5518,25 @@
         "cset w20, eq",
         "cset w21, lo",
         "cset w22, vs",
-        "orr w21, w21, w22",
-        "lsl x21, x21, #29",
-        "orr w20, w20, w22",
-        "orr w20, w21, w20, lsl #30",
-        "eor w26, w22, #0x1",
+        "orr w23, w21, w22",
+        "lsl x21, x23, #29",
+        "orr w23, w20, w22",
+        "orr w20, w21, w23, lsl #30",
+        "eor w21, w22, #0x1",
+        "mov x26, x21",
         "msr nzcv, x20"
       ]
     },
     "fcomi st0, st7": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 18,
       "Comment": [
         "0xdb 11b 0xf7 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -5421,52 +5544,56 @@
         "cset w20, eq",
         "cset w21, lo",
         "cset w22, vs",
-        "orr w21, w21, w22",
-        "lsl x21, x21, #29",
-        "orr w20, w20, w22",
-        "orr w20, w21, w20, lsl #30",
-        "eor w26, w22, #0x1",
+        "orr w23, w21, w22",
+        "lsl x21, x23, #29",
+        "orr w23, w20, w22",
+        "orr w20, w21, w23, lsl #30",
+        "eor w21, w22, #0x1",
+        "mov x26, x21",
         "msr nzcv, x20"
       ]
     },
     "fadd qword [rax]": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0xdc !11b /0"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
+        "fadd d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fmul qword [rax]": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0xdc !11b /1"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
+        "fmul d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fcom qword [rax]": {
-      "ExpectedInstructionCount": 15,
+      "ExpectedInstructionCount": 16,
       "Comment": [
         "0xdc !11b /2"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
@@ -5474,22 +5601,23 @@
         "cset w21, vs",
         "cset w22, eq",
         "cset w23, mi",
-        "orr w23, w23, w21",
-        "strb w23, [x28, #744]",
-        "orr w22, w22, w21",
-        "strb w22, [x28, #750]",
+        "orr w24, w23, w21",
+        "strb w24, [x28, #744]",
+        "orr w23, w22, w21",
+        "strb w23, [x28, #750]",
         "strb w20, [x28, #745]",
         "strb w21, [x28, #746]"
       ]
     },
     "fcomp qword [rax]": {
-      "ExpectedInstructionCount": 23,
+      "ExpectedInstructionCount": 24,
       "Comment": [
         "0xdc !11b /3"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
         "fcmp d3, d2",
@@ -5498,79 +5626,83 @@
         "cset w23, vs",
         "cset w24, eq",
         "cset w25, mi",
-        "orr w25, w25, w23",
-        "strb w25, [x28, #744]",
-        "orr w24, w24, w23",
-        "strb w24, [x28, #750]",
+        "orr w30, w25, w23",
+        "strb w30, [x28, #744]",
+        "orr w25, w24, w23",
+        "strb w25, [x28, #750]",
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fsub qword [rax]": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0xdc !11b /4"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
+        "fsub d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr qword [rax]": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0xdc !11b /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
+        "fsub d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv qword [rax]": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0xdc !11b /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
+        "fdiv d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr qword [rax]": {
-      "ExpectedInstructionCount": 7,
+      "ExpectedInstructionCount": 8,
       "Comment": [
         "0xdc !11b /7"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
+        "fdiv d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "db 0xdc, 0xc0": {
@@ -5583,14 +5715,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fadd d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fadd st1, st0": {
@@ -5601,14 +5733,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fadd d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fadd st2, st0": {
@@ -5619,14 +5751,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fadd d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fadd st3, st0": {
@@ -5637,14 +5769,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fadd d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fadd st4, st0": {
@@ -5655,14 +5787,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fadd d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fadd st5, st0": {
@@ -5673,14 +5805,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fadd d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fadd st6, st0": {
@@ -5691,14 +5823,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fadd d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fadd st7, st0": {
@@ -5709,14 +5841,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fadd d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "db 0xdc, 0xc8": {
@@ -5729,14 +5861,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fmul d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fmul st1, st0": {
@@ -5747,14 +5879,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fmul d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fmul st2, st0": {
@@ -5765,14 +5897,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fmul d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fmul st3, st0": {
@@ -5783,14 +5915,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fmul d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fmul st4, st0": {
@@ -5801,14 +5933,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fmul d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fmul st5, st0": {
@@ -5819,14 +5951,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fmul d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fmul st6, st0": {
@@ -5837,14 +5969,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fmul d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fmul st7, st0": {
@@ -5855,14 +5987,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fmul d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "db 0xdc, 0xe0": {
@@ -5875,14 +6007,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr st1, st0": {
@@ -5893,14 +6025,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr st2, st0": {
@@ -5911,14 +6043,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr st3, st0": {
@@ -5929,14 +6061,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr st4, st0": {
@@ -5947,14 +6079,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr st5, st0": {
@@ -5965,14 +6097,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr st6, st0": {
@@ -5983,14 +6115,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubr st7, st0": {
@@ -6001,14 +6133,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "db 0xdc, 0xe8": {
@@ -6021,14 +6153,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsub st1, st0": {
@@ -6039,14 +6171,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsub st2, st0": {
@@ -6057,14 +6189,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsub st3, st0": {
@@ -6075,14 +6207,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsub st4, st0": {
@@ -6093,14 +6225,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsub st5, st0": {
@@ -6111,14 +6243,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsub st6, st0": {
@@ -6129,14 +6261,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsub st7, st0": {
@@ -6147,14 +6279,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fsub d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "db 0xdc, 0xf0": {
@@ -6167,14 +6299,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr st1, st0": {
@@ -6185,14 +6317,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr st2, st0": {
@@ -6203,14 +6335,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr st3, st0": {
@@ -6221,14 +6353,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr st4, st0": {
@@ -6239,14 +6371,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr st5, st0": {
@@ -6257,14 +6389,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr st6, st0": {
@@ -6275,14 +6407,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivr st7, st0": {
@@ -6293,14 +6425,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d3, d2",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "db 0xdc, 0xf8": {
@@ -6313,14 +6445,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv st1, st0": {
@@ -6331,14 +6463,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv st2, st0": {
@@ -6349,14 +6481,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv st3, st0": {
@@ -6367,14 +6499,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv st4, st0": {
@@ -6385,14 +6517,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv st5, st0": {
@@ -6403,14 +6535,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv st6, st0": {
@@ -6421,14 +6553,14 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdiv st7, st0": {
@@ -6439,30 +6571,31 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "fdiv d4, d2, d3",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fld qword [rax]": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdd !11b /0"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldr d2, [x4]",
+        "mov x21, x4",
+        "ldr d2, [x21]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
@@ -6470,7 +6603,7 @@
       ]
     },
     "fisttp qword [rax]": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdd !11b /1"
       ],
@@ -6479,19 +6612,20 @@
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
         "fcvtzs x21, d2",
-        "str x21, [x4]",
+        "mov x22, x4",
+        "str x21, [x22]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fst qword [rax]": {
-      "ExpectedInstructionCount": 4,
+      "ExpectedInstructionCount": 5,
       "Comment": [
         "0xdd !11b /2"
       ],
@@ -6499,11 +6633,12 @@
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
-        "str d2, [x4]"
+        "mov x20, x4",
+        "str d2, [x20]"
       ]
     },
     "fstp qword [rax]": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 13,
       "Comment": [
         "0xdd !11b /3"
       ],
@@ -6511,87 +6646,90 @@
         "ldrb w20, [x28, #747]",
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
-        "str d2, [x4]",
+        "mov x21, x4",
+        "str d2, [x21]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "frstor [rax]": {
-      "ExpectedInstructionCount": 325,
+      "ExpectedInstructionCount": 328,
       "Comment": [
         "0xdd !11b /4"
       ],
       "ExpectedArm64ASM": [
-        "ldrh w20, [x4]",
-        "lsr w21, w20, #10",
-        "and w21, w21, #0x3",
-        "rbit w1, w21",
+        "mov x20, x4",
+        "ldrh w21, [x20]",
+        "lsr w22, w21, #10",
+        "and w23, w22, #0x3",
+        "rbit w1, w23",
         "lsr w1, w1, #30",
         "mrs x0, fpcr",
         "bfi x0, x1, #22, #2",
-        "lsr x1, x21, #2",
+        "lsr x1, x23, #2",
         "bfi x0, x1, #24, #1",
         "msr fpcr, x0",
-        "strh w20, [x28, #1024]",
-        "strh w20, [x28, #1024]",
-        "ldr w20, [x4, #4]",
-        "ubfx w21, w20, #11, #3",
-        "strb w21, [x28, #747]",
-        "ubfx w22, w20, #8, #1",
-        "ubfx w23, w20, #9, #1",
-        "ubfx w24, w20, #10, #1",
-        "ubfx w20, w20, #14, #1",
-        "strb w22, [x28, #744]",
-        "strb w23, [x28, #745]",
-        "strb w24, [x28, #746]",
-        "strb w20, [x28, #750]",
-        "ldr w20, [x4, #8]",
-        "ubfx w22, w20, #0, #2",
-        "mrs x23, nzcv",
-        "cmp x22, #0x3 (3)",
-        "cset x22, ne",
-        "ubfx w24, w20, #2, #2",
-        "cmp x24, #0x3 (3)",
-        "cset x24, ne",
-        "orr w22, w22, w24, lsl #1",
-        "ubfx w24, w20, #4, #2",
-        "cmp x24, #0x3 (3)",
-        "cset x24, ne",
-        "orr w22, w22, w24, lsl #2",
-        "ubfx w24, w20, #6, #2",
-        "cmp x24, #0x3 (3)",
-        "cset x24, ne",
-        "orr w22, w22, w24, lsl #3",
-        "ubfx w24, w20, #8, #2",
-        "cmp x24, #0x3 (3)",
-        "cset x24, ne",
-        "orr w22, w22, w24, lsl #4",
-        "ubfx w24, w20, #10, #2",
-        "cmp x24, #0x3 (3)",
-        "cset x24, ne",
-        "orr w22, w22, w24, lsl #5",
-        "ubfx w24, w20, #12, #2",
-        "cmp x24, #0x3 (3)",
-        "cset x24, ne",
-        "orr w22, w22, w24, lsl #6",
-        "ubfx w20, w20, #14, #2",
-        "cmp x20, #0x3 (3)",
-        "cset x20, ne",
-        "orr w20, w22, w20, lsl #7",
-        "strb w20, [x28, #1026]",
-        "add x20, x4, #0x1c (28)",
-        "mov x22, #0xffffffffffffffff",
-        "mov w24, #0xffff",
-        "fmov d2, x22",
-        "mov v2.d[1], x24",
-        "ldur q3, [x4, #28]",
-        "and v3.16b, v3.16b, v2.16b",
+        "strh w21, [x28, #1024]",
+        "strh w21, [x28, #1024]",
+        "ldr w21, [x20, #4]",
+        "ubfx w22, w21, #11, #3",
+        "strb w22, [x28, #747]",
+        "ubfx w23, w21, #8, #1",
+        "ubfx w24, w21, #9, #1",
+        "ubfx w25, w21, #10, #1",
+        "ubfx w30, w21, #14, #1",
+        "strb w23, [x28, #744]",
+        "strb w24, [x28, #745]",
+        "strb w25, [x28, #746]",
+        "strb w30, [x28, #750]",
+        "ldr w21, [x20, #8]",
+        "ubfx w23, w21, #0, #2",
+        "mrs x24, nzcv",
+        "cmp x23, #0x3 (3)",
+        "cset x25, ne",
+        "ubfx w23, w21, #2, #2",
+        "cmp x23, #0x3 (3)",
+        "cset x30, ne",
+        "orr w23, w25, w30, lsl #1",
+        "ubfx w25, w21, #4, #2",
+        "cmp x25, #0x3 (3)",
+        "cset x30, ne",
+        "orr w25, w23, w30, lsl #2",
+        "ubfx w23, w21, #6, #2",
+        "cmp x23, #0x3 (3)",
+        "cset x30, ne",
+        "orr w23, w25, w30, lsl #3",
+        "ubfx w25, w21, #8, #2",
+        "cmp x25, #0x3 (3)",
+        "cset x30, ne",
+        "orr w25, w23, w30, lsl #4",
+        "ubfx w23, w21, #10, #2",
+        "cmp x23, #0x3 (3)",
+        "cset x30, ne",
+        "orr w23, w25, w30, lsl #5",
+        "ubfx w25, w21, #12, #2",
+        "cmp x25, #0x3 (3)",
+        "cset x30, ne",
+        "orr w25, w23, w30, lsl #6",
+        "ubfx w23, w21, #14, #2",
+        "cmp x23, #0x3 (3)",
+        "cset x21, ne",
+        "orr w23, w25, w21, lsl #7",
+        "strb w23, [x28, #1026]",
+        "add x21, x20, #0x1c (28)",
+        "mov x23, #0xffffffffffffffff",
+        "mov w25, #0xffff",
+        "fmov d2, x23",
+        "mov v3.16b, v2.16b",
+        "mov v3.d[1], x25",
+        "ldur q2, [x20, #28]",
+        "and v4.16b, v2.16b, v3.16b",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -6604,8 +6742,8 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
+        "mov x1, v4.d[0]",
+        "umov w2, v4.h[4]",
         "ldr x3, [x28, #1192]",
         "blr x3",
         "ldr w4, [x28, #728]",
@@ -6617,14 +6755,14 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "mov v3.8b, v0.8b",
-        "add x0, x28, x21, lsl #4",
-        "str d3, [x0, #768]",
-        "add x22, x20, #0xa (10)",
-        "add w21, w21, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "ldur q3, [x20, #10]",
-        "and v3.16b, v3.16b, v2.16b",
+        "mov v2.8b, v0.8b",
+        "add x0, x28, x22, lsl #4",
+        "str d2, [x0, #768]",
+        "add x20, x21, #0xa (10)",
+        "add w23, w22, #0x1 (1)",
+        "and w22, w23, #0x7",
+        "ldur q2, [x21, #10]",
+        "and v4.16b, v2.16b, v3.16b",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -6637,8 +6775,8 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
+        "mov x1, v4.d[0]",
+        "umov w2, v4.h[4]",
         "ldr x3, [x28, #1192]",
         "blr x3",
         "ldr w4, [x28, #728]",
@@ -6650,14 +6788,14 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "mov v3.8b, v0.8b",
-        "add x0, x28, x21, lsl #4",
-        "str d3, [x0, #768]",
-        "add x20, x22, #0xa (10)",
-        "add w21, w21, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "ldur q3, [x22, #10]",
-        "and v3.16b, v3.16b, v2.16b",
+        "mov v2.8b, v0.8b",
+        "add x0, x28, x22, lsl #4",
+        "str d2, [x0, #768]",
+        "add x21, x20, #0xa (10)",
+        "add w23, w22, #0x1 (1)",
+        "and w22, w23, #0x7",
+        "ldur q2, [x20, #10]",
+        "and v4.16b, v2.16b, v3.16b",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -6670,8 +6808,8 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
+        "mov x1, v4.d[0]",
+        "umov w2, v4.h[4]",
         "ldr x3, [x28, #1192]",
         "blr x3",
         "ldr w4, [x28, #728]",
@@ -6683,14 +6821,14 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "mov v3.8b, v0.8b",
-        "add x0, x28, x21, lsl #4",
-        "str d3, [x0, #768]",
-        "add x22, x20, #0xa (10)",
-        "add w21, w21, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "ldur q3, [x20, #10]",
-        "and v3.16b, v3.16b, v2.16b",
+        "mov v2.8b, v0.8b",
+        "add x0, x28, x22, lsl #4",
+        "str d2, [x0, #768]",
+        "add x20, x21, #0xa (10)",
+        "add w23, w22, #0x1 (1)",
+        "and w22, w23, #0x7",
+        "ldur q2, [x21, #10]",
+        "and v4.16b, v2.16b, v3.16b",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -6703,8 +6841,8 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
+        "mov x1, v4.d[0]",
+        "umov w2, v4.h[4]",
         "ldr x3, [x28, #1192]",
         "blr x3",
         "ldr w4, [x28, #728]",
@@ -6716,14 +6854,14 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "mov v3.8b, v0.8b",
-        "add x0, x28, x21, lsl #4",
-        "str d3, [x0, #768]",
-        "add x20, x22, #0xa (10)",
-        "add w21, w21, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "ldur q3, [x22, #10]",
-        "and v3.16b, v3.16b, v2.16b",
+        "mov v2.8b, v0.8b",
+        "add x0, x28, x22, lsl #4",
+        "str d2, [x0, #768]",
+        "add x21, x20, #0xa (10)",
+        "add w23, w22, #0x1 (1)",
+        "and w22, w23, #0x7",
+        "ldur q2, [x20, #10]",
+        "and v4.16b, v2.16b, v3.16b",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -6736,8 +6874,8 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
+        "mov x1, v4.d[0]",
+        "umov w2, v4.h[4]",
         "ldr x3, [x28, #1192]",
         "blr x3",
         "ldr w4, [x28, #728]",
@@ -6749,14 +6887,14 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "mov v3.8b, v0.8b",
-        "add x0, x28, x21, lsl #4",
-        "str d3, [x0, #768]",
-        "add x22, x20, #0xa (10)",
-        "add w21, w21, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "ldur q3, [x20, #10]",
-        "and v3.16b, v3.16b, v2.16b",
+        "mov v2.8b, v0.8b",
+        "add x0, x28, x22, lsl #4",
+        "str d2, [x0, #768]",
+        "add x20, x21, #0xa (10)",
+        "add w23, w22, #0x1 (1)",
+        "and w22, w23, #0x7",
+        "ldur q2, [x21, #10]",
+        "and v4.16b, v2.16b, v3.16b",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -6769,8 +6907,8 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v3.d[0]",
-        "umov w2, v3.h[4]",
+        "mov x1, v4.d[0]",
+        "umov w2, v4.h[4]",
         "ldr x3, [x28, #1192]",
         "blr x3",
         "ldr w4, [x28, #728]",
@@ -6782,14 +6920,14 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "mov v3.8b, v0.8b",
-        "add x0, x28, x21, lsl #4",
-        "str d3, [x0, #768]",
-        "add x20, x22, #0xa (10)",
-        "add w21, w21, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "ldur q3, [x22, #10]",
-        "and v2.16b, v3.16b, v2.16b",
+        "mov v2.8b, v0.8b",
+        "add x0, x28, x22, lsl #4",
+        "str d2, [x0, #768]",
+        "add x21, x20, #0xa (10)",
+        "add w23, w22, #0x1 (1)",
+        "and w22, w23, #0x7",
+        "ldur q2, [x20, #10]",
+        "and v4.16b, v2.16b, v3.16b",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -6802,8 +6940,8 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v2.d[0]",
-        "umov w2, v2.h[4]",
+        "mov x1, v4.d[0]",
+        "umov w2, v4.h[4]",
         "ldr x3, [x28, #1192]",
         "blr x3",
         "ldr w4, [x28, #728]",
@@ -6816,14 +6954,15 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
         "mov v2.8b, v0.8b",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str d2, [x0, #768]",
-        "add x22, x20, #0xa (10)",
-        "add w21, w21, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "ldur d2, [x20, #10]",
-        "ldr h3, [x22, #8]",
-        "mov v2.h[4], v3.h[0]",
+        "add x20, x21, #0xa (10)",
+        "add w23, w22, #0x1 (1)",
+        "and w22, w23, #0x7",
+        "ldur d2, [x21, #10]",
+        "ldr h3, [x20, #8]",
+        "mov v4.16b, v2.16b",
+        "mov v4.h[4], v3.h[0]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -6836,8 +6975,8 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v2.d[0]",
-        "umov w2, v2.h[4]",
+        "mov x1, v4.d[0]",
+        "umov w2, v4.h[4]",
         "ldr x3, [x28, #1192]",
         "blr x3",
         "ldr w4, [x28, #728]",
@@ -6850,81 +6989,85 @@
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
         "mov v2.8b, v0.8b",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str d2, [x0, #768]",
-        "msr nzcv, x23"
+        "msr nzcv, x24"
       ]
     },
     "fnsave [rax]": {
-      "ExpectedInstructionCount": 335,
+      "ExpectedInstructionCount": 340,
       "Comment": [
         "0xdd !11b /6"
       ],
       "ExpectedArm64ASM": [
-        "ldrb w20, [x28, #747]",
-        "ldrh w21, [x28, #1024]",
-        "str w21, [x4]",
-        "mov w21, #0x0",
-        "mov x22, x21",
-        "bfi x22, x20, #11, #3",
-        "ldrb w23, [x28, #744]",
-        "ldrb w24, [x28, #745]",
-        "ldrb w25, [x28, #746]",
-        "ldrb w30, [x28, #750]",
-        "orr x22, x22, x23, lsl #8",
-        "orr x22, x22, x24, lsl #9",
-        "orr x22, x22, x25, lsl #10",
-        "orr x22, x22, x30, lsl #14",
-        "str w22, [x4, #4]",
-        "ldrb w22, [x28, #1026]",
-        "and w23, w22, #0x1",
+        "sub sp, sp, #0x20 (32)",
+        "mov x20, x4",
+        "ldrb w21, [x28, #747]",
+        "ldrh w22, [x28, #1024]",
+        "str w22, [x20]",
+        "mov w22, #0x0",
+        "mov x23, x22",
+        "bfi x23, x21, #11, #3",
+        "ldrb w24, [x28, #744]",
+        "ldrb w25, [x28, #745]",
+        "ldrb w30, [x28, #746]",
+        "ldrb w18, [x28, #750]",
+        "strb w21, [sp]",
+        "orr x21, x23, x24, lsl #8",
+        "orr x23, x21, x25, lsl #9",
+        "orr x21, x23, x30, lsl #10",
+        "orr x23, x21, x18, lsl #14",
+        "str w23, [x20, #4]",
+        "ldrb w21, [x28, #1026]",
+        "and w23, w21, #0x1",
         "mov w24, #0x3",
         "mrs x25, nzcv",
         "cmp x23, #0x0 (0)",
-        "csel x23, x24, x21, eq",
-        "orr w23, w21, w23",
-        "lsr w30, w22, #1",
-        "and w30, w30, #0x1",
-        "cmp x30, #0x0 (0)",
-        "csel x30, x24, x21, eq",
-        "orr w23, w23, w30, lsl #2",
-        "lsr w30, w22, #2",
-        "and w30, w30, #0x1",
+        "csel x30, x24, x22, eq",
+        "orr w23, w22, w30",
+        "lsr w30, w21, #1",
+        "and w18, w30, #0x1",
+        "cmp x18, #0x0 (0)",
+        "csel x30, x24, x22, eq",
+        "orr w18, w23, w30, lsl #2",
+        "lsr w23, w21, #2",
+        "and w30, w23, #0x1",
         "cmp x30, #0x0 (0)",
-        "csel x30, x24, x21, eq",
-        "orr w23, w23, w30, lsl #4",
-        "lsr w30, w22, #3",
-        "and w30, w30, #0x1",
-        "cmp x30, #0x0 (0)",
-        "csel x30, x24, x21, eq",
-        "orr w23, w23, w30, lsl #6",
-        "lsr w30, w22, #4",
-        "and w30, w30, #0x1",
-        "cmp x30, #0x0 (0)",
-        "csel x30, x24, x21, eq",
-        "orr w23, w23, w30, lsl #8",
-        "lsr w30, w22, #5",
-        "and w30, w30, #0x1",
+        "csel x23, x24, x22, eq",
+        "orr w30, w18, w23, lsl #4",
+        "lsr w23, w21, #3",
+        "and w18, w23, #0x1",
+        "cmp x18, #0x0 (0)",
+        "csel x23, x24, x22, eq",
+        "orr w18, w30, w23, lsl #6",
+        "lsr w23, w21, #4",
+        "and w30, w23, #0x1",
         "cmp x30, #0x0 (0)",
-        "csel x30, x24, x21, eq",
-        "orr w23, w23, w30, lsl #10",
-        "lsr w30, w22, #6",
-        "and w30, w30, #0x1",
+        "csel x23, x24, x22, eq",
+        "orr w30, w18, w23, lsl #8",
+        "lsr w23, w21, #5",
+        "and w18, w23, #0x1",
+        "cmp x18, #0x0 (0)",
+        "csel x23, x24, x22, eq",
+        "orr w18, w30, w23, lsl #10",
+        "lsr w23, w21, #6",
+        "and w30, w23, #0x1",
         "cmp x30, #0x0 (0)",
-        "csel x30, x24, x21, eq",
-        "orr w23, w23, w30, lsl #12",
-        "lsr w22, w22, #7",
-        "and w22, w22, #0x1",
-        "cmp x22, #0x0 (0)",
-        "csel x22, x24, x21, eq",
-        "orr w22, w23, w22, lsl #14",
-        "str w22, [x4, #8]",
-        "str w21, [x4, #12]",
-        "str w21, [x4, #16]",
-        "str w21, [x4, #20]",
-        "str w21, [x4, #24]",
-        "add x22, x4, #0x1c (28)",
-        "add x0, x28, x20, lsl #4",
+        "csel x23, x24, x22, eq",
+        "orr w30, w18, w23, lsl #12",
+        "lsr w23, w21, #7",
+        "and w21, w23, #0x1",
+        "cmp x21, #0x0 (0)",
+        "csel x23, x24, x22, eq",
+        "orr w21, w30, w23, lsl #14",
+        "str w21, [x20, #8]",
+        "str w22, [x20, #12]",
+        "str w22, [x20, #16]",
+        "str w22, [x20, #20]",
+        "str w22, [x20, #24]",
+        "add x21, x20, #0x1c (28)",
+        "ldrb w23, [sp]",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
@@ -6950,14 +7093,14 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "stur q2, [x4, #28]",
-        "add x23, x22, #0xa (10)",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "stur q3, [x20, #28]",
+        "add x20, x21, #0xa (10)",
+        "add w24, w23, #0x1 (1)",
+        "and w23, w24, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
@@ -6983,14 +7126,14 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "stur q2, [x22, #10]",
-        "add x22, x23, #0xa (10)",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "stur q3, [x21, #10]",
+        "add x21, x20, #0xa (10)",
+        "add w24, w23, #0x1 (1)",
+        "and w23, w24, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
@@ -7016,14 +7159,14 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "stur q2, [x23, #10]",
-        "add x23, x22, #0xa (10)",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "stur q3, [x20, #10]",
+        "add x20, x21, #0xa (10)",
+        "add w24, w23, #0x1 (1)",
+        "and w23, w24, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
@@ -7049,14 +7192,14 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "stur q2, [x22, #10]",
-        "add x22, x23, #0xa (10)",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "stur q3, [x21, #10]",
+        "add x21, x20, #0xa (10)",
+        "add w24, w23, #0x1 (1)",
+        "and w23, w24, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
@@ -7082,14 +7225,14 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "stur q2, [x23, #10]",
-        "add x23, x22, #0xa (10)",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "stur q3, [x20, #10]",
+        "add x20, x21, #0xa (10)",
+        "add w24, w23, #0x1 (1)",
+        "and w23, w24, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
@@ -7115,14 +7258,14 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "stur q2, [x22, #10]",
-        "add x22, x23, #0xa (10)",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "stur q3, [x21, #10]",
+        "add x21, x20, #0xa (10)",
+        "add w24, w23, #0x1 (1)",
+        "and w23, w24, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
@@ -7148,14 +7291,14 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "stur q2, [x23, #10]",
-        "add x23, x22, #0xa (10)",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "add x0, x28, x20, lsl #4",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "stur q3, [x20, #10]",
+        "add x20, x21, #0xa (10)",
+        "add w24, w23, #0x1 (1)",
+        "and w23, w24, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
@@ -7181,41 +7324,44 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
-        "stur d2, [x22, #10]",
-        "dup v2.8h, v2.h[4]",
-        "str h2, [x23, #8]",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
+        "stur d3, [x21, #10]",
+        "dup v2.8h, v3.h[4]",
+        "str h2, [x20, #8]",
         "mov w20, #0x37f",
         "strh w20, [x28, #1024]",
-        "strb w21, [x28, #747]",
-        "strb w21, [x28, #744]",
-        "strb w21, [x28, #745]",
-        "strb w21, [x28, #746]",
-        "strb w21, [x28, #750]",
-        "strb w21, [x28, #1026]",
-        "msr nzcv, x25"
+        "strb w22, [x28, #747]",
+        "strb w22, [x28, #744]",
+        "strb w22, [x28, #745]",
+        "strb w22, [x28, #746]",
+        "strb w22, [x28, #750]",
+        "strb w22, [x28, #1026]",
+        "msr nzcv, x25",
+        "add sp, sp, #0x20 (32)"
       ]
     },
     "fnstsw [rax]": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdd !11b /7"
       ],
       "ExpectedArm64ASM": [
         "mov w20, #0x0",
         "ldrb w21, [x28, #747]",
-        "bfi x20, x21, #11, #3",
-        "ldrb w21, [x28, #744]",
-        "ldrb w22, [x28, #745]",
+        "mov x22, x20",
+        "bfi x22, x21, #11, #3",
+        "ldrb w20, [x28, #744]",
+        "ldrb w21, [x28, #745]",
         "ldrb w23, [x28, #746]",
         "ldrb w24, [x28, #750]",
-        "orr x20, x20, x21, lsl #8",
-        "orr x20, x20, x22, lsl #9",
-        "orr x20, x20, x23, lsl #10",
-        "orr x20, x20, x24, lsl #14",
-        "strh w20, [x4]"
+        "orr x25, x22, x20, lsl #8",
+        "orr x20, x25, x21, lsl #9",
+        "orr x21, x20, x23, lsl #10",
+        "orr x20, x21, x24, lsl #14",
+        "mov x21, x4",
+        "strh w20, [x21]"
       ]
     },
     "ffree st0": {
@@ -7225,12 +7371,12 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x0 (0)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x0 (0)",
+        "and w20, w21, #0x7",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w20, w22, w20",
-        "bic w20, w21, w20",
+        "lsl w23, w22, w20",
+        "bic w20, w21, w23",
         "strb w20, [x28, #1026]"
       ]
     },
@@ -7242,11 +7388,11 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w20, w21, w20",
-        "bic w20, w22, w20",
+        "lsl w23, w21, w20",
+        "bic w20, w22, w23",
         "strb w20, [x28, #1026]"
       ]
     },
@@ -7257,12 +7403,12 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x2 (2)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x2 (2)",
+        "and w20, w21, #0x7",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w20, w22, w20",
-        "bic w20, w21, w20",
+        "lsl w23, w22, w20",
+        "bic w20, w21, w23",
         "strb w20, [x28, #1026]"
       ]
     },
@@ -7273,12 +7419,12 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x3 (3)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x3 (3)",
+        "and w20, w21, #0x7",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w20, w22, w20",
-        "bic w20, w21, w20",
+        "lsl w23, w22, w20",
+        "bic w20, w21, w23",
         "strb w20, [x28, #1026]"
       ]
     },
@@ -7289,12 +7435,12 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x4 (4)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x4 (4)",
+        "and w20, w21, #0x7",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w20, w22, w20",
-        "bic w20, w21, w20",
+        "lsl w23, w22, w20",
+        "bic w20, w21, w23",
         "strb w20, [x28, #1026]"
       ]
     },
@@ -7305,12 +7451,12 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x5 (5)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x5 (5)",
+        "and w20, w21, #0x7",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w20, w22, w20",
-        "bic w20, w21, w20",
+        "lsl w23, w22, w20",
+        "bic w20, w21, w23",
         "strb w20, [x28, #1026]"
       ]
     },
@@ -7321,12 +7467,12 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x6 (6)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x6 (6)",
+        "and w20, w21, #0x7",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w20, w22, w20",
-        "bic w20, w21, w20",
+        "lsl w23, w22, w20",
+        "bic w20, w21, w23",
         "strb w20, [x28, #1026]"
       ]
     },
@@ -7337,12 +7483,12 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x7 (7)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x7 (7)",
+        "and w20, w21, #0x7",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w20, w22, w20",
-        "bic w20, w21, w20",
+        "lsl w23, w22, w20",
+        "bic w20, w21, w23",
         "strb w20, [x28, #1026]"
       ]
     },
@@ -7354,10 +7500,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -7369,10 +7515,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -7384,10 +7530,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -7399,10 +7545,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -7414,10 +7560,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -7429,10 +7575,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -7444,10 +7590,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -7459,10 +7605,10 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]"
       ]
     },
@@ -7474,18 +7620,18 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -7498,17 +7644,17 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
+        "and w23, w22, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x22, lsl #4",
+        "add x0, x28, x23, lsl #4",
         "str q2, [x0, #768]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -7520,18 +7666,18 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -7543,18 +7689,18 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -7566,18 +7712,18 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -7589,18 +7735,18 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -7612,18 +7758,18 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -7635,18 +7781,18 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
+        "and w22, w21, #0x7",
         "add x0, x28, x20, lsl #4",
         "ldr q2, [x0, #768]",
-        "add x0, x28, x21, lsl #4",
+        "add x0, x28, x22, lsl #4",
         "str q2, [x0, #768]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -7659,8 +7805,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x0",
         "add w22, w20, #0x0 (0)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -7668,10 +7814,10 @@
         "cset w20, vs",
         "cset w22, eq",
         "cset w23, mi",
-        "orr w23, w23, w20",
-        "strb w23, [x28, #744]",
-        "orr w22, w22, w20",
-        "strb w22, [x28, #750]",
+        "orr w24, w23, w20",
+        "strb w24, [x28, #744]",
+        "orr w23, w22, w20",
+        "strb w23, [x28, #750]",
         "strb w21, [x28, #745]",
         "strb w20, [x28, #746]"
       ]
@@ -7684,8 +7830,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x1 (1)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -7694,10 +7840,10 @@
         "cset w21, vs",
         "cset w22, eq",
         "cset w23, mi",
-        "orr w23, w23, w21",
-        "strb w23, [x28, #744]",
-        "orr w22, w22, w21",
-        "strb w22, [x28, #750]",
+        "orr w24, w23, w21",
+        "strb w24, [x28, #744]",
+        "orr w23, w22, w21",
+        "strb w23, [x28, #750]",
         "strb w20, [x28, #745]",
         "strb w21, [x28, #746]"
       ]
@@ -7710,8 +7856,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -7720,10 +7866,10 @@
         "cset w21, vs",
         "cset w22, eq",
         "cset w23, mi",
-        "orr w23, w23, w21",
-        "strb w23, [x28, #744]",
-        "orr w22, w22, w21",
-        "strb w22, [x28, #750]",
+        "orr w24, w23, w21",
+        "strb w24, [x28, #744]",
+        "orr w23, w22, w21",
+        "strb w23, [x28, #750]",
         "strb w20, [x28, #745]",
         "strb w21, [x28, #746]"
       ]
@@ -7736,8 +7882,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -7746,10 +7892,10 @@
         "cset w21, vs",
         "cset w22, eq",
         "cset w23, mi",
-        "orr w23, w23, w21",
-        "strb w23, [x28, #744]",
-        "orr w22, w22, w21",
-        "strb w22, [x28, #750]",
+        "orr w24, w23, w21",
+        "strb w24, [x28, #744]",
+        "orr w23, w22, w21",
+        "strb w23, [x28, #750]",
         "strb w20, [x28, #745]",
         "strb w21, [x28, #746]"
       ]
@@ -7762,8 +7908,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -7772,10 +7918,10 @@
         "cset w21, vs",
         "cset w22, eq",
         "cset w23, mi",
-        "orr w23, w23, w21",
-        "strb w23, [x28, #744]",
-        "orr w22, w22, w21",
-        "strb w22, [x28, #750]",
+        "orr w24, w23, w21",
+        "strb w24, [x28, #744]",
+        "orr w23, w22, w21",
+        "strb w23, [x28, #750]",
         "strb w20, [x28, #745]",
         "strb w21, [x28, #746]"
       ]
@@ -7788,8 +7934,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -7798,10 +7944,10 @@
         "cset w21, vs",
         "cset w22, eq",
         "cset w23, mi",
-        "orr w23, w23, w21",
-        "strb w23, [x28, #744]",
-        "orr w22, w22, w21",
-        "strb w22, [x28, #750]",
+        "orr w24, w23, w21",
+        "strb w24, [x28, #744]",
+        "orr w23, w22, w21",
+        "strb w23, [x28, #750]",
         "strb w20, [x28, #745]",
         "strb w21, [x28, #746]"
       ]
@@ -7814,8 +7960,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -7824,10 +7970,10 @@
         "cset w21, vs",
         "cset w22, eq",
         "cset w23, mi",
-        "orr w23, w23, w21",
-        "strb w23, [x28, #744]",
-        "orr w22, w22, w21",
-        "strb w22, [x28, #750]",
+        "orr w24, w23, w21",
+        "strb w24, [x28, #744]",
+        "orr w23, w22, w21",
+        "strb w23, [x28, #750]",
         "strb w20, [x28, #745]",
         "strb w21, [x28, #746]"
       ]
@@ -7840,8 +7986,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -7850,10 +7996,10 @@
         "cset w21, vs",
         "cset w22, eq",
         "cset w23, mi",
-        "orr w23, w23, w21",
-        "strb w23, [x28, #744]",
-        "orr w22, w22, w21",
-        "strb w22, [x28, #750]",
+        "orr w24, w23, w21",
+        "strb w24, [x28, #744]",
+        "orr w23, w22, w21",
+        "strb w23, [x28, #750]",
         "strb w20, [x28, #745]",
         "strb w21, [x28, #746]"
       ]
@@ -7867,8 +8013,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x0",
         "add w22, w20, #0x0 (0)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -7877,18 +8023,18 @@
         "cset w23, vs",
         "cset w24, eq",
         "cset w25, mi",
-        "orr w25, w25, w23",
-        "strb w25, [x28, #744]",
-        "orr w24, w24, w23",
-        "strb w24, [x28, #750]",
+        "orr w30, w25, w23",
+        "strb w30, [x28, #744]",
+        "orr w25, w24, w23",
+        "strb w25, [x28, #750]",
         "strb w21, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w21, [x28, #1026]",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -7901,8 +8047,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -7911,18 +8057,18 @@
         "cset w23, vs",
         "cset w24, eq",
         "cset w25, mi",
-        "orr w25, w25, w23",
-        "strb w25, [x28, #744]",
-        "orr w24, w24, w23",
-        "strb w24, [x28, #750]",
+        "orr w30, w25, w23",
+        "strb w30, [x28, #744]",
+        "orr w25, w24, w23",
+        "strb w25, [x28, #750]",
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -7934,8 +8080,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -7945,18 +8091,18 @@
         "cset w23, vs",
         "cset w24, eq",
         "cset w25, mi",
-        "orr w25, w25, w23",
-        "strb w25, [x28, #744]",
-        "orr w24, w24, w23",
-        "strb w24, [x28, #750]",
+        "orr w30, w25, w23",
+        "strb w30, [x28, #744]",
+        "orr w25, w24, w23",
+        "strb w25, [x28, #750]",
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -7968,8 +8114,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -7979,18 +8125,18 @@
         "cset w23, vs",
         "cset w24, eq",
         "cset w25, mi",
-        "orr w25, w25, w23",
-        "strb w25, [x28, #744]",
-        "orr w24, w24, w23",
-        "strb w24, [x28, #750]",
+        "orr w30, w25, w23",
+        "strb w30, [x28, #744]",
+        "orr w25, w24, w23",
+        "strb w25, [x28, #750]",
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -8002,8 +8148,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -8013,18 +8159,18 @@
         "cset w23, vs",
         "cset w24, eq",
         "cset w25, mi",
-        "orr w25, w25, w23",
-        "strb w25, [x28, #744]",
-        "orr w24, w24, w23",
-        "strb w24, [x28, #750]",
+        "orr w30, w25, w23",
+        "strb w30, [x28, #744]",
+        "orr w25, w24, w23",
+        "strb w25, [x28, #750]",
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -8036,8 +8182,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -8047,18 +8193,18 @@
         "cset w23, vs",
         "cset w24, eq",
         "cset w25, mi",
-        "orr w25, w25, w23",
-        "strb w25, [x28, #744]",
-        "orr w24, w24, w23",
-        "strb w24, [x28, #750]",
+        "orr w30, w25, w23",
+        "strb w30, [x28, #744]",
+        "orr w25, w24, w23",
+        "strb w25, [x28, #750]",
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -8070,8 +8216,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -8081,18 +8227,18 @@
         "cset w23, vs",
         "cset w24, eq",
         "cset w25, mi",
-        "orr w25, w25, w23",
-        "strb w25, [x28, #744]",
-        "orr w24, w24, w23",
-        "strb w24, [x28, #750]",
+        "orr w30, w25, w23",
+        "strb w30, [x28, #744]",
+        "orr w25, w24, w23",
+        "strb w25, [x28, #750]",
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -8104,8 +8250,8 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -8115,64 +8261,67 @@
         "cset w23, vs",
         "cset w24, eq",
         "cset w25, mi",
-        "orr w25, w25, w23",
-        "strb w25, [x28, #744]",
-        "orr w24, w24, w23",
-        "strb w24, [x28, #750]",
+        "orr w30, w25, w23",
+        "strb w30, [x28, #744]",
+        "orr w25, w24, w23",
+        "strb w25, [x28, #750]",
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fiadd word [rax]": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "0xde !11b /0"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldrh w21, [x4]",
-        "sxth x21, w21",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
+        "sxth x21, w22",
         "scvtf d2, w21",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
+        "fadd d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fimul word [rax]": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "0xde !11b /1"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldrh w21, [x4]",
-        "sxth x21, w21",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
+        "sxth x21, w22",
         "scvtf d2, w21",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
+        "fmul d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "ficom word [rax]": {
-      "ExpectedInstructionCount": 17,
+      "ExpectedInstructionCount": 18,
       "Comment": [
         "0xde !11b /2"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldrh w21, [x4]",
-        "sxth x21, w21",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
+        "sxth x21, w22",
         "scvtf d2, w21",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -8181,23 +8330,24 @@
         "cset w21, vs",
         "cset w22, eq",
         "cset w23, mi",
-        "orr w23, w23, w21",
-        "strb w23, [x28, #744]",
-        "orr w22, w22, w21",
-        "strb w22, [x28, #750]",
+        "orr w24, w23, w21",
+        "strb w24, [x28, #744]",
+        "orr w23, w22, w21",
+        "strb w23, [x28, #750]",
         "strb w20, [x28, #745]",
         "strb w21, [x28, #746]"
       ]
     },
     "ficomp word [rax]": {
-      "ExpectedInstructionCount": 25,
+      "ExpectedInstructionCount": 26,
       "Comment": [
         "0xde !11b /3"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldrh w21, [x4]",
-        "sxth x21, w21",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
+        "sxth x21, w22",
         "scvtf d2, w21",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -8207,87 +8357,91 @@
         "cset w23, vs",
         "cset w24, eq",
         "cset w25, mi",
-        "orr w25, w25, w23",
-        "strb w25, [x28, #744]",
-        "orr w24, w24, w23",
-        "strb w24, [x28, #750]",
+        "orr w30, w25, w23",
+        "strb w30, [x28, #744]",
+        "orr w25, w24, w23",
+        "strb w25, [x28, #750]",
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "lsl w23, w21, w20",
+        "bic w21, w22, w23",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fisub word [rax]": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "0xde !11b /4"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldrh w21, [x4]",
-        "sxth x21, w21",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
+        "sxth x21, w22",
         "scvtf d2, w21",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
+        "fsub d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fisubr word [rax]": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "0xde !11b /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldrh w21, [x4]",
-        "sxth x21, w21",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
+        "sxth x21, w22",
         "scvtf d2, w21",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
+        "fsub d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fidiv word [rax]": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "0xde !11b /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldrh w21, [x4]",
-        "sxth x21, w21",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
+        "sxth x21, w22",
         "scvtf d2, w21",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
+        "fdiv d4, d3, d2",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "fidivr word [rax]": {
-      "ExpectedInstructionCount": 9,
+      "ExpectedInstructionCount": 10,
       "Comment": [
         "0xde !11b /7"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "ldrh w21, [x4]",
-        "sxth x21, w21",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
+        "sxth x21, w22",
         "scvtf d2, w21",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
+        "fdiv d4, d2, d3",
         "add x0, x28, x20, lsl #4",
-        "str d2, [x0, #768]"
+        "str d4, [x0, #768]"
       ]
     },
     "faddp st0": {
@@ -8298,22 +8452,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fadd d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "faddp st1": {
@@ -8325,21 +8479,21 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "fadd d4, d3, d2",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w20",
+        "bic w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x22, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x23, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "faddp st2": {
@@ -8350,22 +8504,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fadd d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "faddp st3": {
@@ -8376,22 +8530,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fadd d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "faddp st4": {
@@ -8402,22 +8556,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fadd d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "faddp st5": {
@@ -8428,22 +8582,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fadd d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "faddp st6": {
@@ -8454,22 +8608,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fadd d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "faddp st7": {
@@ -8480,22 +8634,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fadd d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fadd d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fmulp st0": {
@@ -8506,22 +8660,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fmul d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fmulp st1": {
@@ -8533,21 +8687,21 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "fmul d4, d3, d2",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w20",
+        "bic w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x22, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x23, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fmulp st2": {
@@ -8558,22 +8712,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fmul d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fmulp st3": {
@@ -8584,22 +8738,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fmul d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fmulp st4": {
@@ -8610,22 +8764,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fmul d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fmulp st5": {
@@ -8636,22 +8790,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fmul d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fmulp st6": {
@@ -8662,22 +8816,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fmul d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fmulp st7": {
@@ -8688,22 +8842,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fmul d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fmul d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fcompp": {
@@ -8715,8 +8869,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -8725,23 +8879,23 @@
         "cset w23, vs",
         "cset w24, eq",
         "cset w25, mi",
-        "orr w25, w25, w23",
-        "strb w25, [x28, #744]",
-        "orr w24, w24, w23",
-        "strb w24, [x28, #750]",
+        "orr w30, w25, w23",
+        "strb w30, [x28, #744]",
+        "orr w25, w24, w23",
+        "strb w25, [x28, #750]",
         "strb w22, [x28, #745]",
         "strb w23, [x28, #746]",
         "ldrb w22, [x28, #1026]",
         "lsl w23, w21, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "lsl w21, w21, w20",
-        "bic w21, w22, w21",
+        "bic w24, w22, w23",
+        "strb w24, [x28, #1026]",
+        "add w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
+        "lsl w22, w21, w20",
+        "bic w21, w24, w22",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -8755,22 +8909,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fsub d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubrp st1, st0": {
@@ -8782,21 +8936,21 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "fsub d4, d3, d2",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w20",
+        "bic w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x22, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x23, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubrp st2, st0": {
@@ -8807,22 +8961,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fsub d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubrp st3, st0": {
@@ -8833,22 +8987,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fsub d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubrp st4, st0": {
@@ -8859,22 +9013,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fsub d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubrp st5, st0": {
@@ -8885,22 +9039,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fsub d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubrp st6, st0": {
@@ -8911,22 +9065,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fsub d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubrp st7, st0": {
@@ -8937,22 +9091,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fsub d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "db 0xde, 0xe8": {
@@ -8965,22 +9119,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "ldrb w22, [x28, #1026]",
+        "fsub d4, d2, d3",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubp st1, st0": {
@@ -8992,21 +9146,21 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "fsub d4, d2, d3",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w20",
+        "bic w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x22, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x23, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubp st2, st0": {
@@ -9017,22 +9171,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "ldrb w22, [x28, #1026]",
+        "fsub d4, d2, d3",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubp st3, st0": {
@@ -9043,22 +9197,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "ldrb w22, [x28, #1026]",
+        "fsub d4, d2, d3",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubp st4, st0": {
@@ -9069,22 +9223,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "ldrb w22, [x28, #1026]",
+        "fsub d4, d2, d3",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubp st5, st0": {
@@ -9095,22 +9249,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "ldrb w22, [x28, #1026]",
+        "fsub d4, d2, d3",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubp st6, st0": {
@@ -9121,22 +9275,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "ldrb w22, [x28, #1026]",
+        "fsub d4, d2, d3",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fsubp st7, st0": {
@@ -9147,22 +9301,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fsub d2, d2, d3",
-        "ldrb w22, [x28, #1026]",
+        "fsub d4, d2, d3",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "db 0xde, 0xf0": {
@@ -9175,22 +9329,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fdiv d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
-        "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
+        "strb w20, [x28, #747]",
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivrp st1, st0": {
@@ -9202,21 +9356,21 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "fdiv d4, d3, d2",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w20",
+        "bic w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x22, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x23, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivrp st2, st0": {
@@ -9227,22 +9381,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fdiv d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivrp st3, st0": {
@@ -9253,22 +9407,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fdiv d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivrp st4, st0": {
@@ -9279,22 +9433,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fdiv d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivrp st5, st0": {
@@ -9305,22 +9459,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fdiv d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivrp st6, st0": {
@@ -9331,22 +9485,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fdiv d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivrp st7, st0": {
@@ -9357,22 +9511,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d3, d2",
-        "ldrb w22, [x28, #1026]",
+        "fdiv d4, d3, d2",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "db 0xde, 0xf8": {
@@ -9385,22 +9539,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "ldrb w22, [x28, #1026]",
+        "fdiv d4, d2, d3",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivp st1, st0": {
@@ -9412,21 +9566,21 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "fdiv d4, d2, d3",
+        "ldrb w22, [x28, #1026]",
+        "lsl w24, w21, w20",
+        "bic w21, w22, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x22, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x23, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivp st2, st0": {
@@ -9437,22 +9591,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "ldrb w22, [x28, #1026]",
+        "fdiv d4, d2, d3",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivp st3, st0": {
@@ -9463,22 +9617,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "ldrb w22, [x28, #1026]",
+        "fdiv d4, d2, d3",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivp st4, st0": {
@@ -9489,22 +9643,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "ldrb w22, [x28, #1026]",
+        "fdiv d4, d2, d3",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivp st5, st0": {
@@ -9515,22 +9669,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "ldrb w22, [x28, #1026]",
+        "fdiv d4, d2, d3",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivp st6, st0": {
@@ -9541,22 +9695,22 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "ldrb w22, [x28, #1026]",
+        "fdiv d4, d2, d3",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fdivp st7, st0": {
@@ -9567,48 +9721,49 @@
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
-        "fdiv d2, d2, d3",
-        "ldrb w22, [x28, #1026]",
+        "fdiv d4, d2, d3",
+        "ldrb w21, [x28, #1026]",
         "mov w23, #0x1",
-        "lsl w23, w23, w20",
-        "bic w22, w22, w23",
-        "strb w22, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w24, w23, w20",
+        "bic w23, w21, w24",
+        "strb w23, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
-        "add x0, x28, x21, lsl #4",
-        "str d2, [x0, #768]"
+        "add x0, x28, x22, lsl #4",
+        "str d4, [x0, #768]"
       ]
     },
     "fild word [rax]": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xdf !11b /0"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
-        "ldrh w21, [x4]",
-        "sxth x21, w21",
+        "mov x21, x4",
+        "ldrh w22, [x21]",
+        "sxth x21, w22",
         "scvtf d2, x21",
         "add x0, x28, x20, lsl #4",
         "str d2, [x0, #768]"
       ]
     },
     "fisttp word [rax]": {
-      "ExpectedInstructionCount": 13,
+      "ExpectedInstructionCount": 14,
       "Comment": [
         "0xdf !11b /1"
       ],
@@ -9617,19 +9772,20 @@
         "add x0, x28, x20, lsl #4",
         "ldr d2, [x0, #768]",
         "fcvtzs x21, d2",
-        "strh w21, [x4]",
+        "mov x22, x4",
+        "strh w21, [x22]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fist word [rax]": {
-      "ExpectedInstructionCount": 6,
+      "ExpectedInstructionCount": 7,
       "Comment": [
         "0xdf !11b /2"
       ],
@@ -9639,11 +9795,12 @@
         "ldr d2, [x0, #768]",
         "frinti d0, d2",
         "fcvtzs x20, d0",
-        "strh w20, [x4]"
+        "mov x21, x4",
+        "strh w20, [x21]"
       ]
     },
     "fistp word [rax]": {
-      "ExpectedInstructionCount": 14,
+      "ExpectedInstructionCount": 15,
       "Comment": [
         "0xdf !11b /3"
       ],
@@ -9653,33 +9810,35 @@
         "ldr d2, [x0, #768]",
         "frinti d0, d2",
         "fcvtzs x21, d0",
-        "strh w21, [x4]",
+        "mov x22, x4",
+        "strh w21, [x22]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fbld tword [rax]": {
-      "ExpectedInstructionCount": 66,
+      "ExpectedInstructionCount": 67,
       "Comment": [
         "0xdf !11b /4"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
-        "sub w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "sub w22, w20, #0x1 (1)",
+        "and w20, w22, #0x7",
         "ldrb w22, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "orr w21, w22, w21",
+        "lsl w23, w21, w20",
+        "orr w21, w22, w23",
         "strb w21, [x28, #1026]",
         "strb w20, [x28, #747]",
-        "ldr q2, [x4]",
+        "mov x21, x4",
+        "ldr q2, [x21]",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -9705,9 +9864,9 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -9720,8 +9879,8 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v2.d[0]",
-        "umov w2, v2.h[4]",
+        "mov x1, v3.d[0]",
+        "umov w2, v3.h[4]",
         "ldr x3, [x28, #1192]",
         "blr x3",
         "ldr w4, [x28, #728]",
@@ -9739,7 +9898,7 @@
       ]
     },
     "fbstp tword [rax]": {
-      "ExpectedInstructionCount": 69,
+      "ExpectedInstructionCount": 70,
       "Comment": [
         "0xdf !11b /6"
       ],
@@ -9771,9 +9930,9 @@
         "ld1 {v2.2d, v3.2d}, [sp], #32",
         "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
         "ldr x30, [sp], #16",
-        "eor v2.16b, v2.16b, v2.16b",
-        "mov v2.d[0], x0",
-        "mov v2.h[4], w1",
+        "eor v3.16b, v3.16b, v3.16b",
+        "mov v3.d[0], x0",
+        "mov v3.h[4], w1",
         "mrs x0, nzcv",
         "str w0, [x28, #728]",
         "stp x4, x5, [x28, #8]",
@@ -9786,8 +9945,8 @@
         "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x0], #64",
         "str x30, [x0], #16",
         "ldrh w0, [x28, #1024]",
-        "mov x1, v2.d[0]",
-        "umov w2, v2.h[4]",
+        "mov x1, v3.d[0]",
+        "umov w2, v3.h[4]",
         "ldr x3, [x28, #1392]",
         "blr x3",
         "ldr w4, [x28, #728]",
@@ -9802,16 +9961,17 @@
         "eor v2.16b, v2.16b, v2.16b",
         "mov v2.d[0], x0",
         "mov v2.h[4], w1",
-        "str d2, [x4]",
-        "mov x21, v2.d[1]",
-        "strh w21, [x4, #8]",
+        "mov x21, x4",
+        "str d2, [x21]",
+        "mov x22, v2.d[1]",
+        "strh w22, [x21, #8]",
         "ldrb w21, [x28, #1026]",
         "mov w22, #0x1",
-        "lsl w22, w22, w20",
-        "bic w21, w21, w22",
-        "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "lsl w23, w22, w20",
+        "bic w22, w21, w23",
+        "strb w22, [x28, #1026]",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -9822,8 +9982,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -9834,8 +9994,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -9846,8 +10006,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -9858,8 +10018,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -9870,8 +10030,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -9882,8 +10042,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -9894,8 +10054,8 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
@@ -9906,41 +10066,45 @@
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]"
       ]
     },
     "fnstsw ax": {
-      "ExpectedInstructionCount": 12,
+      "ExpectedInstructionCount": 16,
       "Comment": [
         "0xdf 11b 0xe0 /4"
       ],
       "ExpectedArm64ASM": [
         "mov w20, #0x0",
         "ldrb w21, [x28, #747]",
-        "bfi x20, x21, #11, #3",
-        "ldrb w21, [x28, #744]",
-        "ldrb w22, [x28, #745]",
+        "mov x22, x20",
+        "bfi x22, x21, #11, #3",
+        "ldrb w20, [x28, #744]",
+        "ldrb w21, [x28, #745]",
         "ldrb w23, [x28, #746]",
         "ldrb w24, [x28, #750]",
-        "orr x20, x20, x21, lsl #8",
-        "orr x20, x20, x22, lsl #9",
-        "orr x20, x20, x23, lsl #10",
-        "orr x20, x20, x24, lsl #14",
-        "bfxil x4, x20, #0, #16"
+        "orr x25, x22, x20, lsl #8",
+        "orr x20, x25, x21, lsl #9",
+        "orr x21, x20, x23, lsl #10",
+        "orr x20, x21, x24, lsl #14",
+        "mov x21, x4",
+        "mov x22, x21",
+        "bfxil x22, x20, #0, #16",
+        "mov x4, x22"
       ]
     },
     "fucomip st0": {
-      "ExpectedInstructionCount": 25,
+      "ExpectedInstructionCount": 26,
       "Comment": [
         "0xdf 11b 0xe8 /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -9949,23 +10113,24 @@
         "cset w22, eq",
         "cset w23, lo",
         "cset w24, vs",
-        "orr w23, w23, w24",
-        "lsl x23, x23, #29",
-        "orr w22, w22, w24",
-        "orr w22, w23, w22, lsl #30",
-        "eor w26, w24, #0x1",
+        "orr w25, w23, w24",
+        "lsl x23, x25, #29",
+        "orr w25, w22, w24",
+        "orr w22, w23, w25, lsl #30",
+        "eor w23, w24, #0x1",
+        "mov x26, x23",
         "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "lsl w24, w21, w20",
+        "bic w21, w23, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
         "msr nzcv, x22"
       ]
     },
     "fucomip st1": {
-      "ExpectedInstructionCount": 25,
+      "ExpectedInstructionCount": 26,
       "Comment": [
         "0xdf 11b 0xe9 /5"
       ],
@@ -9973,8 +10138,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -9982,31 +10147,32 @@
         "cset w22, eq",
         "cset w23, lo",
         "cset w24, vs",
-        "orr w23, w23, w24",
-        "lsl x23, x23, #29",
-        "orr w22, w22, w24",
-        "orr w22, w23, w22, lsl #30",
-        "eor w26, w24, #0x1",
+        "orr w25, w23, w24",
+        "lsl x23, x25, #29",
+        "orr w25, w22, w24",
+        "orr w22, w23, w25, lsl #30",
+        "eor w23, w24, #0x1",
+        "mov x26, x23",
         "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "lsl w24, w21, w20",
+        "bic w21, w23, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
         "msr nzcv, x22"
       ]
     },
     "fucomip st2": {
-      "ExpectedInstructionCount": 25,
+      "ExpectedInstructionCount": 26,
       "Comment": [
         "0xdf 11b 0xea /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -10015,31 +10181,32 @@
         "cset w22, eq",
         "cset w23, lo",
         "cset w24, vs",
-        "orr w23, w23, w24",
-        "lsl x23, x23, #29",
-        "orr w22, w22, w24",
-        "orr w22, w23, w22, lsl #30",
-        "eor w26, w24, #0x1",
+        "orr w25, w23, w24",
+        "lsl x23, x25, #29",
+        "orr w25, w22, w24",
+        "orr w22, w23, w25, lsl #30",
+        "eor w23, w24, #0x1",
+        "mov x26, x23",
         "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "lsl w24, w21, w20",
+        "bic w21, w23, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
         "msr nzcv, x22"
       ]
     },
     "fucomip st3": {
-      "ExpectedInstructionCount": 25,
+      "ExpectedInstructionCount": 26,
       "Comment": [
         "0xdf 11b 0xeb /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -10048,31 +10215,32 @@
         "cset w22, eq",
         "cset w23, lo",
         "cset w24, vs",
-        "orr w23, w23, w24",
-        "lsl x23, x23, #29",
-        "orr w22, w22, w24",
-        "orr w22, w23, w22, lsl #30",
-        "eor w26, w24, #0x1",
+        "orr w25, w23, w24",
+        "lsl x23, x25, #29",
+        "orr w25, w22, w24",
+        "orr w22, w23, w25, lsl #30",
+        "eor w23, w24, #0x1",
+        "mov x26, x23",
         "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "lsl w24, w21, w20",
+        "bic w21, w23, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
         "msr nzcv, x22"
       ]
     },
     "fucomip st4": {
-      "ExpectedInstructionCount": 25,
+      "ExpectedInstructionCount": 26,
       "Comment": [
         "0xdf 11b 0xec /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -10081,31 +10249,32 @@
         "cset w22, eq",
         "cset w23, lo",
         "cset w24, vs",
-        "orr w23, w23, w24",
-        "lsl x23, x23, #29",
-        "orr w22, w22, w24",
-        "orr w22, w23, w22, lsl #30",
-        "eor w26, w24, #0x1",
+        "orr w25, w23, w24",
+        "lsl x23, x25, #29",
+        "orr w25, w22, w24",
+        "orr w22, w23, w25, lsl #30",
+        "eor w23, w24, #0x1",
+        "mov x26, x23",
         "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "lsl w24, w21, w20",
+        "bic w21, w23, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
         "msr nzcv, x22"
       ]
     },
     "fucomip st5": {
-      "ExpectedInstructionCount": 25,
+      "ExpectedInstructionCount": 26,
       "Comment": [
         "0xdf 11b 0xed /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -10114,31 +10283,32 @@
         "cset w22, eq",
         "cset w23, lo",
         "cset w24, vs",
-        "orr w23, w23, w24",
-        "lsl x23, x23, #29",
-        "orr w22, w22, w24",
-        "orr w22, w23, w22, lsl #30",
-        "eor w26, w24, #0x1",
+        "orr w25, w23, w24",
+        "lsl x23, x25, #29",
+        "orr w25, w22, w24",
+        "orr w22, w23, w25, lsl #30",
+        "eor w23, w24, #0x1",
+        "mov x26, x23",
         "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "lsl w24, w21, w20",
+        "bic w21, w23, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
         "msr nzcv, x22"
       ]
     },
     "fucomip st6": {
-      "ExpectedInstructionCount": 25,
+      "ExpectedInstructionCount": 26,
       "Comment": [
         "0xdf 11b 0xee /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -10147,31 +10317,32 @@
         "cset w22, eq",
         "cset w23, lo",
         "cset w24, vs",
-        "orr w23, w23, w24",
-        "lsl x23, x23, #29",
-        "orr w22, w22, w24",
-        "orr w22, w23, w22, lsl #30",
-        "eor w26, w24, #0x1",
+        "orr w25, w23, w24",
+        "lsl x23, x25, #29",
+        "orr w25, w22, w24",
+        "orr w22, w23, w25, lsl #30",
+        "eor w23, w24, #0x1",
+        "mov x26, x23",
         "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "lsl w24, w21, w20",
+        "bic w21, w23, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
         "msr nzcv, x22"
       ]
     },
     "fucomip st7": {
-      "ExpectedInstructionCount": 25,
+      "ExpectedInstructionCount": 26,
       "Comment": [
         "0xdf 11b 0xef /5"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -10180,31 +10351,32 @@
         "cset w22, eq",
         "cset w23, lo",
         "cset w24, vs",
-        "orr w23, w23, w24",
-        "lsl x23, x23, #29",
-        "orr w22, w22, w24",
-        "orr w22, w23, w22, lsl #30",
-        "eor w26, w24, #0x1",
+        "orr w25, w23, w24",
+        "lsl x23, x25, #29",
+        "orr w25, w22, w24",
+        "orr w22, w23, w25, lsl #30",
+        "eor w23, w24, #0x1",
+        "mov x26, x23",
         "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "lsl w24, w21, w20",
+        "bic w21, w23, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
         "msr nzcv, x22"
       ]
     },
     "fcomip st0": {
-      "ExpectedInstructionCount": 25,
+      "ExpectedInstructionCount": 26,
       "Comment": [
         "0xdf 11b 0xf0 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x0 (0)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -10213,23 +10385,24 @@
         "cset w22, eq",
         "cset w23, lo",
         "cset w24, vs",
-        "orr w23, w23, w24",
-        "lsl x23, x23, #29",
-        "orr w22, w22, w24",
-        "orr w22, w23, w22, lsl #30",
-        "eor w26, w24, #0x1",
+        "orr w25, w23, w24",
+        "lsl x23, x25, #29",
+        "orr w25, w22, w24",
+        "orr w22, w23, w25, lsl #30",
+        "eor w23, w24, #0x1",
+        "mov x26, x23",
         "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "lsl w24, w21, w20",
+        "bic w21, w23, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
         "msr nzcv, x22"
       ]
     },
     "fcomip st1": {
-      "ExpectedInstructionCount": 25,
+      "ExpectedInstructionCount": 26,
       "Comment": [
         "0xdf 11b 0xf1 /6"
       ],
@@ -10237,8 +10410,8 @@
         "ldrb w20, [x28, #747]",
         "mov w21, #0x1",
         "add w22, w20, #0x1 (1)",
-        "and w22, w22, #0x7",
-        "add x0, x28, x22, lsl #4",
+        "and w23, w22, #0x7",
+        "add x0, x28, x23, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -10246,31 +10419,32 @@
         "cset w22, eq",
         "cset w23, lo",
         "cset w24, vs",
-        "orr w23, w23, w24",
-        "lsl x23, x23, #29",
-        "orr w22, w22, w24",
-        "orr w22, w23, w22, lsl #30",
-        "eor w26, w24, #0x1",
+        "orr w25, w23, w24",
+        "lsl x23, x25, #29",
+        "orr w25, w22, w24",
+        "orr w22, w23, w25, lsl #30",
+        "eor w23, w24, #0x1",
+        "mov x26, x23",
         "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "lsl w24, w21, w20",
+        "bic w21, w23, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
         "msr nzcv, x22"
       ]
     },
     "fcomip st2": {
-      "ExpectedInstructionCount": 25,
+      "ExpectedInstructionCount": 26,
       "Comment": [
         "0xdf 11b 0xf2 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x2 (2)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -10279,31 +10453,32 @@
         "cset w22, eq",
         "cset w23, lo",
         "cset w24, vs",
-        "orr w23, w23, w24",
-        "lsl x23, x23, #29",
-        "orr w22, w22, w24",
-        "orr w22, w23, w22, lsl #30",
-        "eor w26, w24, #0x1",
+        "orr w25, w23, w24",
+        "lsl x23, x25, #29",
+        "orr w25, w22, w24",
+        "orr w22, w23, w25, lsl #30",
+        "eor w23, w24, #0x1",
+        "mov x26, x23",
         "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "lsl w24, w21, w20",
+        "bic w21, w23, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
         "msr nzcv, x22"
       ]
     },
     "fcomip st3": {
-      "ExpectedInstructionCount": 25,
+      "ExpectedInstructionCount": 26,
       "Comment": [
         "0xdf 11b 0xf3 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x3 (3)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -10312,31 +10487,32 @@
         "cset w22, eq",
         "cset w23, lo",
         "cset w24, vs",
-        "orr w23, w23, w24",
-        "lsl x23, x23, #29",
-        "orr w22, w22, w24",
-        "orr w22, w23, w22, lsl #30",
-        "eor w26, w24, #0x1",
+        "orr w25, w23, w24",
+        "lsl x23, x25, #29",
+        "orr w25, w22, w24",
+        "orr w22, w23, w25, lsl #30",
+        "eor w23, w24, #0x1",
+        "mov x26, x23",
         "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "lsl w24, w21, w20",
+        "bic w21, w23, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
         "msr nzcv, x22"
       ]
     },
     "fcomip st4": {
-      "ExpectedInstructionCount": 25,
+      "ExpectedInstructionCount": 26,
       "Comment": [
         "0xdf 11b 0xf4 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x4 (4)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -10345,31 +10521,32 @@
         "cset w22, eq",
         "cset w23, lo",
         "cset w24, vs",
-        "orr w23, w23, w24",
-        "lsl x23, x23, #29",
-        "orr w22, w22, w24",
-        "orr w22, w23, w22, lsl #30",
-        "eor w26, w24, #0x1",
+        "orr w25, w23, w24",
+        "lsl x23, x25, #29",
+        "orr w25, w22, w24",
+        "orr w22, w23, w25, lsl #30",
+        "eor w23, w24, #0x1",
+        "mov x26, x23",
         "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "lsl w24, w21, w20",
+        "bic w21, w23, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
         "msr nzcv, x22"
       ]
     },
     "fcomip st5": {
-      "ExpectedInstructionCount": 25,
+      "ExpectedInstructionCount": 26,
       "Comment": [
         "0xdf 11b 0xf5 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x5 (5)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -10378,31 +10555,32 @@
         "cset w22, eq",
         "cset w23, lo",
         "cset w24, vs",
-        "orr w23, w23, w24",
-        "lsl x23, x23, #29",
-        "orr w22, w22, w24",
-        "orr w22, w23, w22, lsl #30",
-        "eor w26, w24, #0x1",
+        "orr w25, w23, w24",
+        "lsl x23, x25, #29",
+        "orr w25, w22, w24",
+        "orr w22, w23, w25, lsl #30",
+        "eor w23, w24, #0x1",
+        "mov x26, x23",
         "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "lsl w24, w21, w20",
+        "bic w21, w23, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
         "msr nzcv, x22"
       ]
     },
     "fcomip st6": {
-      "ExpectedInstructionCount": 25,
+      "ExpectedInstructionCount": 26,
       "Comment": [
         "0xdf 11b 0xf6 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x6 (6)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -10411,31 +10589,32 @@
         "cset w22, eq",
         "cset w23, lo",
         "cset w24, vs",
-        "orr w23, w23, w24",
-        "lsl x23, x23, #29",
-        "orr w22, w22, w24",
-        "orr w22, w23, w22, lsl #30",
-        "eor w26, w24, #0x1",
+        "orr w25, w23, w24",
+        "lsl x23, x25, #29",
+        "orr w25, w22, w24",
+        "orr w22, w23, w25, lsl #30",
+        "eor w23, w24, #0x1",
+        "mov x26, x23",
         "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "lsl w24, w21, w20",
+        "bic w21, w23, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
         "msr nzcv, x22"
       ]
     },
     "fcomip st7": {
-      "ExpectedInstructionCount": 25,
+      "ExpectedInstructionCount": 26,
       "Comment": [
         "0xdf 11b 0xf7 /6"
       ],
       "ExpectedArm64ASM": [
         "ldrb w20, [x28, #747]",
         "add w21, w20, #0x7 (7)",
-        "and w21, w21, #0x7",
-        "add x0, x28, x21, lsl #4",
+        "and w22, w21, #0x7",
+        "add x0, x28, x22, lsl #4",
         "ldr d2, [x0, #768]",
         "add x0, x28, x20, lsl #4",
         "ldr d3, [x0, #768]",
@@ -10444,17 +10623,18 @@
         "cset w22, eq",
         "cset w23, lo",
         "cset w24, vs",
-        "orr w23, w23, w24",
-        "lsl x23, x23, #29",
-        "orr w22, w22, w24",
-        "orr w22, w23, w22, lsl #30",
-        "eor w26, w24, #0x1",
+        "orr w25, w23, w24",
+        "lsl x23, x25, #29",
+        "orr w25, w22, w24",
+        "orr w22, w23, w25, lsl #30",
+        "eor w23, w24, #0x1",
+        "mov x26, x23",
         "ldrb w23, [x28, #1026]",
-        "lsl w21, w21, w20",
-        "bic w21, w23, w21",
+        "lsl w24, w21, w20",
+        "bic w21, w23, w24",
         "strb w21, [x28, #1026]",
-        "add w20, w20, #0x1 (1)",
-        "and w20, w20, #0x7",
+        "add w21, w20, #0x1 (1)",
+        "and w20, w21, #0x7",
         "strb w20, [x28, #747]",
         "msr nzcv, x22"
       ]