From fd1043668ddddf69a2e08b12fc873c96d57d9139 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Sun, 23 Jun 2024 17:27:22 -0700 Subject: [PATCH] InstCountCI: Adds AVX128 tests --- .../InstructionCountCI/AVX128/VEX_map1.json | 5539 ++++++++++++++ .../AVX128/VEX_map1_FCMA.json | 134 + .../InstructionCountCI/AVX128/VEX_map2.json | 6421 +++++++++++++++++ .../AVX128/VEX_map2_SVE128.json | 3505 +++++++++ .../InstructionCountCI/AVX128/VEX_map3.json | 4009 ++++++++++ .../AVX128/VEX_map_group.json | 691 ++ 6 files changed, 20299 insertions(+) create mode 100644 unittests/InstructionCountCI/AVX128/VEX_map1.json create mode 100644 unittests/InstructionCountCI/AVX128/VEX_map1_FCMA.json create mode 100644 unittests/InstructionCountCI/AVX128/VEX_map2.json create mode 100644 unittests/InstructionCountCI/AVX128/VEX_map2_SVE128.json create mode 100644 unittests/InstructionCountCI/AVX128/VEX_map3.json create mode 100644 unittests/InstructionCountCI/AVX128/VEX_map_group.json diff --git a/unittests/InstructionCountCI/AVX128/VEX_map1.json b/unittests/InstructionCountCI/AVX128/VEX_map1.json new file mode 100644 index 0000000000..6339ddf517 --- /dev/null +++ b/unittests/InstructionCountCI/AVX128/VEX_map1.json @@ -0,0 +1,5539 @@ +{ + "Features": { + "Bitness": 64, + "EnabledHostFeatures": [ + "AVX" + ], + "DisabledHostFeatures": [ + "FCMA", + "RPRES", + "AFP", + "FLAGM", + "FLAGM2", + "SVE256", + "SVE128" + ] + }, + "Instructions": { + "vmovups xmm0, xmm0": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 1 0b00 0x10 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vmovups xmm0, [rax]": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b00 0x10 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x4]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vmovups ymm0, ymm0": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Spurious moves", + "Map 1 0b00 0x10 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + 
"str q2, [x28, #16]" + ] + }, + "vmovups ymm0, [rax]": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b00 0x10 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x4]", + "ldr q2, [x4, #16]", + "str q2, [x28, #16]" + ] + }, + "vmovupd xmm0, xmm0": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 1 0b01 0x10 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vmovupd xmm0, [rax]": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x10 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x4]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vmovupd ymm0, ymm0": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Spurious moves", + "Map 1 0b01 0x10 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "str q2, [x28, #16]" + ] + }, + "vmovupd ymm0, [rax]": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x10 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x4]", + "ldr q2, [x4, #16]", + "str q2, [x28, #16]" + ] + }, + "vmovss xmm0, [rax]": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b10 0x10 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr s16, [x4]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vmovss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b10 0x10 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "mov v16.s[0], v18.s[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vmovsd xmm0, [rax]": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b11 0x10 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr d16, [x4]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vmovsd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Insert in to first element could be more optimal, which is the common case.", + "Map 1 0b11 0x10 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "mov v16.d[0], v18.d[0]", + "movi v2.2d, #0x0", + "str q2, 
[x28, #16]" + ] + }, + "vmovups [rax], xmm0": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 1 0b00 0x11 128-bit" + ], + "ExpectedArm64ASM": [ + "str q16, [x4]" + ] + }, + "vmovups [rax], ymm0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b00 0x11 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "str q16, [x4]", + "str q2, [x4, #16]" + ] + }, + "vmovupd [rax], xmm0": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 1 0b01 0x11 128-bit" + ], + "ExpectedArm64ASM": [ + "str q16, [x4]" + ] + }, + "vmovupd [rax], ymm0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x11 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "str q16, [x4]", + "str q2, [x4, #16]" + ] + }, + "vmovss [rax], xmm0": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 1 0b10 0x11 128-bit" + ], + "ExpectedArm64ASM": [ + "str s16, [x4]" + ] + }, + "db 0xc5, 0xf2, 0x11, 0xc2": { + "ExpectedInstructionCount": 4, + "Comment": [ + "vmovss xmm2, xmm1, xmm0", + "Need to manually encode since nasm won't encode this", + "Map 1 0b10 0x11 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v18.16b, v17.16b", + "mov v18.s[0], v16.s[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #48]" + ] + }, + "vmovsd [rax], xmm0": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 1 0b11 0x11 128-bit" + ], + "ExpectedArm64ASM": [ + "str d16, [x4]" + ] + }, + "db 0xc5, 0xf3, 0x11, 0xc2": { + "ExpectedInstructionCount": 4, + "Comment": [ + "vmovsd xmm2, xmm1, xmm0", + "Need to manually encode since nasm won't encode this", + "Insert in to first element could be more optimal, which is the common case.", + "Map 1 0b11 0x11 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v18.16b, v17.16b", + "mov v18.d[0], v16.d[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #48]" + ] + }, + "vmovlps xmm0, xmm1, [rax]": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Insert in to first element could be more optimal, which is the common case.", + "Map 1 0b00 0x12 
128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x4]", + "mov v16.16b, v2.16b", + "mov v16.d[1], v17.d[1]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vmovlpd xmm0, xmm1, [rax]": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Insert in to first element could be more optimal, which is the common case.", + "Map 1 0b01 0x12 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x4]", + "mov v16.16b, v2.16b", + "mov v16.d[1], v17.d[1]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vmovsldup xmm0, [rax]": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b10 0x12 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x4]", + "trn1 v16.4s, v2.4s, v2.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vmovsldup ymm0, [rax]": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b10 0x12 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x4]", + "ldr q3, [x4, #16]", + "trn1 v16.4s, v2.4s, v2.4s", + "trn1 v2.4s, v3.4s, v3.4s", + "str q2, [x28, #16]" + ] + }, + "vmovddup xmm0, [rax]": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b11 0x12 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr d2, [x4]", + "dup v16.2d, v2.d[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vmovddup ymm0, [rax]": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b11 0x12 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x4]", + "ldr q3, [x4, #16]", + "dup v16.2d, v2.d[0]", + "dup v2.2d, v3.d[0]", + "str q2, [x28, #16]" + ] + }, + "vmovlps [rax], xmm0": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 1 0b00 0x13 128-bit" + ], + "ExpectedArm64ASM": [ + "str d16, [x4]" + ] + }, + "vmovlpd [rax], xmm0": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 1 0b01 0x13 128-bit" + ], + "ExpectedArm64ASM": [ + "str d16, [x4]" + ] + }, + "vunpcklps xmm0, xmm1, [rax]": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b00 0x14 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x4]", + "zip1 v16.4s, v17.4s, 
v2.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vunpcklps ymm0, ymm1, [rax]": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 1 0b00 0x14 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x4]", + "ldr q4, [x4, #16]", + "zip1 v16.4s, v17.4s, v3.4s", + "zip1 v2.4s, v2.4s, v4.4s", + "str q2, [x28, #16]" + ] + }, + "vunpcklpd xmm0, xmm1, [rax]": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b01 0x14 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x4]", + "zip1 v16.2d, v17.2d, v2.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vunpcklpd ymm0, ymm1, [rax]": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 1 0b01 0x14 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x4]", + "ldr q4, [x4, #16]", + "zip1 v16.2d, v17.2d, v3.2d", + "zip1 v2.2d, v2.2d, v4.2d", + "str q2, [x28, #16]" + ] + }, + "vunpckhps xmm0, xmm1, [rax]": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b00 0x15 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x4]", + "zip2 v16.4s, v17.4s, v2.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vunpckhps ymm0, ymm1, [rax]": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 1 0b00 0x15 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x4]", + "ldr q4, [x4, #16]", + "zip2 v16.4s, v17.4s, v3.4s", + "zip2 v2.4s, v2.4s, v4.4s", + "str q2, [x28, #16]" + ] + }, + "vunpckhpd xmm0, xmm1, [rax]": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b01 0x15 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x4]", + "zip2 v16.2d, v17.2d, v2.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vunpckhpd ymm0, ymm1, [rax]": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 1 0b01 0x15 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x4]", + "ldr q4, [x4, #16]", + "zip2 v16.2d, v17.2d, v3.2d", + "zip2 v2.2d, v2.2d, v4.2d", + "str q2, [x28, #16]" + ] + }, + 
"vmovhps xmm0, xmm1, [rax]": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b00 0x16 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x4]", + "zip1 v16.2d, v17.2d, v2.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vmovhpd xmm0, xmm1, [rax]": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b01 0x16 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x4]", + "zip1 v16.2d, v17.2d, v2.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vmovshdup xmm0, [rax]": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b10 0x16 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x4]", + "trn2 v16.4s, v2.4s, v2.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vmovshdup ymm0, [rax]": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b10 0x16 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x4]", + "ldr q3, [x4, #16]", + "trn2 v16.4s, v2.4s, v2.4s", + "trn2 v2.4s, v3.4s, v3.4s", + "str q2, [x28, #16]" + ] + }, + "vmovhps [rax], xmm0": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Can be more optimal with an element store.", + "Map 1 0b00 0x17 128-bit" + ], + "ExpectedArm64ASM": [ + "dup v2.2d, v16.d[1]", + "str d2, [x4]" + ] + }, + "vmovhpd [rax], xmm0": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Can be more optimal with an element store.", + "Map 1 0b01 0x17 128-bit" + ], + "ExpectedArm64ASM": [ + "dup v2.2d, v16.d[1]", + "str d2, [x4]" + ] + }, + "vmovmskps rax, xmm0": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b00 0x50 128-bit" + ], + "ExpectedArm64ASM": [ + "ushr v2.4s, v16.4s, #31", + "ldr q3, [x28, #2512]", + "ushl v2.4s, v2.4s, v3.4s", + "addv s2, v2.4s", + "mov w4, v2.s[0]" + ] + }, + "vmovmskps rax, ymm0": { + "ExpectedInstructionCount": 11, + "Comment": [ + "Map 1 0b00 0x50 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ushr v3.4s, v16.4s, #31", + "ldr q4, [x28, #2512]", + "ushl v3.4s, v3.4s, v4.4s", + "addv s3, v3.4s", + "mov w20, v3.s[0]", 
+ "ushr v2.4s, v2.4s, #31", + "ushl v2.4s, v2.4s, v4.4s", + "addv s2, v2.4s", + "mov w21, v2.s[0]", + "orr x4, x20, x21, lsl #4" + ] + }, + "vmovmskpd rax, xmm0": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b01 0x50 128-bit" + ], + "ExpectedArm64ASM": [ + "uzp2 v2.4s, v16.4s, v16.4s", + "mov x20, v2.d[0]", + "bfi x20, x20, #31, #32", + "lsr x4, x20, #62" + ] + }, + "vmovmskpd rax, ymm0": { + "ExpectedInstructionCount": 10, + "Comment": [ + "Map 1 0b01 0x50 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "uzp2 v3.4s, v16.4s, v16.4s", + "mov x20, v3.d[0]", + "bfi x20, x20, #31, #32", + "lsr x20, x20, #62", + "uzp2 v2.4s, v2.4s, v2.4s", + "mov x21, v2.d[0]", + "bfi x21, x21, #31, #32", + "lsr x21, x21, #62", + "orr x4, x20, x21, lsl #2" + ] + }, + "vsqrtps xmm0, xmm1": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b00 0x51 128-bit" + ], + "ExpectedArm64ASM": [ + "fsqrt v16.4s, v17.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vsqrtps ymm0, ymm1": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b00 0x51 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "fsqrt v16.4s, v17.4s", + "fsqrt v2.4s, v2.4s", + "str q2, [x28, #16]" + ] + }, + "vsqrtpd xmm0, xmm1": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x51 128-bit" + ], + "ExpectedArm64ASM": [ + "fsqrt v16.2d, v17.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vsqrtpd ymm0, ymm1": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b01 0x51 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "fsqrt v16.2d, v17.2d", + "fsqrt v2.2d, v2.2d", + "str q2, [x28, #16]" + ] + }, + "vsqrtss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b10 0x51 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "fsqrt s0, s18", + "mov v16.s[0], v0.s[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vsqrtsd xmm0, xmm1, xmm2": { + 
"ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b11 0x51 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "fsqrt d0, d18", + "mov v16.d[0], v0.d[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vrsqrtps xmm0, xmm1": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b00 0x52 128-bit" + ], + "ExpectedArm64ASM": [ + "fmov v0.4s, #0x70 (1.0000)", + "fsqrt v1.4s, v17.4s", + "fdiv v16.4s, v0.4s, v1.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vrsqrtps ymm0, ymm1": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 1 0b00 0x52 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "fmov v0.4s, #0x70 (1.0000)", + "fsqrt v1.4s, v17.4s", + "fdiv v16.4s, v0.4s, v1.4s", + "fmov v0.4s, #0x70 (1.0000)", + "fsqrt v1.4s, v2.4s", + "fdiv v2.4s, v0.4s, v1.4s", + "str q2, [x28, #16]" + ] + }, + "vrsqrtss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 1 0b10 0x52 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "fmov s0, #0x70 (1.0000)", + "fsqrt s1, s18", + "fdiv s0, s0, s1", + "mov v16.s[0], v0.s[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vrcpps xmm0, xmm1": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b00 0x53 128-bit" + ], + "ExpectedArm64ASM": [ + "fmov v0.4s, #0x70 (1.0000)", + "fdiv v16.4s, v0.4s, v17.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vrcpps ymm0, ymm1": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 1 0b00 0x53 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "fmov v0.4s, #0x70 (1.0000)", + "fdiv v16.4s, v0.4s, v17.4s", + "fmov v0.4s, #0x70 (1.0000)", + "fdiv v2.4s, v0.4s, v2.4s", + "str q2, [x28, #16]" + ] + }, + "vrcpss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 1 0b10 0x53 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "fmov s0, #0x70 (1.0000)", + "fdiv s0, s0, s18", + "mov v16.s[0], v0.s[0]", + "movi v2.2d, 
#0x0", + "str q2, [x28, #16]" + ] + }, + "vandps xmm0, xmm1": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b00 0x54 128-bit" + ], + "ExpectedArm64ASM": [ + "and v16.16b, v16.16b, v17.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vandps ymm0, ymm1": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b00 0x54 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "and v16.16b, v16.16b, v17.16b", + "and v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vandpd xmm0, xmm1": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x54 128-bit" + ], + "ExpectedArm64ASM": [ + "and v16.16b, v16.16b, v17.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vandpd ymm0, ymm1": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0x54 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "and v16.16b, v16.16b, v17.16b", + "and v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vandnps xmm0, xmm1": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b00 0x55 128-bit" + ], + "ExpectedArm64ASM": [ + "bic v16.16b, v17.16b, v16.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vandnps ymm0, ymm1": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b00 0x55 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "bic v16.16b, v17.16b, v16.16b", + "bic v2.16b, v3.16b, v2.16b", + "str q2, [x28, #16]" + ] + }, + "vandnpd xmm0, xmm1": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x55 128-bit" + ], + "ExpectedArm64ASM": [ + "bic v16.16b, v17.16b, v16.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vandnpd ymm0, ymm1": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0x55 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "bic v16.16b, v17.16b, v16.16b", + "bic v2.16b, v3.16b, v2.16b", + 
"str q2, [x28, #16]" + ] + }, + "vorps xmm0, xmm1": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b00 0x56 128-bit" + ], + "ExpectedArm64ASM": [ + "orr v16.16b, v16.16b, v17.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vorps ymm0, ymm1": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b00 0x56 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "orr v16.16b, v16.16b, v17.16b", + "orr v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vorpd xmm0, xmm1": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x56 128-bit" + ], + "ExpectedArm64ASM": [ + "orr v16.16b, v16.16b, v17.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vorpd ymm0, ymm1": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0x56 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "orr v16.16b, v16.16b, v17.16b", + "orr v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vxorps xmm0, xmm1": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b00 0x57 128-bit" + ], + "ExpectedArm64ASM": [ + "eor v16.16b, v16.16b, v17.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vxorps ymm0, ymm1": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b00 0x57 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "eor v16.16b, v16.16b, v17.16b", + "eor v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vxorpd xmm0, xmm1": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x57 128-bit" + ], + "ExpectedArm64ASM": [ + "eor v16.16b, v16.16b, v17.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vxorpd ymm0, ymm1": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0x57 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "eor v16.16b, v16.16b, v17.16b", + "eor v2.16b, v2.16b, v3.16b", + "str q2, [x28, 
#16]" + ] + }, + "vpunpcklbw xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x60 128-bit" + ], + "ExpectedArm64ASM": [ + "zip1 v16.16b, v17.16b, v18.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpunpcklbw ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0x60 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "zip1 v16.16b, v17.16b, v18.16b", + "zip1 v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vpunpcklwd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x61 128-bit" + ], + "ExpectedArm64ASM": [ + "zip1 v16.8h, v17.8h, v18.8h", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpunpcklwd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0x61 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "zip1 v16.8h, v17.8h, v18.8h", + "zip1 v2.8h, v2.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vpunpckldq xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x62 128-bit" + ], + "ExpectedArm64ASM": [ + "zip1 v16.4s, v17.4s, v18.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpunpckldq ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0x62 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "zip1 v16.4s, v17.4s, v18.4s", + "zip1 v2.4s, v2.4s, v3.4s", + "str q2, [x28, #16]" + ] + }, + "vpacksswb xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b01 0x63 128-bit" + ], + "ExpectedArm64ASM": [ + "sqxtn v16.8b, v17.8h", + "sqxtn2 v16.16b, v18.8h", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpacksswb ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 1 0b01 0x63 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "sqxtn 
v16.8b, v17.8h", + "sqxtn2 v16.16b, v18.8h", + "sqxtn v2.8b, v2.8h", + "sqxtn2 v2.16b, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vpcmpgtb xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x64 128-bit" + ], + "ExpectedArm64ASM": [ + "cmgt v16.16b, v17.16b, v18.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpcmpgtb ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0x64 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "cmgt v16.16b, v17.16b, v18.16b", + "cmgt v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vpcmpgtw xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x65 128-bit" + ], + "ExpectedArm64ASM": [ + "cmgt v16.8h, v17.8h, v18.8h", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpcmpgtw ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0x65 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "cmgt v16.8h, v17.8h, v18.8h", + "cmgt v2.8h, v2.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vpcmpgtd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x66 128-bit" + ], + "ExpectedArm64ASM": [ + "cmgt v16.4s, v17.4s, v18.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpcmpgtd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0x66 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "cmgt v16.4s, v17.4s, v18.4s", + "cmgt v2.4s, v2.4s, v3.4s", + "str q2, [x28, #16]" + ] + }, + "vpackuswb xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b01 0x67 128-bit" + ], + "ExpectedArm64ASM": [ + "sqxtun v16.8b, v17.8h", + "sqxtun2 v16.16b, v18.8h", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpackuswb ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 1 0b01 
0x67 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "sqxtun v16.8b, v17.8h", + "sqxtun2 v16.16b, v18.8h", + "sqxtun v2.8b, v2.8h", + "sqxtun2 v2.16b, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vpshufd xmm0, xmm1, 00b": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 1 0b01 0x70 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v2.16b, v17.16b", + "mov v2.s[0], v17.s[0]", + "mov v2.s[1], v17.s[0]", + "mov v2.s[2], v17.s[0]", + "mov v16.16b, v2.16b", + "mov v16.s[3], v17.s[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpshufd xmm0, xmm1, 01b": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 1 0b01 0x70 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v2.16b, v17.16b", + "mov v2.s[0], v17.s[1]", + "mov v2.s[1], v17.s[0]", + "mov v2.s[2], v17.s[0]", + "mov v16.16b, v2.16b", + "mov v16.s[3], v17.s[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpshufd xmm0, xmm1, 10b": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 1 0b01 0x70 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v2.16b, v17.16b", + "mov v2.s[0], v17.s[2]", + "mov v2.s[1], v17.s[0]", + "mov v2.s[2], v17.s[0]", + "mov v16.16b, v2.16b", + "mov v16.s[3], v17.s[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpshufd xmm0, xmm1, 11b": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 1 0b01 0x70 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v2.16b, v17.16b", + "mov v2.s[0], v17.s[3]", + "mov v2.s[1], v17.s[0]", + "mov v2.s[2], v17.s[0]", + "mov v16.16b, v2.16b", + "mov v16.s[3], v17.s[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpshufd ymm0, ymm1, 00b": { + "ExpectedInstructionCount": 13, + "Comment": [ + "Map 1 0b01 0x70 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mov v3.16b, v17.16b", + "mov v3.s[0], v17.s[0]", + "mov v3.s[1], v17.s[0]", + "mov v3.s[2], v17.s[0]", + "mov v16.16b, v3.16b", + "mov v16.s[3], v17.s[0]", + "mov v3.16b, v2.16b", + "mov v3.s[0], v2.s[0]", + 
"mov v3.s[1], v2.s[0]", + "mov v3.s[2], v2.s[0]", + "mov v3.s[3], v2.s[0]", + "str q3, [x28, #16]" + ] + }, + "vpshufd ymm0, ymm1, 01b": { + "ExpectedInstructionCount": 13, + "Comment": [ + "Map 1 0b01 0x70 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mov v3.16b, v17.16b", + "mov v3.s[0], v17.s[1]", + "mov v3.s[1], v17.s[0]", + "mov v3.s[2], v17.s[0]", + "mov v16.16b, v3.16b", + "mov v16.s[3], v17.s[0]", + "mov v3.16b, v2.16b", + "mov v3.s[0], v2.s[1]", + "mov v3.s[1], v2.s[0]", + "mov v3.s[2], v2.s[0]", + "mov v3.s[3], v2.s[0]", + "str q3, [x28, #16]" + ] + }, + "vpshufd ymm0, ymm1, 10b": { + "ExpectedInstructionCount": 13, + "Comment": [ + "Map 1 0b01 0x70 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mov v3.16b, v17.16b", + "mov v3.s[0], v17.s[2]", + "mov v3.s[1], v17.s[0]", + "mov v3.s[2], v17.s[0]", + "mov v16.16b, v3.16b", + "mov v16.s[3], v17.s[0]", + "mov v3.16b, v2.16b", + "mov v3.s[0], v2.s[2]", + "mov v3.s[1], v2.s[0]", + "mov v3.s[2], v2.s[0]", + "mov v3.s[3], v2.s[0]", + "str q3, [x28, #16]" + ] + }, + "vpshufd ymm0, ymm1, 11b": { + "ExpectedInstructionCount": 13, + "Comment": [ + "Map 1 0b01 0x70 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mov v3.16b, v17.16b", + "mov v3.s[0], v17.s[3]", + "mov v3.s[1], v17.s[0]", + "mov v3.s[2], v17.s[0]", + "mov v16.16b, v3.16b", + "mov v16.s[3], v17.s[0]", + "mov v3.16b, v2.16b", + "mov v3.s[0], v2.s[3]", + "mov v3.s[1], v2.s[0]", + "mov v3.s[2], v2.s[0]", + "mov v3.s[3], v2.s[0]", + "str q3, [x28, #16]" + ] + }, + "vpshufhw xmm0, xmm1, 00b": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 1 0b10 0x70 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v2.16b, v17.16b", + "mov v2.h[4], v17.h[4]", + "mov v2.h[5], v17.h[4]", + "mov v2.h[6], v17.h[4]", + "mov v16.16b, v2.16b", + "mov v16.h[7], v17.h[4]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpshufhw xmm0, xmm1, 01b": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 1 0b10 0x70 
128-bit" + ], + "ExpectedArm64ASM": [ + "mov v2.16b, v17.16b", + "mov v2.h[4], v17.h[5]", + "mov v2.h[5], v17.h[4]", + "mov v2.h[6], v17.h[4]", + "mov v16.16b, v2.16b", + "mov v16.h[7], v17.h[4]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpshufhw xmm0, xmm1, 10b": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 1 0b10 0x70 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v2.16b, v17.16b", + "mov v2.h[4], v17.h[6]", + "mov v2.h[5], v17.h[4]", + "mov v2.h[6], v17.h[4]", + "mov v16.16b, v2.16b", + "mov v16.h[7], v17.h[4]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpshufhw xmm0, xmm1, 11b": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 1 0b10 0x70 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v2.16b, v17.16b", + "mov v2.h[4], v17.h[7]", + "mov v2.h[5], v17.h[4]", + "mov v2.h[6], v17.h[4]", + "mov v16.16b, v2.16b", + "mov v16.h[7], v17.h[4]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpshufhw ymm0, ymm1, 00b": { + "ExpectedInstructionCount": 13, + "Comment": [ + "Map 1 0b10 0x70 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mov v3.16b, v17.16b", + "mov v3.h[4], v17.h[4]", + "mov v3.h[5], v17.h[4]", + "mov v3.h[6], v17.h[4]", + "mov v16.16b, v3.16b", + "mov v16.h[7], v17.h[4]", + "mov v3.16b, v2.16b", + "mov v3.h[4], v2.h[4]", + "mov v3.h[5], v2.h[4]", + "mov v3.h[6], v2.h[4]", + "mov v3.h[7], v2.h[4]", + "str q3, [x28, #16]" + ] + }, + "vpshufhw ymm0, ymm1, 01b": { + "ExpectedInstructionCount": 13, + "Comment": [ + "Map 1 0b10 0x70 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mov v3.16b, v17.16b", + "mov v3.h[4], v17.h[5]", + "mov v3.h[5], v17.h[4]", + "mov v3.h[6], v17.h[4]", + "mov v16.16b, v3.16b", + "mov v16.h[7], v17.h[4]", + "mov v3.16b, v2.16b", + "mov v3.h[4], v2.h[5]", + "mov v3.h[5], v2.h[4]", + "mov v3.h[6], v2.h[4]", + "mov v3.h[7], v2.h[4]", + "str q3, [x28, #16]" + ] + }, + "vpshufhw ymm0, ymm1, 10b": { + "ExpectedInstructionCount": 13, + "Comment": [ + 
"Map 1 0b10 0x70 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mov v3.16b, v17.16b", + "mov v3.h[4], v17.h[6]", + "mov v3.h[5], v17.h[4]", + "mov v3.h[6], v17.h[4]", + "mov v16.16b, v3.16b", + "mov v16.h[7], v17.h[4]", + "mov v3.16b, v2.16b", + "mov v3.h[4], v2.h[6]", + "mov v3.h[5], v2.h[4]", + "mov v3.h[6], v2.h[4]", + "mov v3.h[7], v2.h[4]", + "str q3, [x28, #16]" + ] + }, + "vpshufhw ymm0, ymm1, 11b": { + "ExpectedInstructionCount": 13, + "Comment": [ + "Map 1 0b10 0x70 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mov v3.16b, v17.16b", + "mov v3.h[4], v17.h[7]", + "mov v3.h[5], v17.h[4]", + "mov v3.h[6], v17.h[4]", + "mov v16.16b, v3.16b", + "mov v16.h[7], v17.h[4]", + "mov v3.16b, v2.16b", + "mov v3.h[4], v2.h[7]", + "mov v3.h[5], v2.h[4]", + "mov v3.h[6], v2.h[4]", + "mov v3.h[7], v2.h[4]", + "str q3, [x28, #16]" + ] + }, + "vpshuflw xmm0, xmm1, 00b": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 1 0b11 0x70 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v2.16b, v17.16b", + "mov v2.h[0], v17.h[0]", + "mov v2.h[1], v17.h[0]", + "mov v2.h[2], v17.h[0]", + "mov v16.16b, v2.16b", + "mov v16.h[3], v17.h[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpshuflw xmm0, xmm1, 01b": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 1 0b11 0x70 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v2.16b, v17.16b", + "mov v2.h[0], v17.h[1]", + "mov v2.h[1], v17.h[0]", + "mov v2.h[2], v17.h[0]", + "mov v16.16b, v2.16b", + "mov v16.h[3], v17.h[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpshuflw xmm0, xmm1, 10b": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 1 0b11 0x70 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v2.16b, v17.16b", + "mov v2.h[0], v17.h[2]", + "mov v2.h[1], v17.h[0]", + "mov v2.h[2], v17.h[0]", + "mov v16.16b, v2.16b", + "mov v16.h[3], v17.h[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpshuflw xmm0, xmm1, 11b": { + "ExpectedInstructionCount": 8, + 
"Comment": [ + "Map 1 0b11 0x70 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v2.16b, v17.16b", + "mov v2.h[0], v17.h[3]", + "mov v2.h[1], v17.h[0]", + "mov v2.h[2], v17.h[0]", + "mov v16.16b, v2.16b", + "mov v16.h[3], v17.h[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpshuflw ymm0, ymm1, 00b": { + "ExpectedInstructionCount": 13, + "Comment": [ + "Map 1 0b11 0x70 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mov v3.16b, v17.16b", + "mov v3.h[0], v17.h[0]", + "mov v3.h[1], v17.h[0]", + "mov v3.h[2], v17.h[0]", + "mov v16.16b, v3.16b", + "mov v16.h[3], v17.h[0]", + "mov v3.16b, v2.16b", + "mov v3.h[0], v2.h[0]", + "mov v3.h[1], v2.h[0]", + "mov v3.h[2], v2.h[0]", + "mov v3.h[3], v2.h[0]", + "str q3, [x28, #16]" + ] + }, + "vpshuflw ymm0, ymm1, 01b": { + "ExpectedInstructionCount": 13, + "Comment": [ + "Map 1 0b11 0x70 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mov v3.16b, v17.16b", + "mov v3.h[0], v17.h[1]", + "mov v3.h[1], v17.h[0]", + "mov v3.h[2], v17.h[0]", + "mov v16.16b, v3.16b", + "mov v16.h[3], v17.h[0]", + "mov v3.16b, v2.16b", + "mov v3.h[0], v2.h[1]", + "mov v3.h[1], v2.h[0]", + "mov v3.h[2], v2.h[0]", + "mov v3.h[3], v2.h[0]", + "str q3, [x28, #16]" + ] + }, + "vpshuflw ymm0, ymm1, 10b": { + "ExpectedInstructionCount": 13, + "Comment": [ + "Map 1 0b11 0x70 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mov v3.16b, v17.16b", + "mov v3.h[0], v17.h[2]", + "mov v3.h[1], v17.h[0]", + "mov v3.h[2], v17.h[0]", + "mov v16.16b, v3.16b", + "mov v16.h[3], v17.h[0]", + "mov v3.16b, v2.16b", + "mov v3.h[0], v2.h[2]", + "mov v3.h[1], v2.h[0]", + "mov v3.h[2], v2.h[0]", + "mov v3.h[3], v2.h[0]", + "str q3, [x28, #16]" + ] + }, + "vpshuflw ymm0, ymm1, 11b": { + "ExpectedInstructionCount": 13, + "Comment": [ + "Map 1 0b11 0x70 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mov v3.16b, v17.16b", + "mov v3.h[0], v17.h[3]", + "mov v3.h[1], v17.h[0]", + "mov v3.h[2], v17.h[0]", + 
"mov v16.16b, v3.16b", + "mov v16.h[3], v17.h[0]", + "mov v3.16b, v2.16b", + "mov v3.h[0], v2.h[3]", + "mov v3.h[1], v2.h[0]", + "mov v3.h[2], v2.h[0]", + "mov v3.h[3], v2.h[0]", + "str q3, [x28, #16]" + ] + }, + "vpcmpeqb xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x74 128-bit" + ], + "ExpectedArm64ASM": [ + "cmeq v16.16b, v17.16b, v18.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpcmpeqb ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0x74 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "cmeq v16.16b, v17.16b, v18.16b", + "cmeq v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vpcmpeqw xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x75 128-bit" + ], + "ExpectedArm64ASM": [ + "cmeq v16.8h, v17.8h, v18.8h", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpcmpeqw ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0x75 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "cmeq v16.8h, v17.8h, v18.8h", + "cmeq v2.8h, v2.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vpcmpeqd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x76 128-bit" + ], + "ExpectedArm64ASM": [ + "cmeq v16.4s, v17.4s, v18.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpcmpeqd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0x76 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "cmeq v16.4s, v17.4s, v18.4s", + "cmeq v2.4s, v2.4s, v3.4s", + "str q2, [x28, #16]" + ] + }, + "vzeroupper": { + "ExpectedInstructionCount": 17, + "Comment": [ + "Might be able to use DC ZVA", + "Map 1 0b01 0x77 L=0" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "str q2, [x28, #16]", + "str q2, [x28, #32]", + "str q2, [x28, #48]", + "str q2,
[x28, #64]", + "str q2, [x28, #80]", + "str q2, [x28, #96]", + "str q2, [x28, #112]", + "str q2, [x28, #128]", + "str q2, [x28, #144]", + "str q2, [x28, #160]", + "str q2, [x28, #176]", + "str q2, [x28, #192]", + "str q2, [x28, #208]", + "str q2, [x28, #224]", + "str q2, [x28, #240]", + "str q2, [x28, #256]" + ] + }, + "vzeroall": { + "ExpectedInstructionCount": 33, + "Comment": [ + "Might be able to use DC ZVA", + "Map 1 0b01 0x77 L=1" + ], + "ExpectedArm64ASM": [ + "movi v16.2d, #0x0", + "movi v17.2d, #0x0", + "movi v18.2d, #0x0", + "movi v19.2d, #0x0", + "movi v20.2d, #0x0", + "movi v21.2d, #0x0", + "movi v22.2d, #0x0", + "movi v23.2d, #0x0", + "movi v24.2d, #0x0", + "movi v25.2d, #0x0", + "movi v26.2d, #0x0", + "movi v27.2d, #0x0", + "movi v28.2d, #0x0", + "movi v29.2d, #0x0", + "movi v30.2d, #0x0", + "movi v31.2d, #0x0", + "movi v2.2d, #0x0", + "str q2, [x28, #16]", + "str q2, [x28, #32]", + "str q2, [x28, #48]", + "str q2, [x28, #64]", + "str q2, [x28, #80]", + "str q2, [x28, #96]", + "str q2, [x28, #112]", + "str q2, [x28, #128]", + "str q2, [x28, #144]", + "str q2, [x28, #160]", + "str q2, [x28, #176]", + "str q2, [x28, #192]", + "str q2, [x28, #208]", + "str q2, [x28, #224]", + "str q2, [x28, #240]", + "str q2, [x28, #256]" + ] + }, + "vcmpps xmm0, xmm1, xmm2, 0x00": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b00 0xC2 128-bit" + ], + "ExpectedArm64ASM": [ + "fcmeq v16.4s, v17.4s, v18.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcmpps ymm0, ymm1, ymm2, 0x00": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b00 0xC2 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "fcmeq v16.4s, v17.4s, v18.4s", + "fcmeq v2.4s, v2.4s, v3.4s", + "str q2, [x28, #16]" + ] + }, + "vcmpps xmm0, xmm1, xmm2, 0x01": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b00 0xC2 128-bit" + ], + "ExpectedArm64ASM": [ + "fcmgt v16.4s, v18.4s, v17.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]"
+ ] + }, + "vcmpps ymm0, ymm1, ymm2, 0x01": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b00 0xC2 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "fcmgt v16.4s, v18.4s, v17.4s", + "fcmgt v2.4s, v3.4s, v2.4s", + "str q2, [x28, #16]" + ] + }, + "vcmpps xmm0, xmm1, xmm2, 0x02": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b00 0xC2 128-bit" + ], + "ExpectedArm64ASM": [ + "fcmge v16.4s, v18.4s, v17.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcmpps ymm0, ymm1, ymm2, 0x02": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b00 0xC2 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "fcmge v16.4s, v18.4s, v17.4s", + "fcmge v2.4s, v3.4s, v2.4s", + "str q2, [x28, #16]" + ] + }, + "vcmpps xmm0, xmm1, xmm2, 0x03": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 1 0b00 0xC2 128-bit" + ], + "ExpectedArm64ASM": [ + "fcmge v0.4s, v17.4s, v18.4s", + "fcmgt v1.4s, v18.4s, v17.4s", + "orr v16.16b, v0.16b, v1.16b", + "mvn v16.16b, v16.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcmpps ymm0, ymm1, ymm2, 0x03": { + "ExpectedInstructionCount": 11, + "Comment": [ + "Map 1 0b00 0xC2 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "fcmge v0.4s, v17.4s, v18.4s", + "fcmgt v1.4s, v18.4s, v17.4s", + "orr v16.16b, v0.16b, v1.16b", + "mvn v16.16b, v16.16b", + "fcmge v0.4s, v2.4s, v3.4s", + "fcmgt v1.4s, v3.4s, v2.4s", + "orr v2.16b, v0.16b, v1.16b", + "mvn v2.16b, v2.16b", + "str q2, [x28, #16]" + ] + }, + "vcmpps xmm0, xmm1, xmm2, 0x04": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b00 0xC2 128-bit" + ], + "ExpectedArm64ASM": [ + "fcmeq v16.4s, v17.4s, v18.4s", + "mvn v16.16b, v16.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcmpps ymm0, ymm1, ymm2, 0x04": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 1 0b00 0xC2 256-bit" + ], + "ExpectedArm64ASM": [ + 
"ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "fcmeq v16.4s, v17.4s, v18.4s", + "mvn v16.16b, v16.16b", + "fcmeq v2.4s, v2.4s, v3.4s", + "mvn v2.16b, v2.16b", + "str q2, [x28, #16]" + ] + }, + "vcmpps xmm0, xmm1, xmm2, 0x05": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b00 0xC2 128-bit" + ], + "ExpectedArm64ASM": [ + "fcmgt v2.4s, v18.4s, v17.4s", + "mvn v16.16b, v2.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcmpps ymm0, ymm1, ymm2, 0x05": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 1 0b00 0xC2 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "fcmgt v4.4s, v18.4s, v17.4s", + "mvn v16.16b, v4.16b", + "fcmgt v2.4s, v3.4s, v2.4s", + "mvn v2.16b, v2.16b", + "str q2, [x28, #16]" + ] + }, + "vcmpps xmm0, xmm1, xmm2, 0x06": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b00 0xC2 128-bit" + ], + "ExpectedArm64ASM": [ + "fcmge v2.4s, v18.4s, v17.4s", + "mvn v16.16b, v2.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcmpps ymm0, ymm1, ymm2, 0x06": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 1 0b00 0xC2 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "fcmge v4.4s, v18.4s, v17.4s", + "mvn v16.16b, v4.16b", + "fcmge v2.4s, v3.4s, v2.4s", + "mvn v2.16b, v2.16b", + "str q2, [x28, #16]" + ] + }, + "vcmpps xmm0, xmm1, xmm2, 0x07": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b00 0xC2 128-bit" + ], + "ExpectedArm64ASM": [ + "fcmge v0.4s, v17.4s, v18.4s", + "fcmgt v1.4s, v18.4s, v17.4s", + "orr v16.16b, v0.16b, v1.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcmpps ymm0, ymm1, ymm2, 0x07": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 1 0b00 0xC2 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "fcmge v0.4s, v17.4s, v18.4s", + "fcmgt v1.4s, v18.4s, v17.4s", + "orr v16.16b, v0.16b, v1.16b", + "fcmge v0.4s, v2.4s, v3.4s", + "fcmgt 
v1.4s, v3.4s, v2.4s", + "orr v2.16b, v0.16b, v1.16b", + "str q2, [x28, #16]" + ] + }, + "vcmppd xmm0, xmm1, xmm2, 0x00": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0xC2 128-bit" + ], + "ExpectedArm64ASM": [ + "fcmeq v16.2d, v17.2d, v18.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcmppd ymm0, ymm1, ymm2, 0x00": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xC2 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "fcmeq v16.2d, v17.2d, v18.2d", + "fcmeq v2.2d, v2.2d, v3.2d", + "str q2, [x28, #16]" + ] + }, + "vcmppd xmm0, xmm1, xmm2, 0x01": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0xC2 128-bit" + ], + "ExpectedArm64ASM": [ + "fcmgt v16.2d, v18.2d, v17.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcmppd ymm0, ymm1, ymm2, 0x01": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xC2 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "fcmgt v16.2d, v18.2d, v17.2d", + "fcmgt v2.2d, v3.2d, v2.2d", + "str q2, [x28, #16]" + ] + }, + "vcmppd xmm0, xmm1, xmm2, 0x02": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0xC2 128-bit" + ], + "ExpectedArm64ASM": [ + "fcmge v16.2d, v18.2d, v17.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcmppd ymm0, ymm1, ymm2, 0x02": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xC2 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "fcmge v16.2d, v18.2d, v17.2d", + "fcmge v2.2d, v3.2d, v2.2d", + "str q2, [x28, #16]" + ] + }, + "vcmppd xmm0, xmm1, xmm2, 0x03": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 1 0b01 0xC2 128-bit" + ], + "ExpectedArm64ASM": [ + "fcmge v0.2d, v17.2d, v18.2d", + "fcmgt v1.2d, v18.2d, v17.2d", + "orr v16.16b, v0.16b, v1.16b", + "mvn v16.16b, v16.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcmppd ymm0, ymm1, ymm2, 0x03": { + 
"ExpectedInstructionCount": 11, + "Comment": [ + "Map 1 0b01 0xC2 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "fcmge v0.2d, v17.2d, v18.2d", + "fcmgt v1.2d, v18.2d, v17.2d", + "orr v16.16b, v0.16b, v1.16b", + "mvn v16.16b, v16.16b", + "fcmge v0.2d, v2.2d, v3.2d", + "fcmgt v1.2d, v3.2d, v2.2d", + "orr v2.16b, v0.16b, v1.16b", + "mvn v2.16b, v2.16b", + "str q2, [x28, #16]" + ] + }, + "vcmppd xmm0, xmm1, xmm2, 0x04": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b01 0xC2 128-bit" + ], + "ExpectedArm64ASM": [ + "fcmeq v16.2d, v17.2d, v18.2d", + "mvn v16.16b, v16.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcmppd ymm0, ymm1, ymm2, 0x04": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 1 0b01 0xC2 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "fcmeq v16.2d, v17.2d, v18.2d", + "mvn v16.16b, v16.16b", + "fcmeq v2.2d, v2.2d, v3.2d", + "mvn v2.16b, v2.16b", + "str q2, [x28, #16]" + ] + }, + "vcmppd xmm0, xmm1, xmm2, 0x05": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b01 0xC2 128-bit" + ], + "ExpectedArm64ASM": [ + "fcmgt v2.2d, v18.2d, v17.2d", + "mvn v16.16b, v2.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcmppd ymm0, ymm1, ymm2, 0x05": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 1 0b01 0xC2 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "fcmgt v4.2d, v18.2d, v17.2d", + "mvn v16.16b, v4.16b", + "fcmgt v2.2d, v3.2d, v2.2d", + "mvn v2.16b, v2.16b", + "str q2, [x28, #16]" + ] + }, + "vcmppd xmm0, xmm1, xmm2, 0x06": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b01 0xC2 128-bit" + ], + "ExpectedArm64ASM": [ + "fcmge v2.2d, v18.2d, v17.2d", + "mvn v16.16b, v2.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcmppd ymm0, ymm1, ymm2, 0x06": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 1 0b01 0xC2 256-bit" + ], + 
"ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "fcmge v4.2d, v18.2d, v17.2d", + "mvn v16.16b, v4.16b", + "fcmge v2.2d, v3.2d, v2.2d", + "mvn v2.16b, v2.16b", + "str q2, [x28, #16]" + ] + }, + "vcmppd xmm0, xmm1, xmm2, 0x07": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xC2 128-bit" + ], + "ExpectedArm64ASM": [ + "fcmge v0.2d, v17.2d, v18.2d", + "fcmgt v1.2d, v18.2d, v17.2d", + "orr v16.16b, v0.16b, v1.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcmppd ymm0, ymm1, ymm2, 0x07": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 1 0b01 0xC2 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "fcmge v0.2d, v17.2d, v18.2d", + "fcmgt v1.2d, v18.2d, v17.2d", + "orr v16.16b, v0.16b, v1.16b", + "fcmge v0.2d, v2.2d, v3.2d", + "fcmgt v1.2d, v3.2d, v2.2d", + "orr v2.16b, v0.16b, v1.16b", + "str q2, [x28, #16]" + ] + }, + "vcmpss xmm0, xmm1, xmm2, 0x00": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b10 0xC2 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "fcmeq s0, s18, s17", + "mov v16.s[0], v0.s[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcmpss xmm0, xmm1, xmm2, 0x01": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b10 0xC2 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "fcmgt s0, s18, s17", + "mov v16.s[0], v0.s[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcmpss xmm0, xmm1, xmm2, 0x02": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b10 0xC2 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "fcmge s0, s18, s17", + "mov v16.s[0], v0.s[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcmpss xmm0, xmm1, xmm2, 0x03": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 1 0b10 0xC2 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "fcmge s0, s17, s18", + "fcmgt s1, s18, s17", + "orr v0.8b, v0.8b, v1.8b", + "mvn 
v0.8b, v0.8b", + "mov v16.s[0], v0.s[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcmpss xmm0, xmm1, xmm2, 0x04": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 1 0b10 0xC2 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "fcmeq s0, s18, s17", + "mvn v0.8b, v0.8b", + "mov v16.s[0], v0.s[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcmpss xmm0, xmm1, xmm2, 0x05": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 1 0b10 0xC2 128-bit" + ], + "ExpectedArm64ASM": [ + "fcmgt s2, s18, s17", + "mvn v2.16b, v2.16b", + "mov v16.16b, v17.16b", + "mov v16.s[0], v2.s[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcmpss xmm0, xmm1, xmm2, 0x06": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 1 0b10 0xC2 128-bit" + ], + "ExpectedArm64ASM": [ + "fcmge s2, s18, s17", + "mvn v2.16b, v2.16b", + "mov v16.16b, v17.16b", + "mov v16.s[0], v2.s[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcmpss xmm0, xmm1, xmm2, 0x07": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 1 0b10 0xC2 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "fcmge s0, s17, s18", + "fcmgt s1, s18, s17", + "orr v0.8b, v0.8b, v1.8b", + "mov v16.s[0], v0.s[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcmpsd xmm0, xmm1, xmm2, 0x00": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b11 0xC2 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "fcmeq d0, d18, d17", + "mov v16.d[0], v0.d[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcmpsd xmm0, xmm1, xmm2, 0x01": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b11 0xC2 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "fcmgt d0, d18, d17", + "mov v16.d[0], v0.d[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcmpsd xmm0, xmm1, xmm2, 0x02": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b11 0xC2 128-bit" + ], + "ExpectedArm64ASM": [ 
+ "mov v16.16b, v17.16b", + "fcmge d0, d18, d17", + "mov v16.d[0], v0.d[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcmpsd xmm0, xmm1, xmm2, 0x03": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 1 0b11 0xC2 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "fcmge d0, d17, d18", + "fcmgt d1, d18, d17", + "orr v0.8b, v0.8b, v1.8b", + "mvn v0.8b, v0.8b", + "mov v16.d[0], v0.d[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcmpsd xmm0, xmm1, xmm2, 0x04": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 1 0b11 0xC2 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "fcmeq d0, d18, d17", + "mvn v0.8b, v0.8b", + "mov v16.d[0], v0.d[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcmpsd xmm0, xmm1, xmm2, 0x05": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 1 0b11 0xC2 128-bit" + ], + "ExpectedArm64ASM": [ + "fcmgt d2, d18, d17", + "mvn v2.16b, v2.16b", + "mov v16.16b, v17.16b", + "mov v16.d[0], v2.d[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcmpsd xmm0, xmm1, xmm2, 0x06": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 1 0b11 0xC2 128-bit" + ], + "ExpectedArm64ASM": [ + "fcmge d2, d18, d17", + "mvn v2.16b, v2.16b", + "mov v16.16b, v17.16b", + "mov v16.d[0], v2.d[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcmpsd xmm0, xmm1, xmm2, 0x07": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 1 0b11 0xC2 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "fcmge d0, d17, d18", + "fcmgt d1, d18, d17", + "orr v0.8b, v0.8b, v1.8b", + "mov v16.d[0], v0.d[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpinsrw xmm0, xmm0, eax, 000b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0xC4 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.h[0], w4", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpinsrw xmm0, xmm1, eax, 000b": { + "ExpectedInstructionCount": 4, + "Comment": [ + 
"Map 1 0b01 0xC4 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "mov v16.h[0], w4", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpinsrw xmm0, xmm1, eax, 001b": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b01 0xC4 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "mov v16.h[1], w4", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpinsrw xmm0, xmm1, eax, 111b": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b01 0xC4 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "mov v16.h[7], w4", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpextrw eax, xmm0, 000b": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 1 0b01 0xC5 128-bit" + ], + "ExpectedArm64ASM": [ + "umov w4, v16.h[0]" + ] + }, + "vpextrw eax, xmm0, 001b": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 1 0b01 0xC5 128-bit" + ], + "ExpectedArm64ASM": [ + "umov w4, v16.h[1]" + ] + }, + "vpextrw eax, xmm0, 111b": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 1 0b01 0xC5 128-bit" + ], + "ExpectedArm64ASM": [ + "umov w4, v16.h[7]" + ] + }, + "vpextrw [rax], xmm0, 000b": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 1 0b01 0xC5 128-bit" + ], + "ExpectedArm64ASM": [ + "st1 {v16.h}[0], [x4]" + ] + }, + "vpextrw [rax], xmm0, 001b": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 1 0b01 0xC5 128-bit" + ], + "ExpectedArm64ASM": [ + "st1 {v16.h}[1], [x4]" + ] + }, + "vpextrw [rax], xmm0, 111b": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 1 0b01 0xC5 128-bit" + ], + "ExpectedArm64ASM": [ + "st1 {v16.h}[7], [x4]" + ] + }, + "vshufps xmm0, xmm1, xmm2, 00b": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b00 0xC6 128-bit" + ], + "ExpectedArm64ASM": [ + "dup v2.4s, v17.s[0]", + "dup v3.4s, v18.s[0]", + "zip1 v16.2d, v2.2d, v3.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vshufps ymm0, ymm1, ymm2, 00b": { + 
"ExpectedInstructionCount": 9, + "Comment": [ + "Map 1 0b00 0xC6 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "dup v4.4s, v17.s[0]", + "dup v5.4s, v18.s[0]", + "zip1 v16.2d, v4.2d, v5.2d", + "dup v2.4s, v2.s[0]", + "dup v3.4s, v3.s[0]", + "zip1 v2.2d, v2.2d, v3.2d", + "str q2, [x28, #16]" + ] + }, + "vshufps xmm0, xmm1, xmm2, 01b": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b00 0xC6 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr x0, [x28, #2056]", + "ldr q2, [x0, #16]", + "tbl v16.16b, {v17.16b, v18.16b}, v2.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vshufps ymm0, ymm1, ymm2, 01b": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 1 0b00 0xC6 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "ldr x0, [x28, #2056]", + "ldr q4, [x0, #16]", + "tbl v16.16b, {v17.16b, v18.16b}, v4.16b", + "tbl v2.16b, {v2.16b, v3.16b}, v4.16b", + "str q2, [x28, #16]" + ] + }, + "vshufps xmm0, xmm1, xmm2, 10b": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b00 0xC6 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr x0, [x28, #2056]", + "ldr q2, [x0, #32]", + "tbl v16.16b, {v17.16b, v18.16b}, v2.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vshufps ymm0, ymm1, ymm2, 10b": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 1 0b00 0xC6 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "ldr x0, [x28, #2056]", + "ldr q4, [x0, #32]", + "tbl v16.16b, {v17.16b, v18.16b}, v4.16b", + "tbl v2.16b, {v2.16b, v3.16b}, v4.16b", + "str q2, [x28, #16]" + ] + }, + "vshufps xmm0, xmm1, xmm2, 11b": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b00 0xC6 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr x0, [x28, #2056]", + "ldr q2, [x0, #48]", + "tbl v16.16b, {v17.16b, v18.16b}, v2.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vshufps ymm0, ymm1, ymm2, 11b": { + "ExpectedInstructionCount": 7, + 
"Comment": [ + "Map 1 0b00 0xC6 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "ldr x0, [x28, #2056]", + "ldr q4, [x0, #48]", + "tbl v16.16b, {v17.16b, v18.16b}, v4.16b", + "tbl v2.16b, {v2.16b, v3.16b}, v4.16b", + "str q2, [x28, #16]" + ] + }, + "vshufpd xmm0, xmm1, xmm2, 0b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0xC6 128-bit" + ], + "ExpectedArm64ASM": [ + "zip1 v16.2d, v17.2d, v18.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vshufpd ymm0, ymm1, ymm2, 0b": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xC6 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "zip1 v16.2d, v17.2d, v18.2d", + "zip1 v2.2d, v2.2d, v3.2d", + "str q2, [x28, #16]" + ] + }, + "vshufpd xmm0, xmm1, xmm2, 1b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0xC6 128-bit" + ], + "ExpectedArm64ASM": [ + "ext v16.16b, v17.16b, v18.16b, #8", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vshufpd ymm0, ymm1, ymm2, 1b": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xC6 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "ext v16.16b, v17.16b, v18.16b, #8", + "zip1 v2.2d, v2.2d, v3.2d", + "str q2, [x28, #16]" + ] + }, + "vmovaps xmm0, [rax]": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b00 0x28 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x4]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vmovaps ymm0, [rax]": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b00 0x28 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x4]", + "ldr q2, [x4, #16]", + "str q2, [x28, #16]" + ] + }, + "vmovaps xmm0, xmm1": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b00 0x29 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v16.16b, v17.16b", + "str q2, [x28, #16]" + ] + }, + "vmovaps ymm0, ymm1": { + "ExpectedInstructionCount": 
3, + "Comment": [ + "Map 1 0b00 0x29 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mov v16.16b, v17.16b", + "str q2, [x28, #16]" + ] + }, + "vmovapd xmm0, [rax]": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x28 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x4]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vmovapd ymm0, [rax]": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x28 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x4]", + "ldr q2, [x4, #16]", + "str q2, [x28, #16]" + ] + }, + "vmovapd xmm0, xmm1": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x29 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v16.16b, v17.16b", + "str q2, [x28, #16]" + ] + }, + "vmovapd ymm0, ymm1": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x29 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mov v16.16b, v17.16b", + "str q2, [x28, #16]" + ] + }, + "vmovaps [rax], xmm0": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 1 0b00 0x29 128-bit" + ], + "ExpectedArm64ASM": [ + "str q16, [x4]" + ] + }, + "vmovaps [rax], ymm0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b00 0x29 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "str q16, [x4]", + "str q2, [x4, #16]" + ] + }, + "vmovapd [rax], xmm0": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 1 0b01 0x29 128-bit" + ], + "ExpectedArm64ASM": [ + "str q16, [x4]" + ] + }, + "vmovapd [rax], ymm0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x29 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "str q16, [x4]", + "str q2, [x4, #16]" + ] + }, + "vcvtsi2ss xmm0, xmm1, eax": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b10 0x2A 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "scvtf s0, w4", + "mov v16.s[0], v0.s[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcvtsi2ss 
xmm0, xmm1, rax": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b10 0x2A 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "scvtf s0, x4", + "mov v16.s[0], v0.s[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcvtsi2sd xmm0, xmm1, eax": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b11 0x2A 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "scvtf d0, w4", + "mov v16.d[0], v0.d[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcvtsi2sd xmm0, xmm1, rax": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b11 0x2A 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "scvtf d0, x4", + "mov v16.d[0], v0.d[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vmovntps [rax], xmm0": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 1 0b00 0x2B 128-bit" + ], + "ExpectedArm64ASM": [ + "str q16, [x4]" + ] + }, + "vmovntps [rax], ymm0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b00 0x2B 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "str q16, [x4]", + "str q2, [x4, #16]" + ] + }, + "vmovntpd [rax], xmm0": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 1 0b01 0x2B 128-bit" + ], + "ExpectedArm64ASM": [ + "str q16, [x4]" + ] + }, + "vmovntpd [rax], ymm0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x2B 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "str q16, [x4]", + "str q2, [x4, #16]" + ] + }, + "vcvttss2si eax, xmm0": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 1 0b10 0x2c 128-bit" + ], + "ExpectedArm64ASM": [ + "fcvtzs w4, s16" + ] + }, + "vcvttss2si rax, xmm0": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 1 0b10 0x2c 128-bit" + ], + "ExpectedArm64ASM": [ + "fcvtzs x4, s16" + ] + }, + "vcvttsd2si eax, xmm0": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 1 0b11 0x2c 128-bit" + ], + "ExpectedArm64ASM": [ + "fcvtzs w4, d16" + ] + }, + 
"vcvttsd2si rax, xmm0": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 1 0b11 0x2c 128-bit" + ], + "ExpectedArm64ASM": [ + "fcvtzs x4, d16" + ] + }, + "vcvtss2si eax, xmm0": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 1 0b10 0x2d 128-bit" + ], + "ExpectedArm64ASM": [ + "frinti s0, s16", + "fcvtzs w4, s0" + ] + }, + "vcvtss2si rax, xmm0": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 1 0b10 0x2d 128-bit" + ], + "ExpectedArm64ASM": [ + "frinti s0, s16", + "fcvtzs x4, s0" + ] + }, + "vcvtsd2si eax, xmm0": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 1 0b11 0x2d 128-bit" + ], + "ExpectedArm64ASM": [ + "frinti d0, d16", + "fcvtzs x4, d0" + ] + }, + "vcvtsd2si rax, xmm0": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 1 0b11 0x2d 128-bit" + ], + "ExpectedArm64ASM": [ + "frinti d0, d16", + "fcvtzs x4, d0" + ] + }, + "vucomiss xmm0, xmm1": { + "ExpectedInstructionCount": 11, + "Comment": [ + "Map 1 0b00 0x2e 128-bit" + ], + "ExpectedArm64ASM": [ + "fcmp s16, s17", + "mov w27, #0x0", + "cset w20, eq", + "cset w21, lo", + "cset w22, vs", + "orr w21, w21, w22", + "lsl x21, x21, #29", + "orr w20, w20, w22", + "orr w20, w21, w20, lsl #30", + "eor w26, w22, #0x1", + "msr nzcv, x20" + ] + }, + "vucomisd xmm0, xmm1": { + "ExpectedInstructionCount": 11, + "Comment": [ + "Map 1 0b01 0x2e 128-bit" + ], + "ExpectedArm64ASM": [ + "fcmp d16, d17", + "mov w27, #0x0", + "cset w20, eq", + "cset w21, lo", + "cset w22, vs", + "orr w21, w21, w22", + "lsl x21, x21, #29", + "orr w20, w20, w22", + "orr w20, w21, w20, lsl #30", + "eor w26, w22, #0x1", + "msr nzcv, x20" + ] + }, + "vcomiss xmm0, xmm1": { + "ExpectedInstructionCount": 11, + "Comment": [ + "Map 1 0b00 0x2f 128-bit" + ], + "ExpectedArm64ASM": [ + "fcmp s16, s17", + "mov w27, #0x0", + "cset w20, eq", + "cset w21, lo", + "cset w22, vs", + "orr w21, w21, w22", + "lsl x21, x21, #29", + "orr w20, w20, w22", + "orr w20, w21, w20, lsl #30", + "eor w26, w22, #0x1", + "msr nzcv, x20" + ] 
+ }, + "vcomisd xmm0, xmm1": { + "ExpectedInstructionCount": 11, + "Comment": [ + "Map 1 0b01 0x2f 128-bit" + ], + "ExpectedArm64ASM": [ + "fcmp d16, d17", + "mov w27, #0x0", + "cset w20, eq", + "cset w21, lo", + "cset w22, vs", + "orr w21, w21, w22", + "lsl x21, x21, #29", + "orr w20, w20, w22", + "orr w20, w21, w20, lsl #30", + "eor w26, w22, #0x1", + "msr nzcv, x20" + ] + }, + "vaddps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b00 0x58 128-bit" + ], + "ExpectedArm64ASM": [ + "fadd v16.4s, v17.4s, v18.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vaddps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b00 0x58 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "fadd v16.4s, v17.4s, v18.4s", + "fadd v2.4s, v2.4s, v3.4s", + "str q2, [x28, #16]" + ] + }, + "vaddpd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x58 128-bit" + ], + "ExpectedArm64ASM": [ + "fadd v16.2d, v17.2d, v18.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vaddpd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0x58 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "fadd v16.2d, v17.2d, v18.2d", + "fadd v2.2d, v2.2d, v3.2d", + "str q2, [x28, #16]" + ] + }, + "vaddss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b10 0x58 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "fadd s0, s17, s18", + "mov v16.s[0], v0.s[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vaddsd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b11 0x58 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "fadd d0, d17, d18", + "mov v16.d[0], v0.d[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vmulps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + 
"Map 1 0b00 0x59 128-bit" + ], + "ExpectedArm64ASM": [ + "fmul v16.4s, v17.4s, v18.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vmulps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b00 0x59 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "fmul v16.4s, v17.4s, v18.4s", + "fmul v2.4s, v2.4s, v3.4s", + "str q2, [x28, #16]" + ] + }, + "vmulpd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x59 128-bit" + ], + "ExpectedArm64ASM": [ + "fmul v16.2d, v17.2d, v18.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vmulpd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0x59 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "fmul v16.2d, v17.2d, v18.2d", + "fmul v2.2d, v2.2d, v3.2d", + "str q2, [x28, #16]" + ] + }, + "vmulss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b10 0x59 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "fmul s0, s17, s18", + "mov v16.s[0], v0.s[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vmulsd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b11 0x59 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "fmul d0, d17, d18", + "mov v16.d[0], v0.d[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcvtps2pd xmm0, xmm1": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b00 0x5a 128-bit" + ], + "ExpectedArm64ASM": [ + "fcvtl v16.2d, v17.2s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcvtpd2ps xmm0, [rax]": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b01 0x5a 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x4]", + "fcvtn v16.2s, v2.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcvtpd2ps xmm0, yword [rax]": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 1 0b01 
0x5a 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x4]", + "ldr q3, [x4, #16]", + "fcvtn v2.2s, v2.2d", + "fcvtn v3.2s, v3.2d", + "mov v16.16b, v2.16b", + "mov v16.d[1], v3.d[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcvtpd2ps xmm0, xmm1": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x5a 128-bit" + ], + "ExpectedArm64ASM": [ + "fcvtn v16.2s, v17.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcvtss2sd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b10 0x5a 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "fcvt d0, s18", + "mov v16.d[0], v0.d[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcvtsd2ss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b11 0x5a 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "fcvt s0, d18", + "mov v16.s[0], v0.s[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcvtdq2ps xmm0, xmm1": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b00 0x5b 128-bit" + ], + "ExpectedArm64ASM": [ + "scvtf v16.4s, v17.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcvtdq2ps ymm0, ymm1": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b00 0x5b 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "scvtf v16.4s, v17.4s", + "scvtf v2.4s, v2.4s", + "str q2, [x28, #16]" + ] + }, + "vcvtps2dq xmm0, xmm1": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b01 0x5b 128-bit" + ], + "ExpectedArm64ASM": [ + "frinti v16.4s, v17.4s", + "fcvtzs v16.4s, v16.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcvtps2dq ymm0, ymm1": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 1 0b01 0x5b 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "frinti v16.4s, v17.4s", + "fcvtzs v16.4s, v16.4s", + "frinti v2.4s, v2.4s", + "fcvtzs v2.4s, v2.4s", + "str q2, [x28, #16]" + ] + }, + "vcvttps2dq xmm0, xmm1": { + 
"ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b10 0x5b 128-bit" + ], + "ExpectedArm64ASM": [ + "fcvtzs v16.4s, v17.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcvttps2dq ymm0, ymm1": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b10 0x5b 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "fcvtzs v16.4s, v17.4s", + "fcvtzs v2.4s, v2.4s", + "str q2, [x28, #16]" + ] + }, + "vsubps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b00 0x5c 128-bit" + ], + "ExpectedArm64ASM": [ + "fsub v16.4s, v17.4s, v18.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vsubps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b00 0x5c 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "fsub v16.4s, v17.4s, v18.4s", + "fsub v2.4s, v2.4s, v3.4s", + "str q2, [x28, #16]" + ] + }, + "vsubpd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x5c 128-bit" + ], + "ExpectedArm64ASM": [ + "fsub v16.2d, v17.2d, v18.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vsubpd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0x5c 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "fsub v16.2d, v17.2d, v18.2d", + "fsub v2.2d, v2.2d, v3.2d", + "str q2, [x28, #16]" + ] + }, + "vsubss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b10 0x5c 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "fsub s0, s17, s18", + "mov v16.s[0], v0.s[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vsubsd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b11 0x5c 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "fsub d0, d17, d18", + "mov v16.d[0], v0.d[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vminps xmm0, xmm1, xmm2": { + 
"ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b00 0x5d 128-bit" + ], + "ExpectedArm64ASM": [ + "fcmgt v0.4s, v18.4s, v17.4s", + "mov v16.16b, v17.16b", + "bif v16.16b, v18.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vminps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 1 0b00 0x5d 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "fcmgt v0.4s, v18.4s, v17.4s", + "mov v16.16b, v17.16b", + "bif v16.16b, v18.16b, v0.16b", + "fcmgt v0.4s, v3.4s, v2.4s", + "bif v2.16b, v3.16b, v0.16b", + "str q2, [x28, #16]" + ] + }, + "vminpd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0x5d 128-bit" + ], + "ExpectedArm64ASM": [ + "fcmgt v0.2d, v18.2d, v17.2d", + "mov v16.16b, v17.16b", + "bif v16.16b, v18.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vminpd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 1 0b01 0x5d 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "fcmgt v0.2d, v18.2d, v17.2d", + "mov v16.16b, v17.16b", + "bif v16.16b, v18.16b, v0.16b", + "fcmgt v0.2d, v3.2d, v2.2d", + "bif v2.16b, v3.16b, v0.16b", + "str q2, [x28, #16]" + ] + }, + "vminss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 1 0b10 0x5d 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v16.16b, v17.16b", + "fcmp s17, s18", + "fcsel s0, s17, s18, mi", + "mov v16.s[0], v0.s[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]", + "msr nzcv, x20" + ] + }, + "vminsd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 1 0b11 0x5d 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v16.16b, v17.16b", + "fcmp d17, d18", + "fcsel d0, d17, d18, mi", + "mov v16.d[0], v0.d[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]", + "msr nzcv, x20" + ] + }, + "vdivps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + 
"Comment": [ + "Map 1 0b00 0x5e 128-bit" + ], + "ExpectedArm64ASM": [ + "fdiv v16.4s, v17.4s, v18.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vdivps ymm0, ymm0, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Aliasing source and destination", + "Map 1 0b00 0x5e 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #48]", + "fdiv v16.4s, v16.4s, v18.4s", + "fdiv v2.4s, v2.4s, v3.4s", + "str q2, [x28, #16]" + ] + }, + "vdivps ymm0, ymm1, ymm0": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Aliasing source and destination", + "Map 1 0b00 0x5e 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #16]", + "fdiv v16.4s, v17.4s, v16.4s", + "fdiv v2.4s, v2.4s, v3.4s", + "str q2, [x28, #16]" + ] + }, + "vdivps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b00 0x5e 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "fdiv v16.4s, v17.4s, v18.4s", + "fdiv v2.4s, v2.4s, v3.4s", + "str q2, [x28, #16]" + ] + }, + "vdivpd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x5e 128-bit" + ], + "ExpectedArm64ASM": [ + "fdiv v16.2d, v17.2d, v18.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vdivpd ymm0, ymm1, ymm0": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Aliasing source and destination", + "Map 1 0b01 0x5e 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #16]", + "fdiv v16.2d, v17.2d, v16.2d", + "fdiv v2.2d, v2.2d, v3.2d", + "str q2, [x28, #16]" + ] + }, + "vdivpd ymm0, ymm0, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Aliasing source and destination", + "Map 1 0b01 0x5e 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #48]", + "fdiv v16.2d, v16.2d, v18.2d", + "fdiv v2.2d, v2.2d, v3.2d", + "str q2, [x28, #16]" + ] + }, + "vdivpd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": 
[ + "Map 1 0b01 0x5e 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "fdiv v16.2d, v17.2d, v18.2d", + "fdiv v2.2d, v2.2d, v3.2d", + "str q2, [x28, #16]" + ] + }, + "vdivss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b10 0x5e 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "fdiv s0, s17, s18", + "mov v16.s[0], v0.s[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vdivsd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b11 0x5e 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "fdiv d0, d17, d18", + "mov v16.d[0], v0.d[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vmaxps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b00 0x5f 128-bit" + ], + "ExpectedArm64ASM": [ + "fcmgt v0.4s, v18.4s, v17.4s", + "mov v16.16b, v17.16b", + "bit v16.16b, v18.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vmaxps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 1 0b00 0x5f 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "fcmgt v0.4s, v18.4s, v17.4s", + "mov v16.16b, v17.16b", + "bit v16.16b, v18.16b, v0.16b", + "fcmgt v0.4s, v3.4s, v2.4s", + "bit v2.16b, v3.16b, v0.16b", + "str q2, [x28, #16]" + ] + }, + "vmaxpd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0x5f 128-bit" + ], + "ExpectedArm64ASM": [ + "fcmgt v0.2d, v18.2d, v17.2d", + "mov v16.16b, v17.16b", + "bit v16.16b, v18.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vmaxpd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 1 0b01 0x5f 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "fcmgt v0.2d, v18.2d, v17.2d", + "mov v16.16b, v17.16b", + "bit v16.16b, v18.16b, v0.16b", + "fcmgt v0.2d, v3.2d, v2.2d", + "bit v2.16b, 
v3.16b, v0.16b", + "str q2, [x28, #16]" + ] + }, + "vmaxss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 1 0b10 0x5f 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v16.16b, v17.16b", + "fcmp s17, s18", + "fcsel s0, s18, s17, mi", + "mov v16.s[0], v0.s[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]", + "msr nzcv, x20" + ] + }, + "vmaxsd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 1 0b11 0x5f 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v16.16b, v17.16b", + "fcmp d17, d18", + "fcsel d0, d18, d17, mi", + "mov v16.d[0], v0.d[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]", + "msr nzcv, x20" + ] + }, + "vpunpckhbw xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x68 128-bit" + ], + "ExpectedArm64ASM": [ + "zip2 v16.16b, v17.16b, v18.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpunpckhbw ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0x68 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "zip2 v16.16b, v17.16b, v18.16b", + "zip2 v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vpunpckhwd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x69 128-bit" + ], + "ExpectedArm64ASM": [ + "zip2 v16.8h, v17.8h, v18.8h", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpunpckhwd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0x69 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "zip2 v16.8h, v17.8h, v18.8h", + "zip2 v2.8h, v2.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vpunpckhdq xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x6a 128-bit" + ], + "ExpectedArm64ASM": [ + "zip2 v16.4s, v17.4s, v18.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpunpckhdq ymm0, ymm1, ymm2": { + 
"ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0x6a 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "zip2 v16.4s, v17.4s, v18.4s", + "zip2 v2.4s, v2.4s, v3.4s", + "str q2, [x28, #16]" + ] + }, + "vpackssdw xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b01 0x6b 128-bit" + ], + "ExpectedArm64ASM": [ + "sqxtn v16.4h, v17.4s", + "sqxtn2 v16.8h, v18.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpackssdw ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 1 0b01 0x6b 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "sqxtn v16.4h, v17.4s", + "sqxtn2 v16.8h, v18.4s", + "sqxtn v2.4h, v2.4s", + "sqxtn2 v2.8h, v3.4s", + "str q2, [x28, #16]" + ] + }, + "vpunpcklqdq xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x6c 128-bit" + ], + "ExpectedArm64ASM": [ + "zip1 v16.2d, v17.2d, v18.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpunpcklqdq ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0x6c 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "zip1 v16.2d, v17.2d, v18.2d", + "zip1 v2.2d, v2.2d, v3.2d", + "str q2, [x28, #16]" + ] + }, + "vpunpckhqdq xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x6d 128-bit" + ], + "ExpectedArm64ASM": [ + "zip2 v16.2d, v17.2d, v18.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpunpckhqdq ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0x6d 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "zip2 v16.2d, v17.2d, v18.2d", + "zip2 v2.2d, v2.2d, v3.2d", + "str q2, [x28, #16]" + ] + }, + "vmovd xmm0, dword [rax]": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x6e 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr s16, [x4]", + "movi 
v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vmovq xmm0, qword [rax]": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b01 0x6e 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x4]", + "mov v16.8b, v2.8b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vmovdqa xmm0, [rax]": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x6f 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x4]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vmovdqa [rax], xmm0": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 1 0b01 0x7f 128-bit" + ], + "ExpectedArm64ASM": [ + "str q16, [x4]" + ] + }, + "vmovdqu xmm0, [rax]": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b10 0x6f 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x4]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vmovdqu [rax], xmm0": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 1 0b10 0x7f 128-bit" + ], + "ExpectedArm64ASM": [ + "str q16, [x4]" + ] + }, + "vhaddpd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x7c 128-bit" + ], + "ExpectedArm64ASM": [ + "faddp v16.2d, v17.2d, v18.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vhaddpd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0x7c 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "faddp v16.2d, v17.2d, v18.2d", + "faddp v2.2d, v2.2d, v3.2d", + "str q2, [x28, #16]" + ] + }, + "vhaddps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b11 0x7c 128-bit" + ], + "ExpectedArm64ASM": [ + "faddp v16.4s, v17.4s, v18.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vhaddps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b11 0x7c 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "faddp v16.4s, v17.4s, v18.4s", + "faddp v2.4s, v2.4s, v3.4s", + "str q2, 
[x28, #16]" + ] + }, + "vhsubpd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0x7d 128-bit" + ], + "ExpectedArm64ASM": [ + "uzp1 v2.2d, v17.2d, v18.2d", + "uzp2 v3.2d, v17.2d, v18.2d", + "fsub v16.2d, v2.2d, v3.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vhsubpd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 1 0b01 0x7d 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "uzp1 v4.2d, v17.2d, v18.2d", + "uzp2 v5.2d, v17.2d, v18.2d", + "fsub v16.2d, v4.2d, v5.2d", + "uzp1 v4.2d, v2.2d, v3.2d", + "uzp2 v2.2d, v2.2d, v3.2d", + "fsub v2.2d, v4.2d, v2.2d", + "str q2, [x28, #16]" + ] + }, + "vhsubps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b11 0x7d 128-bit" + ], + "ExpectedArm64ASM": [ + "uzp1 v2.4s, v17.4s, v18.4s", + "uzp2 v3.4s, v17.4s, v18.4s", + "fsub v16.4s, v2.4s, v3.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vhsubps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 1 0b11 0x7d 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "uzp1 v4.4s, v17.4s, v18.4s", + "uzp2 v5.4s, v17.4s, v18.4s", + "fsub v16.4s, v4.4s, v5.4s", + "uzp1 v4.4s, v2.4s, v3.4s", + "uzp2 v2.4s, v2.4s, v3.4s", + "fsub v2.4s, v4.4s, v2.4s", + "str q2, [x28, #16]" + ] + }, + "vmovd dword [rax], xmm0": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 1 0b01 0x7e 128-bit" + ], + "ExpectedArm64ASM": [ + "str s16, [x4]" + ] + }, + "vmovq qword [rax], xmm0": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 1 0b01 0x7e 128-bit" + ], + "ExpectedArm64ASM": [ + "str d16, [x4]" + ] + }, + "vmovdqa ymm0, [rax]": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0x6f 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x4]", + "ldr q2, [x4, #16]", + "str q2, [x28, #16]" + ] + }, + "vmovdqa [rax], ymm0": { + "ExpectedInstructionCount": 3, + 
"Comment": [ + "Map 1 0b01 0x7f 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "str q16, [x4]", + "str q2, [x4, #16]" + ] + }, + "vmovdqu ymm0, [rax]": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b10 0x6f 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x4]", + "ldr q2, [x4, #16]", + "str q2, [x28, #16]" + ] + }, + "vmovdqu [rax], ymm0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b10 0x7f 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "str q16, [x4]", + "str q2, [x4, #16]" + ] + }, + "vaddsubpd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xd0 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #2416]", + "eor v2.16b, v18.16b, v2.16b", + "fadd v16.2d, v17.2d, v2.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vaddsubpd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 1 0b01 0xd0 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "ldr q4, [x28, #2416]", + "eor v5.16b, v18.16b, v4.16b", + "fadd v16.2d, v17.2d, v5.2d", + "eor v3.16b, v3.16b, v4.16b", + "fadd v2.2d, v2.2d, v3.2d", + "str q2, [x28, #16]" + ] + }, + "vaddsubps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b11 0xd0 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #2384]", + "eor v2.16b, v18.16b, v2.16b", + "fadd v16.4s, v17.4s, v2.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vaddsubps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 1 0b11 0xd0 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "ldr q4, [x28, #2384]", + "eor v5.16b, v18.16b, v4.16b", + "fadd v16.4s, v17.4s, v5.4s", + "eor v3.16b, v3.16b, v4.16b", + "fadd v2.4s, v2.4s, v3.4s", + "str q2, [x28, #16]" + ] + }, + "vpsrlw xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 1 0b01 0xd1 128-bit" + ], + 
"ExpectedArm64ASM": [ + "uqshl d0, d18, #57", + "ushr d0, d0, #57", + "dup v0.8h, v0.h[0]", + "neg v0.8h, v0.8h", + "ushl v16.8h, v17.8h, v0.8h", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsrlw ymm0, ymm1, xmm2": { + "ExpectedInstructionCount": 12, + "Comment": [ + "Map 1 0b01 0xd1 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "uqshl d0, d18, #57", + "ushr d0, d0, #57", + "dup v0.8h, v0.h[0]", + "neg v0.8h, v0.8h", + "ushl v16.8h, v17.8h, v0.8h", + "uqshl d0, d18, #57", + "ushr d0, d0, #57", + "dup v0.8h, v0.h[0]", + "neg v0.8h, v0.8h", + "ushl v2.8h, v2.8h, v0.8h", + "str q2, [x28, #16]" + ] + }, + "vpsrld xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 1 0b01 0xd2 128-bit" + ], + "ExpectedArm64ASM": [ + "uqshl d0, d18, #57", + "ushr d0, d0, #57", + "dup v0.4s, v0.s[0]", + "neg v0.4s, v0.4s", + "ushl v16.4s, v17.4s, v0.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsrld ymm0, ymm1, xmm2": { + "ExpectedInstructionCount": 12, + "Comment": [ + "Map 1 0b01 0xd2 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "uqshl d0, d18, #57", + "ushr d0, d0, #57", + "dup v0.4s, v0.s[0]", + "neg v0.4s, v0.4s", + "ushl v16.4s, v17.4s, v0.4s", + "uqshl d0, d18, #57", + "ushr d0, d0, #57", + "dup v0.4s, v0.s[0]", + "neg v0.4s, v0.4s", + "ushl v2.4s, v2.4s, v0.4s", + "str q2, [x28, #16]" + ] + }, + "vpsrlq xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 1 0b01 0xd3 128-bit" + ], + "ExpectedArm64ASM": [ + "uqshl d0, d18, #57", + "ushr d0, d0, #57", + "dup v0.2d, v0.d[0]", + "neg v0.2d, v0.2d", + "ushl v16.2d, v17.2d, v0.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsrlq ymm0, ymm1, xmm2": { + "ExpectedInstructionCount": 12, + "Comment": [ + "Map 1 0b01 0xd3 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "uqshl d0, d18, #57", + "ushr d0, d0, #57", + "dup v0.2d, v0.d[0]", + "neg v0.2d, v0.2d", + "ushl v16.2d, v17.2d, v0.2d", + "uqshl 
d0, d18, #57", + "ushr d0, d0, #57", + "dup v0.2d, v0.d[0]", + "neg v0.2d, v0.2d", + "ushl v2.2d, v2.2d, v0.2d", + "str q2, [x28, #16]" + ] + }, + "vpaddq xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0xd4 128-bit" + ], + "ExpectedArm64ASM": [ + "add v16.2d, v17.2d, v18.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpaddq ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xd4 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "add v16.2d, v17.2d, v18.2d", + "add v2.2d, v2.2d, v3.2d", + "str q2, [x28, #16]" + ] + }, + "vpmullw xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0xd5 128-bit" + ], + "ExpectedArm64ASM": [ + "mul v16.8h, v17.8h, v18.8h", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpmullw ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xd5 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "mul v16.8h, v17.8h, v18.8h", + "mul v2.8h, v2.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vmovq [rax], xmm0": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 1 0b01 0xd6 128-bit" + ], + "ExpectedArm64ASM": [ + "str d16, [x4]" + ] + }, + "vpmovmskb rax, xmm0": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 1 0b01 0xd7 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #2640]", + "cmlt v3.16b, v16.16b, #0", + "and v2.16b, v3.16b, v2.16b", + "addp v2.16b, v2.16b, v2.16b", + "addp v2.16b, v2.16b, v2.16b", + "addp v2.8b, v2.8b, v2.8b", + "umov w4, v2.h[0]" + ] + }, + "vpmovmskb rax, ymm0": { + "ExpectedInstructionCount": 16, + "Comment": [ + "Map 1 0b01 0xd7 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #2640]", + "cmlt v4.16b, v16.16b, #0", + "and v4.16b, v4.16b, v3.16b", + "addp v4.16b, v4.16b, v4.16b", + "addp v4.16b, v4.16b, v4.16b", + "addp v4.8b, v4.8b, v4.8b", + "umov 
w20, v4.h[0]", + "cmlt v2.16b, v2.16b, #0", + "and v2.16b, v2.16b, v3.16b", + "addp v2.16b, v2.16b, v2.16b", + "addp v2.16b, v2.16b, v2.16b", + "addp v2.8b, v2.8b, v2.8b", + "umov w21, v2.h[0]", + "orr x20, x20, x21, lsl #16", + "mov w4, w20" + ] + }, + "vpsubusb xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0xd8 128-bit" + ], + "ExpectedArm64ASM": [ + "uqsub v16.16b, v17.16b, v18.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsubusb ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xd8 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "uqsub v16.16b, v17.16b, v18.16b", + "uqsub v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vpsubusw xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0xd9 128-bit" + ], + "ExpectedArm64ASM": [ + "uqsub v16.8h, v17.8h, v18.8h", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsubusw ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xd9 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "uqsub v16.8h, v17.8h, v18.8h", + "uqsub v2.8h, v2.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vpminub xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0xda 128-bit" + ], + "ExpectedArm64ASM": [ + "umin v16.16b, v17.16b, v18.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpminub ymm0, ymm1, ymm0": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Aliasing source and destination", + "Map 1 0b01 0xda 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #16]", + "umin v16.16b, v17.16b, v16.16b", + "umin v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vpminub ymm0, ymm0, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Aliasing source and destination", + "Map 1 0b01 0xda 256-bit" + ], + 
"ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #48]", + "umin v16.16b, v16.16b, v18.16b", + "umin v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vpminub ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xda 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "umin v16.16b, v17.16b, v18.16b", + "umin v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vpand xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0xdb 128-bit" + ], + "ExpectedArm64ASM": [ + "and v16.16b, v17.16b, v18.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpand ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xdb 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "and v16.16b, v17.16b, v18.16b", + "and v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vpaddusb xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0xdc 128-bit" + ], + "ExpectedArm64ASM": [ + "uqadd v16.16b, v17.16b, v18.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpaddusb ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xdc 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "uqadd v16.16b, v17.16b, v18.16b", + "uqadd v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vpaddusw xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0xdd 128-bit" + ], + "ExpectedArm64ASM": [ + "uqadd v16.8h, v17.8h, v18.8h", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpaddusw ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xdd 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "uqadd v16.8h, v17.8h, v18.8h", + "uqadd v2.8h, v2.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + 
"vpmaxub xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Aliasing source and destination", + "Map 1 0b01 0xdd 128-bit" + ], + "ExpectedArm64ASM": [ + "umax v16.16b, v17.16b, v18.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpmaxub ymm0, ymm0, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Aliasing source and destination", + "Map 1 0b01 0xde 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #48]", + "umax v16.16b, v16.16b, v18.16b", + "umax v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vpmaxub ymm0, ymm1, ymm0": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Aliasing source and destination", + "Map 1 0b01 0xde 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #16]", + "umax v16.16b, v17.16b, v16.16b", + "umax v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vpmaxub ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xde 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "umax v16.16b, v17.16b, v18.16b", + "umax v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vpandn xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0xdf 128-bit" + ], + "ExpectedArm64ASM": [ + "bic v16.16b, v18.16b, v17.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpandn ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xdf 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "bic v16.16b, v18.16b, v17.16b", + "bic v2.16b, v3.16b, v2.16b", + "str q2, [x28, #16]" + ] + }, + "vpavgb xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0xe0 128-bit" + ], + "ExpectedArm64ASM": [ + "urhadd v16.16b, v17.16b, v18.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpavgb ymm0, ymm1, ymm0": { + "ExpectedInstructionCount": 5, + 
"Comment": [ + "Aliasing source and destination", + "Map 1 0b01 0xe0 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #16]", + "urhadd v16.16b, v17.16b, v16.16b", + "urhadd v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vpavgb ymm0, ymm0, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Aliasing source and destination", + "Map 1 0b01 0xe0 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #48]", + "urhadd v16.16b, v16.16b, v18.16b", + "urhadd v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vpavgb ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xe0 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "urhadd v16.16b, v17.16b, v18.16b", + "urhadd v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vpsraw xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 1 0b01 0xe1 128-bit" + ], + "ExpectedArm64ASM": [ + "uqshl d0, d18, #57", + "ushr d0, d0, #57", + "dup v0.8h, v0.h[0]", + "neg v0.8h, v0.8h", + "sshl v16.8h, v17.8h, v0.8h", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsraw ymm0, ymm1, xmm2": { + "ExpectedInstructionCount": 12, + "Comment": [ + "Map 1 0b01 0xe1 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "uqshl d0, d18, #57", + "ushr d0, d0, #57", + "dup v0.8h, v0.h[0]", + "neg v0.8h, v0.8h", + "sshl v16.8h, v17.8h, v0.8h", + "uqshl d0, d18, #57", + "ushr d0, d0, #57", + "dup v0.8h, v0.h[0]", + "neg v0.8h, v0.8h", + "sshl v2.8h, v2.8h, v0.8h", + "str q2, [x28, #16]" + ] + }, + "vpsrad xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 1 0b01 0xe2 128-bit" + ], + "ExpectedArm64ASM": [ + "uqshl d0, d18, #57", + "ushr d0, d0, #57", + "dup v0.4s, v0.s[0]", + "neg v0.4s, v0.4s", + "sshl v16.4s, v17.4s, v0.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsrad ymm0, ymm1, xmm2": { + 
"ExpectedInstructionCount": 12, + "Comment": [ + "Map 1 0b01 0xe2 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "uqshl d0, d18, #57", + "ushr d0, d0, #57", + "dup v0.4s, v0.s[0]", + "neg v0.4s, v0.4s", + "sshl v16.4s, v17.4s, v0.4s", + "uqshl d0, d18, #57", + "ushr d0, d0, #57", + "dup v0.4s, v0.s[0]", + "neg v0.4s, v0.4s", + "sshl v2.4s, v2.4s, v0.4s", + "str q2, [x28, #16]" + ] + }, + "vpavgw xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0xe3 128-bit" + ], + "ExpectedArm64ASM": [ + "urhadd v16.8h, v17.8h, v18.8h", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpavgw ymm0, ymm1, ymm0": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Aliasing source and destination", + "Map 1 0b01 0xe3 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #16]", + "urhadd v16.8h, v17.8h, v16.8h", + "urhadd v2.8h, v2.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vpavgw ymm0, ymm0, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Aliasing source and destination", + "Map 1 0b01 0xe3 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #48]", + "urhadd v16.8h, v16.8h, v18.8h", + "urhadd v2.8h, v2.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vpavgw ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xe3 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "urhadd v16.8h, v17.8h, v18.8h", + "urhadd v2.8h, v2.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vpmulhuw xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xe4 128-bit" + ], + "ExpectedArm64ASM": [ + "umull2 v0.4s, v17.8h, v18.8h", + "umull v16.4s, v17.4h, v18.4h", + "uzp2 v16.8h, v16.8h, v0.8h", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpmulhuw ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 1 0b01 0xe4 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, 
[x28, #32]", + "ldr q3, [x28, #48]", + "umull2 v0.4s, v17.8h, v18.8h", + "umull v16.4s, v17.4h, v18.4h", + "uzp2 v16.8h, v16.8h, v0.8h", + "umull2 v0.4s, v2.8h, v3.8h", + "umull v2.4s, v2.4h, v3.4h", + "uzp2 v2.8h, v2.8h, v0.8h", + "str q2, [x28, #16]" + ] + }, + "vpmulhw xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xe5 128-bit" + ], + "ExpectedArm64ASM": [ + "smull2 v0.4s, v17.8h, v18.8h", + "smull v16.4s, v17.4h, v18.4h", + "uzp2 v16.8h, v16.8h, v0.8h", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpmulhw ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 1 0b01 0xe5 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "smull2 v0.4s, v17.8h, v18.8h", + "smull v16.4s, v17.4h, v18.4h", + "uzp2 v16.8h, v16.8h, v0.8h", + "smull2 v0.4s, v2.8h, v3.8h", + "smull v2.4s, v2.4h, v3.4h", + "uzp2 v2.8h, v2.8h, v0.8h", + "str q2, [x28, #16]" + ] + }, + "vcvttpd2dq xmm0, xmm1": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b01 0xe6 128-bit" + ], + "ExpectedArm64ASM": [ + "fcvtn v2.2s, v17.2d", + "fcvtzs v16.4s, v2.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcvttpd2dq xmm0, ymm1": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 1 0b01 0xe6 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "fcvtn v3.2s, v17.2d", + "fcvtzs v3.4s, v3.4s", + "fcvtn v2.2s, v2.2d", + "fcvtzs v2.4s, v2.4s", + "mov v16.16b, v3.16b", + "mov v16.d[1], v2.d[0]" + ] + }, + "vcvtdq2pd xmm0, xmm1": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b10 0xe6 128-bit" + ], + "ExpectedArm64ASM": [ + "sxtl v2.2d, v17.2s", + "scvtf v16.2d, v2.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcvtdq2pd ymm0, xmm1": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b10 0xe6 256-bit" + ], + "ExpectedArm64ASM": [ + "sxtl v2.2d, v17.2s", + "scvtf v16.2d, v2.2d", + "sxtl2 v2.2d, v17.4s", + "scvtf v2.2d, v2.2d", + "str q2, 
[x28, #16]" + ] + }, + "vcvtpd2dq xmm0, xmm1": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b11 0xe6 128-bit" + ], + "ExpectedArm64ASM": [ + "fcvtn v2.2s, v17.2d", + "frinti v16.4s, v2.4s", + "fcvtzs v16.4s, v16.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcvtpd2dq xmm0, ymm1": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 1 0b11 0xe6 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "fcvtn v3.2s, v17.2d", + "frinti v3.4s, v3.4s", + "fcvtzs v3.4s, v3.4s", + "fcvtn v2.2s, v2.2d", + "frinti v2.4s, v2.4s", + "fcvtzs v2.4s, v2.4s", + "mov v16.16b, v3.16b", + "mov v16.d[1], v2.d[0]" + ] + }, + "vmovntdq [rax], xmm0": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 1 0b01 0xe7 128-bit" + ], + "ExpectedArm64ASM": [ + "str q16, [x4]" + ] + }, + "vmovntdq [rax], ymm0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0xe7 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "str q16, [x4]", + "str q2, [x4, #16]" + ] + }, + "vpsubsb xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0xe8 128-bit" + ], + "ExpectedArm64ASM": [ + "sqsub v16.16b, v17.16b, v18.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsubsb ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xe8 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "sqsub v16.16b, v17.16b, v18.16b", + "sqsub v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vpsubsw xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0xe9 128-bit" + ], + "ExpectedArm64ASM": [ + "sqsub v16.8h, v17.8h, v18.8h", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsubsw ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xe9 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "sqsub v16.8h, v17.8h, v18.8h", + "sqsub v2.8h, 
v2.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vpminsw xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0xea 128-bit" + ], + "ExpectedArm64ASM": [ + "smin v16.8h, v17.8h, v18.8h", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpminsw ymm0, ymm1, ymm0": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Aliasing source and destination", + "Map 1 0b01 0xea 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #16]", + "smin v16.8h, v17.8h, v16.8h", + "smin v2.8h, v2.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vpminsw ymm0, ymm0, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Aliasing source and destination", + "Map 1 0b01 0xea 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #48]", + "smin v16.8h, v16.8h, v18.8h", + "smin v2.8h, v2.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vpminsw ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xea 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "smin v16.8h, v17.8h, v18.8h", + "smin v2.8h, v2.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vpor xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0xeb 128-bit" + ], + "ExpectedArm64ASM": [ + "orr v16.16b, v17.16b, v18.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpor ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xeb 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "orr v16.16b, v17.16b, v18.16b", + "orr v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vpaddsb xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0xec 128-bit" + ], + "ExpectedArm64ASM": [ + "sqadd v16.16b, v17.16b, v18.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpaddsb ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + 
"Map 1 0b01 0xec 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "sqadd v16.16b, v17.16b, v18.16b", + "sqadd v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vpaddsw xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0xed 128-bit" + ], + "ExpectedArm64ASM": [ + "sqadd v16.8h, v17.8h, v18.8h", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpaddsw ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xed 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "sqadd v16.8h, v17.8h, v18.8h", + "sqadd v2.8h, v2.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vpmaxsw xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0xee 128-bit" + ], + "ExpectedArm64ASM": [ + "smax v16.8h, v17.8h, v18.8h", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpmaxsw ymm0, ymm1, ymm0": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Aliasing source and destination", + "Map 1 0b01 0xee 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #16]", + "smax v16.8h, v17.8h, v16.8h", + "smax v2.8h, v2.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vpmaxsw ymm0, ymm0, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Aliasing source and destination", + "Map 1 0b01 0xee 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #48]", + "smax v16.8h, v16.8h, v18.8h", + "smax v2.8h, v2.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vpmaxsw ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xee 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "smax v16.8h, v17.8h, v18.8h", + "smax v2.8h, v2.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vpxor xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0xef 128-bit" + ], + "ExpectedArm64ASM": [ + "eor 
v16.16b, v17.16b, v18.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpxor ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xef 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "eor v16.16b, v17.16b, v18.16b", + "eor v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vlddqu xmm0, [rax]": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b11 0xf0 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x4]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vlddqu ymm0, [rax]": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b11 0xf0 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x4]", + "ldr q2, [x4, #16]", + "str q2, [x28, #16]" + ] + }, + "vpsllw xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 1 0b01 0xf1 128-bit" + ], + "ExpectedArm64ASM": [ + "uqshl d0, d18, #57", + "ushr d0, d0, #57", + "dup v0.8h, v0.h[0]", + "ushl v16.8h, v17.8h, v0.8h", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsllw ymm0, ymm1, xmm2": { + "ExpectedInstructionCount": 10, + "Comment": [ + "Map 1 0b01 0xf1 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "uqshl d0, d18, #57", + "ushr d0, d0, #57", + "dup v0.8h, v0.h[0]", + "ushl v16.8h, v17.8h, v0.8h", + "uqshl d0, d18, #57", + "ushr d0, d0, #57", + "dup v0.8h, v0.h[0]", + "ushl v2.8h, v2.8h, v0.8h", + "str q2, [x28, #16]" + ] + }, + "vpslld xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 1 0b01 0xf2 128-bit" + ], + "ExpectedArm64ASM": [ + "uqshl d0, d18, #57", + "ushr d0, d0, #57", + "dup v0.4s, v0.s[0]", + "ushl v16.4s, v17.4s, v0.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpslld ymm0, ymm1, xmm2": { + "ExpectedInstructionCount": 10, + "Comment": [ + "Map 1 0b01 0xf2 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "uqshl d0, d18, #57", + "ushr d0, d0, #57", + "dup v0.4s, v0.s[0]", + "ushl v16.4s, 
v17.4s, v0.4s", + "uqshl d0, d18, #57", + "ushr d0, d0, #57", + "dup v0.4s, v0.s[0]", + "ushl v2.4s, v2.4s, v0.4s", + "str q2, [x28, #16]" + ] + }, + "vpsllq xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 1 0b01 0xf3 128-bit" + ], + "ExpectedArm64ASM": [ + "uqshl d0, d18, #57", + "ushr d0, d0, #57", + "dup v0.2d, v0.d[0]", + "ushl v16.2d, v17.2d, v0.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsllq ymm0, ymm1, xmm2": { + "ExpectedInstructionCount": 10, + "Comment": [ + "Map 1 0b01 0xf3 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "uqshl d0, d18, #57", + "ushr d0, d0, #57", + "dup v0.2d, v0.d[0]", + "ushl v16.2d, v17.2d, v0.2d", + "uqshl d0, d18, #57", + "ushr d0, d0, #57", + "dup v0.2d, v0.d[0]", + "ushl v2.2d, v2.2d, v0.2d", + "str q2, [x28, #16]" + ] + }, + "vpmuludq xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xf4 128-bit" + ], + "ExpectedArm64ASM": [ + "uzp1 v2.4s, v17.4s, v17.4s", + "uzp1 v3.4s, v18.4s, v18.4s", + "umull v16.2d, v2.2s, v3.2s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpmuludq ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 1 0b01 0xf4 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "uzp1 v4.4s, v17.4s, v17.4s", + "uzp1 v5.4s, v18.4s, v18.4s", + "umull v16.2d, v4.2s, v5.2s", + "uzp1 v2.4s, v2.4s, v2.4s", + "uzp1 v3.4s, v3.4s, v3.4s", + "umull v2.2d, v2.2s, v3.2s", + "str q2, [x28, #16]" + ] + }, + "vpmaddwd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xf5 128-bit" + ], + "ExpectedArm64ASM": [ + "smull v2.4s, v17.4h, v18.4h", + "smull2 v3.4s, v17.8h, v18.8h", + "addp v16.4s, v2.4s, v3.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpmaddwd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 1 0b01 0xf5 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, 
#48]", + "smull v4.4s, v17.4h, v18.4h", + "smull2 v5.4s, v17.8h, v18.8h", + "addp v16.4s, v4.4s, v5.4s", + "smull v4.4s, v2.4h, v3.4h", + "smull2 v2.4s, v2.8h, v3.8h", + "addp v2.4s, v4.4s, v2.4s", + "str q2, [x28, #16]" + ] + }, + "vpsadbw xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 1 0b01 0xf6 128-bit" + ], + "ExpectedArm64ASM": [ + "uabdl v2.8h, v17.8b, v18.8b", + "uabdl2 v3.8h, v17.16b, v18.16b", + "addv h2, v2.8h", + "addv h3, v3.8h", + "zip1 v16.2d, v2.2d, v3.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsadbw ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 13, + "Comment": [ + "Map 1 0b01 0xf6 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "uabdl v4.8h, v17.8b, v18.8b", + "uabdl2 v5.8h, v17.16b, v18.16b", + "addv h4, v4.8h", + "addv h5, v5.8h", + "zip1 v16.2d, v4.2d, v5.2d", + "uabdl v4.8h, v2.8b, v3.8b", + "uabdl2 v2.8h, v2.16b, v3.16b", + "addv h3, v4.8h", + "addv h2, v2.8h", + "zip1 v2.2d, v3.2d, v2.2d", + "str q2, [x28, #16]" + ] + }, + "vmaskmovdqu xmm0, xmm1": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b01 0xf7 128-bit" + ], + "ExpectedArm64ASM": [ + "cmlt v2.16b, v17.16b, #0", + "ldr q3, [x11]", + "bsl v2.16b, v16.16b, v3.16b", + "str q2, [x11]" + ] + }, + "vpsubb xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0xf8 128-bit" + ], + "ExpectedArm64ASM": [ + "sub v16.16b, v17.16b, v18.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsubb ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xf8 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "sub v16.16b, v17.16b, v18.16b", + "sub v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vpsubw xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0xf9 128-bit" + ], + "ExpectedArm64ASM": [ + "sub v16.8h, v17.8h, v18.8h", + "movi v2.2d, #0x0", + 
"str q2, [x28, #16]" + ] + }, + "vpsubw ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xf9 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "sub v16.8h, v17.8h, v18.8h", + "sub v2.8h, v2.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vpsubd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0xfa 128-bit" + ], + "ExpectedArm64ASM": [ + "sub v16.4s, v17.4s, v18.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsubd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xfa 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "sub v16.4s, v17.4s, v18.4s", + "sub v2.4s, v2.4s, v3.4s", + "str q2, [x28, #16]" + ] + }, + "vpsubq xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0xfb 128-bit" + ], + "ExpectedArm64ASM": [ + "sub v16.2d, v17.2d, v18.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsubq ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xfb 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "sub v16.2d, v17.2d, v18.2d", + "sub v2.2d, v2.2d, v3.2d", + "str q2, [x28, #16]" + ] + }, + "vpaddb xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0xfc 128-bit" + ], + "ExpectedArm64ASM": [ + "add v16.16b, v17.16b, v18.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpaddb ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xfc 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "add v16.16b, v17.16b, v18.16b", + "add v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vpaddw xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0xfd 128-bit" + ], + "ExpectedArm64ASM": [ + "add v16.8h, v17.8h, v18.8h", + "movi v2.2d, 
#0x0", + "str q2, [x28, #16]" + ] + }, + "vpaddw ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xfd 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "add v16.8h, v17.8h, v18.8h", + "add v2.8h, v2.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vpaddd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 1 0b01 0xfe 128-bit" + ], + "ExpectedArm64ASM": [ + "add v16.4s, v17.4s, v18.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpaddd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 1 0b01 0xfe 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "add v16.4s, v17.4s, v18.4s", + "add v2.4s, v2.4s, v3.4s", + "str q2, [x28, #16]" + ] + } + } +} diff --git a/unittests/InstructionCountCI/AVX128/VEX_map1_FCMA.json b/unittests/InstructionCountCI/AVX128/VEX_map1_FCMA.json new file mode 100644 index 0000000000..19213641c4 --- /dev/null +++ b/unittests/InstructionCountCI/AVX128/VEX_map1_FCMA.json @@ -0,0 +1,134 @@ +{ + "Features": { + "Bitness": 64, + "EnabledHostFeatures": [ + "AFP", + "FCMA", + "AVX" + ], + "DisabledHostFeatures": [ + "SVE256", + "SVE128" + ] + }, + "Instructions": { + "vaddsubpd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b01 0xd0 128-bit" + ], + "ExpectedArm64ASM": [ + "ext v2.16b, v18.16b, v18.16b, #8", + "fcadd v16.2d, v17.2d, v2.2d, #90", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vaddsubpd ymm0, ymm0, ymm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Aliasing source and destination", + "Map 1 0b01 0xd0 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #48]", + "ext v4.16b, v18.16b, v18.16b, #8", + "fcadd v16.2d, v16.2d, v4.2d, #90", + "ext v3.16b, v3.16b, v3.16b, #8", + "fcadd v2.2d, v2.2d, v3.2d, #90", + "str q2, [x28, #16]" + ] + }, + "vaddsubpd ymm0, ymm1, ymm0": { + 
"ExpectedInstructionCount": 7, + "Comment": [ + "Aliasing source and destination", + "Map 1 0b01 0xd0 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #16]", + "ext v4.16b, v16.16b, v16.16b, #8", + "fcadd v16.2d, v17.2d, v4.2d, #90", + "ext v3.16b, v3.16b, v3.16b, #8", + "fcadd v2.2d, v2.2d, v3.2d, #90", + "str q2, [x28, #16]" + ] + }, + "vaddsubpd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 1 0b01 0xd0 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "ext v4.16b, v18.16b, v18.16b, #8", + "fcadd v16.2d, v17.2d, v4.2d, #90", + "ext v3.16b, v3.16b, v3.16b, #8", + "fcadd v2.2d, v2.2d, v3.2d, #90", + "str q2, [x28, #16]" + ] + }, + "vaddsubps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 1 0b11 0xd0 128-bit" + ], + "ExpectedArm64ASM": [ + "rev64 v2.4s, v18.4s", + "fcadd v16.4s, v17.4s, v2.4s, #90", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vaddsubps ymm0, ymm1, ymm0": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Aliasing source and destination", + "Map 1 0b11 0xd0 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #16]", + "rev64 v4.4s, v16.4s", + "fcadd v16.4s, v17.4s, v4.4s, #90", + "rev64 v3.4s, v3.4s", + "fcadd v2.4s, v2.4s, v3.4s, #90", + "str q2, [x28, #16]" + ] + }, + "vaddsubps ymm0, ymm0, ymm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Aliasing source and destination", + "Map 1 0b11 0xd0 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #48]", + "rev64 v4.4s, v18.4s", + "fcadd v16.4s, v16.4s, v4.4s, #90", + "rev64 v3.4s, v3.4s", + "fcadd v2.4s, v2.4s, v3.4s, #90", + "str q2, [x28, #16]" + ] + }, + "vaddsubps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 1 0b11 0xd0 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "rev64 v4.4s, v18.4s", + "fcadd v16.4s, v17.4s, v4.4s, #90", 
+ "rev64 v3.4s, v3.4s", + "fcadd v2.4s, v2.4s, v3.4s, #90", + "str q2, [x28, #16]" + ] + } + } +} diff --git a/unittests/InstructionCountCI/AVX128/VEX_map2.json b/unittests/InstructionCountCI/AVX128/VEX_map2.json new file mode 100644 index 0000000000..9ab5f7ddac --- /dev/null +++ b/unittests/InstructionCountCI/AVX128/VEX_map2.json @@ -0,0 +1,6421 @@ +{ + "Features": { + "Bitness": 64, + "EnabledHostFeatures": [ + "AVX" + ], + "DisabledHostFeatures": [ + "AFP", + "FLAGM", + "FLAGM2", + "SVE128", + "SVE256" + ] + }, + "Instructions": { + "vpshufb xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x00 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.16b, #0x8f", + "and v2.16b, v18.16b, v2.16b", + "tbl v16.16b, {v17.16b}, v2.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpshufb ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 2 0b01 0x00 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "movi v4.16b, #0x8f", + "and v4.16b, v18.16b, v4.16b", + "tbl v16.16b, {v17.16b}, v4.16b", + "movi v4.16b, #0x8f", + "and v3.16b, v3.16b, v4.16b", + "tbl v2.16b, {v2.16b}, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vphaddw xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0x01 128-bit" + ], + "ExpectedArm64ASM": [ + "addp v16.8h, v17.8h, v18.8h", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vphaddw ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x01 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "addp v16.8h, v17.8h, v18.8h", + "addp v2.8h, v2.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vphaddd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0x02 128-bit" + ], + "ExpectedArm64ASM": [ + "addp v16.4s, v17.4s, v18.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vphaddd ymm0, ymm1, ymm2": { + 
"ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x02 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "addp v16.4s, v17.4s, v18.4s", + "addp v2.4s, v2.4s, v3.4s", + "str q2, [x28, #16]" + ] + }, + "vphaddsw xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x03 128-bit" + ], + "ExpectedArm64ASM": [ + "uzp1 v2.8h, v17.8h, v18.8h", + "uzp2 v3.8h, v17.8h, v18.8h", + "sqadd v16.8h, v2.8h, v3.8h", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vphaddsw ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 2 0b01 0x03 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "uzp1 v4.8h, v17.8h, v18.8h", + "uzp2 v5.8h, v17.8h, v18.8h", + "sqadd v16.8h, v4.8h, v5.8h", + "uzp1 v4.8h, v2.8h, v3.8h", + "uzp2 v2.8h, v2.8h, v3.8h", + "sqadd v2.8h, v4.8h, v2.8h", + "str q2, [x28, #16]" + ] + }, + "vpmaddubsw xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 11, + "Comment": [ + "Map 2 0b01 0x04 128-bit" + ], + "ExpectedArm64ASM": [ + "uxtl v2.8h, v17.8b", + "sxtl v3.8h, v18.8b", + "mul v2.8h, v2.8h, v3.8h", + "uxtl2 v3.8h, v17.16b", + "sxtl2 v4.8h, v18.16b", + "mul v3.8h, v3.8h, v4.8h", + "uzp1 v4.8h, v2.8h, v3.8h", + "uzp2 v2.8h, v2.8h, v3.8h", + "sqadd v16.8h, v4.8h, v2.8h", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpmaddubsw ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 21, + "Comment": [ + "Map 2 0b01 0x04 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "uxtl v4.8h, v17.8b", + "sxtl v5.8h, v18.8b", + "mul v4.8h, v4.8h, v5.8h", + "uxtl2 v5.8h, v17.16b", + "sxtl2 v6.8h, v18.16b", + "mul v5.8h, v5.8h, v6.8h", + "uzp1 v6.8h, v4.8h, v5.8h", + "uzp2 v4.8h, v4.8h, v5.8h", + "sqadd v16.8h, v6.8h, v4.8h", + "uxtl v4.8h, v2.8b", + "sxtl v5.8h, v3.8b", + "mul v4.8h, v4.8h, v5.8h", + "uxtl2 v2.8h, v2.16b", + "sxtl2 v3.8h, v3.16b", + "mul v2.8h, v2.8h, v3.8h", + "uzp1 v3.8h, v4.8h, 
v2.8h", + "uzp2 v2.8h, v4.8h, v2.8h", + "sqadd v2.8h, v3.8h, v2.8h", + "str q2, [x28, #16]" + ] + }, + "vphsubw xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x05 128-bit" + ], + "ExpectedArm64ASM": [ + "uzp1 v2.8h, v17.8h, v18.8h", + "uzp2 v3.8h, v17.8h, v18.8h", + "sub v16.8h, v2.8h, v3.8h", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vphsubw ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 2 0b01 0x05 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "uzp1 v4.8h, v17.8h, v18.8h", + "uzp2 v5.8h, v17.8h, v18.8h", + "sub v16.8h, v4.8h, v5.8h", + "uzp1 v4.8h, v2.8h, v3.8h", + "uzp2 v2.8h, v2.8h, v3.8h", + "sub v2.8h, v4.8h, v2.8h", + "str q2, [x28, #16]" + ] + }, + "vphsubd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x06 128-bit" + ], + "ExpectedArm64ASM": [ + "uzp1 v2.4s, v17.4s, v18.4s", + "uzp2 v3.4s, v17.4s, v18.4s", + "sub v16.4s, v2.4s, v3.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vphsubd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 2 0b01 0x06 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "uzp1 v4.4s, v17.4s, v18.4s", + "uzp2 v5.4s, v17.4s, v18.4s", + "sub v16.4s, v4.4s, v5.4s", + "uzp1 v4.4s, v2.4s, v3.4s", + "uzp2 v2.4s, v2.4s, v3.4s", + "sub v2.4s, v4.4s, v2.4s", + "str q2, [x28, #16]" + ] + }, + "vphsubsw xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x07 128-bit" + ], + "ExpectedArm64ASM": [ + "uzp1 v2.8h, v17.8h, v18.8h", + "uzp2 v3.8h, v17.8h, v18.8h", + "sqsub v16.8h, v2.8h, v3.8h", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vphsubsw ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 2 0b01 0x07 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "uzp1 v4.8h, v17.8h, v18.8h", + "uzp2 v5.8h, v17.8h, 
v18.8h", + "sqsub v16.8h, v4.8h, v5.8h", + "uzp1 v4.8h, v2.8h, v3.8h", + "uzp2 v2.8h, v2.8h, v3.8h", + "sqsub v2.8h, v4.8h, v2.8h", + "str q2, [x28, #16]" + ] + }, + "vpsignb xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x08 128-bit" + ], + "ExpectedArm64ASM": [ + "sqshl v2.16b, v18.16b, #7", + "srshr v2.16b, v2.16b, #7", + "mul v16.16b, v17.16b, v2.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsignb ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 2 0b01 0x08 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "sqshl v4.16b, v18.16b, #7", + "srshr v4.16b, v4.16b, #7", + "mul v16.16b, v17.16b, v4.16b", + "sqshl v3.16b, v3.16b, #7", + "srshr v3.16b, v3.16b, #7", + "mul v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vpsignw xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x09 128-bit" + ], + "ExpectedArm64ASM": [ + "sqshl v2.8h, v18.8h, #15", + "srshr v2.8h, v2.8h, #15", + "mul v16.8h, v17.8h, v2.8h", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsignw ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 2 0b01 0x09 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "sqshl v4.8h, v18.8h, #15", + "srshr v4.8h, v4.8h, #15", + "mul v16.8h, v17.8h, v4.8h", + "sqshl v3.8h, v3.8h, #15", + "srshr v3.8h, v3.8h, #15", + "mul v2.8h, v2.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vpsignd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x0a 128-bit" + ], + "ExpectedArm64ASM": [ + "sqshl v2.4s, v18.4s, #31", + "srshr v2.4s, v2.4s, #31", + "mul v16.4s, v17.4s, v2.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsignd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 2 0b01 0x0a 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + 
"sqshl v4.4s, v18.4s, #31", + "srshr v4.4s, v4.4s, #31", + "mul v16.4s, v17.4s, v4.4s", + "sqshl v3.4s, v3.4s, #31", + "srshr v3.4s, v3.4s, #31", + "mul v2.4s, v2.4s, v3.4s", + "str q2, [x28, #16]" + ] + }, + "vpmulhrsw xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 13, + "Comment": [ + "Map 2 0b01 0x0b 128-bit" + ], + "ExpectedArm64ASM": [ + "smull v2.4s, v17.4h, v18.4h", + "smull2 v3.4s, v17.8h, v18.8h", + "sshr v2.4s, v2.4s, #14", + "sshr v3.4s, v3.4s, #14", + "movi v4.4s, #0x1, lsl #0", + "add v2.4s, v2.4s, v4.4s", + "add v3.4s, v3.4s, v4.4s", + "shrn v2.4h, v2.4s, #1", + "mov v0.16b, v2.16b", + "shrn2 v0.8h, v3.4s, #1", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpmulhrsw ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 23, + "Comment": [ + "Map 2 0b01 0x0b 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "smull v4.4s, v17.4h, v18.4h", + "smull2 v5.4s, v17.8h, v18.8h", + "sshr v4.4s, v4.4s, #14", + "sshr v5.4s, v5.4s, #14", + "movi v6.4s, #0x1, lsl #0", + "add v4.4s, v4.4s, v6.4s", + "add v5.4s, v5.4s, v6.4s", + "shrn v4.4h, v4.4s, #1", + "mov v0.16b, v4.16b", + "shrn2 v0.8h, v5.4s, #1", + "mov v16.16b, v0.16b", + "smull v4.4s, v2.4h, v3.4h", + "smull2 v2.4s, v2.8h, v3.8h", + "sshr v4.4s, v4.4s, #14", + "sshr v2.4s, v2.4s, #14", + "movi v3.4s, #0x1, lsl #0", + "add v4.4s, v4.4s, v3.4s", + "add v2.4s, v2.4s, v3.4s", + "shrn v4.4h, v4.4s, #1", + "shrn2 v4.8h, v2.4s, #1", + "str q4, [x28, #16]" + ] + }, + "vpermilps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 12, + "Comment": [ + "Map 2 0b01 0x0c 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.4s, #0x3, lsl #0", + "and v2.16b, v18.16b, v2.16b", + "trn1 v2.16b, v2.16b, v2.16b", + "trn1 v2.8h, v2.8h, v2.8h", + "shl v2.16b, v2.16b, #2", + "mov w20, #0x100", + "movk w20, #0x302, lsl #16", + "dup v3.4s, w20", + "add v2.16b, v3.16b, v2.16b", + "tbl v16.16b, {v17.16b}, v2.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + 
}, + "vpermilps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 21, + "Comment": [ + "Map 2 0b01 0x0c 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "movi v4.4s, #0x3, lsl #0", + "and v4.16b, v18.16b, v4.16b", + "trn1 v4.16b, v4.16b, v4.16b", + "trn1 v4.8h, v4.8h, v4.8h", + "shl v4.16b, v4.16b, #2", + "mov w20, #0x100", + "movk w20, #0x302, lsl #16", + "dup v5.4s, w20", + "add v4.16b, v5.16b, v4.16b", + "tbl v16.16b, {v17.16b}, v4.16b", + "movi v4.4s, #0x3, lsl #0", + "and v3.16b, v3.16b, v4.16b", + "trn1 v3.16b, v3.16b, v3.16b", + "trn1 v3.8h, v3.8h, v3.8h", + "shl v3.16b, v3.16b, #2", + "dup v4.4s, w20", + "add v3.16b, v4.16b, v3.16b", + "tbl v2.16b, {v2.16b}, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vpermilpd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 17, + "Comment": [ + "Map 2 0b01 0x0d 128-bit" + ], + "ExpectedArm64ASM": [ + "ushr v2.2d, v18.2d, #1", + "mov w0, #0x1", + "dup v3.2d, x0", + "and v2.16b, v2.16b, v3.16b", + "trn1 v2.16b, v2.16b, v2.16b", + "trn1 v2.8h, v2.8h, v2.8h", + "trn1 v2.4s, v2.4s, v2.4s", + "shl v2.16b, v2.16b, #3", + "mov x20, #0x100", + "movk x20, #0x302, lsl #16", + "movk x20, #0x504, lsl #32", + "movk x20, #0x706, lsl #48", + "dup v3.2d, x20", + "add v2.16b, v3.16b, v2.16b", + "tbl v16.16b, {v17.16b}, v2.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpermilpd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 29, + "Comment": [ + "Map 2 0b01 0x0d 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "ushr v4.2d, v18.2d, #1", + "mov w0, #0x1", + "dup v5.2d, x0", + "and v4.16b, v4.16b, v5.16b", + "trn1 v4.16b, v4.16b, v4.16b", + "trn1 v4.8h, v4.8h, v4.8h", + "trn1 v4.4s, v4.4s, v4.4s", + "shl v4.16b, v4.16b, #3", + "mov x20, #0x100", + "movk x20, #0x302, lsl #16", + "movk x20, #0x504, lsl #32", + "movk x20, #0x706, lsl #48", + "dup v5.2d, x20", + "add v4.16b, v5.16b, v4.16b", + "tbl v16.16b, {v17.16b}, v4.16b", + "ushr v3.2d, v3.2d, 
#1", + "mov w0, #0x1", + "dup v4.2d, x0", + "and v3.16b, v3.16b, v4.16b", + "trn1 v3.16b, v3.16b, v3.16b", + "trn1 v3.8h, v3.8h, v3.8h", + "trn1 v3.4s, v3.4s, v3.4s", + "shl v3.16b, v3.16b, #3", + "dup v4.2d, x20", + "add v3.16b, v4.16b, v3.16b", + "tbl v2.16b, {v2.16b}, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vtestps xmm0, xmm1": { + "ExpectedInstructionCount": 18, + "Comment": [ + "Map 2 0b01 0x0e 128-bit" + ], + "ExpectedArm64ASM": [ + "mov w20, #0x80000000", + "dup v2.4s, w20", + "and v3.16b, v17.16b, v16.16b", + "bic v4.16b, v17.16b, v16.16b", + "and v3.16b, v3.16b, v2.16b", + "and v2.16b, v4.16b, v2.16b", + "umaxv h3, v3.8h", + "umaxv h2, v2.8h", + "umov w20, v3.h[0]", + "umov w21, v2.h[0]", + "mov w27, #0x0", + "mov w26, #0x1", + "cmp x21, #0x0 (0)", + "cset x21, eq", + "tst w20, w20", + "mrs x20, nzcv", + "orr w20, w20, w21, lsl #29", + "msr nzcv, x20" + ] + }, + "vtestps ymm0, ymm1": { + "ExpectedInstructionCount": 24, + "Comment": [ + "Map 2 0b01 0x0e 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "mov w27, #0x0", + "mov w26, #0x1", + "and v4.16b, v17.16b, v16.16b", + "and v5.16b, v3.16b, v2.16b", + "ushr v4.4s, v4.4s, #31", + "ushr v5.4s, v5.4s, #31", + "add v4.4s, v5.4s, v4.4s", + "addv s4, v4.4s", + "mov w20, v4.s[0]", + "bic v4.16b, v17.16b, v16.16b", + "bic v2.16b, v3.16b, v2.16b", + "ushr v4.4s, v4.4s, #31", + "ushr v2.4s, v2.4s, #31", + "add v2.4s, v2.4s, v4.4s", + "addv s2, v2.4s", + "mov w21, v2.s[0]", + "cmp x21, #0x0 (0)", + "cset x21, eq", + "tst w20, w20", + "mrs x20, nzcv", + "orr w20, w20, w21, lsl #29", + "msr nzcv, x20" + ] + }, + "vtestpd xmm0, xmm1": { + "ExpectedInstructionCount": 18, + "Comment": [ + "Map 2 0b01 0x0f 128-bit" + ], + "ExpectedArm64ASM": [ + "mov x20, #0x8000000000000000", + "dup v2.2d, x20", + "and v3.16b, v17.16b, v16.16b", + "bic v4.16b, v17.16b, v16.16b", + "and v3.16b, v3.16b, v2.16b", + "and v2.16b, v4.16b, v2.16b", + "umaxv h3, v3.8h", + "umaxv h2, v2.8h", + "umov w20, 
v3.h[0]", + "umov w21, v2.h[0]", + "mov w27, #0x0", + "mov w26, #0x1", + "cmp x21, #0x0 (0)", + "cset x21, eq", + "tst w20, w20", + "mrs x20, nzcv", + "orr w20, w20, w21, lsl #29", + "msr nzcv, x20" + ] + }, + "vtestpd ymm0, ymm1": { + "ExpectedInstructionCount": 24, + "Comment": [ + "Map 2 0b01 0x0f 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "mov w27, #0x0", + "mov w26, #0x1", + "and v4.16b, v17.16b, v16.16b", + "and v5.16b, v3.16b, v2.16b", + "ushr v4.2d, v4.2d, #63", + "ushr v5.2d, v5.2d, #63", + "add v4.2d, v5.2d, v4.2d", + "addp v4.2d, v4.2d, v4.2d", + "mov x20, v4.d[0]", + "bic v4.16b, v17.16b, v16.16b", + "bic v2.16b, v3.16b, v2.16b", + "ushr v4.2d, v4.2d, #63", + "ushr v2.2d, v2.2d, #63", + "add v2.2d, v2.2d, v4.2d", + "addp v2.2d, v2.2d, v2.2d", + "mov x21, v2.d[0]", + "cmp x21, #0x0 (0)", + "cset x21, eq", + "tst w20, w20", + "mrs x20, nzcv", + "orr w20, w20, w21, lsl #29", + "msr nzcv, x20" + ] + }, + "vcvtph2ps xmm0, xmm1": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "Map 2 0b01 0x13 128-bit" + ] + }, + "vcvtph2ps ymm0, xmm1": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "Map 2 0b01 0x13 256-bit" + ] + }, + "vpermps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 23, + "Comment": [ + "Map 2 0b01 0x16 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "movi v4.4s, #0x7, lsl #0", + "mov w20, #0x100", + "movk w20, #0x302, lsl #16", + "dup v5.4s, w20", + "and v6.16b, v17.16b, v4.16b", + "trn1 v6.16b, v6.16b, v6.16b", + "trn1 v6.8h, v6.8h, v6.8h", + "shl v6.16b, v6.16b, #2", + "add v6.16b, v6.16b, v5.16b", + "mov v0.16b, v18.16b", + "mov v1.16b, v3.16b", + "tbl v16.16b, {v0.16b, v1.16b}, v6.16b", + "and v2.16b, v2.16b, v4.16b", + "trn1 v2.16b, v2.16b, v2.16b", + "trn1 v2.8h, v2.8h, v2.8h", + "shl v2.16b, v2.16b, #2", + "add v2.16b, v2.16b, v5.16b", + "mov v0.16b, v18.16b", + "mov v1.16b, v3.16b", + "tbl v2.16b, {v0.16b, 
v1.16b}, v2.16b", + "str q2, [x28, #16]" + ] + }, + "vptest xmm0, xmm1": { + "ExpectedInstructionCount": 14, + "Comment": [ + "Map 2 0b01 0x17 128-bit" + ], + "ExpectedArm64ASM": [ + "and v2.16b, v16.16b, v17.16b", + "bic v3.16b, v17.16b, v16.16b", + "umaxv h2, v2.8h", + "umaxv h3, v3.8h", + "umov w20, v2.h[0]", + "umov w21, v3.h[0]", + "mov w27, #0x0", + "mov w26, #0x1", + "cmp x21, #0x0 (0)", + "cset x21, eq", + "tst w20, w20", + "mrs x20, nzcv", + "orr w20, w20, w21, lsl #29", + "msr nzcv, x20" + ] + }, + "vptest ymm0, ymm1": { + "ExpectedInstructionCount": 20, + "Comment": [ + "Map 2 0b01 0x17 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "and v4.16b, v16.16b, v17.16b", + "bic v5.16b, v17.16b, v16.16b", + "and v6.16b, v2.16b, v3.16b", + "bic v2.16b, v3.16b, v2.16b", + "umax v3.8h, v4.8h, v6.8h", + "umax v2.8h, v5.8h, v2.8h", + "umaxv h3, v3.8h", + "umaxv h2, v2.8h", + "umov w20, v3.h[0]", + "umov w21, v2.h[0]", + "mov w27, #0x0", + "mov w26, #0x1", + "cmp x21, #0x0 (0)", + "cset x21, eq", + "tst w20, w20", + "mrs x20, nzcv", + "orr w20, w20, w21, lsl #29", + "msr nzcv, x20" + ] + }, + "vbroadcastss xmm0, [rax]": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0x18 128-bit" + ], + "ExpectedArm64ASM": [ + "ld1r {v16.4s}, [x4]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vbroadcastss ymm0, [rax]": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 2 0b01 0x18 256-bit" + ], + "ExpectedArm64ASM": [ + "ld1r {v16.4s}, [x4]", + "str q16, [x28, #16]" + ] + }, + "vbroadcastsd ymm0, [rax]": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 2 0b01 0x19 256-bit" + ], + "ExpectedArm64ASM": [ + "ld1r {v16.2d}, [x4]", + "str q16, [x28, #16]" + ] + }, + "vbroadcastf128 ymm0, [rax]": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 2 0b01 0x1a 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x4]", + "str q16, [x28, #16]" + ] + }, + "vpabsb xmm0, xmm1": { + "ExpectedInstructionCount":
3, + "Comment": [ + "Map 2 0b01 0x1c 128-bit" + ], + "ExpectedArm64ASM": [ + "abs v16.16b, v17.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpabsb ymm0, ymm1": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 2 0b01 0x1c 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "abs v16.16b, v17.16b", + "abs v2.16b, v2.16b", + "str q2, [x28, #16]" + ] + }, + "vpabsw xmm0, xmm1": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0x1d 128-bit" + ], + "ExpectedArm64ASM": [ + "abs v16.8h, v17.8h", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpabsw ymm0, ymm1": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 2 0b01 0x1d 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "abs v16.8h, v17.8h", + "abs v2.8h, v2.8h", + "str q2, [x28, #16]" + ] + }, + "vpabsd xmm0, xmm1": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0x1e 128-bit" + ], + "ExpectedArm64ASM": [ + "abs v16.4s, v17.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpabsd ymm0, ymm1": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 2 0b01 0x1e 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "abs v16.4s, v17.4s", + "abs v2.4s, v2.4s", + "str q2, [x28, #16]" + ] + }, + "vpmovsxbw xmm0, xmm1": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0x20 128-bit" + ], + "ExpectedArm64ASM": [ + "sxtl v16.8h, v17.8b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpmovsxbw ymm0, xmm1": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 2 0b01 0x20 256-bit" + ], + "ExpectedArm64ASM": [ + "dup v2.2d, v17.d[1]", + "sxtl v16.8h, v17.8b", + "sxtl v2.8h, v2.8b", + "str q2, [x28, #16]" + ] + }, + "vpmovsxbd xmm0, xmm1": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 2 0b01 0x21 128-bit" + ], + "ExpectedArm64ASM": [ + "sxtl v2.8h, v17.8b", + "sxtl v16.4s, v2.4h", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpmovsxbd ymm0, xmm1": { + 
"ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0x21 256-bit" + ], + "ExpectedArm64ASM": [ + "dup v2.4s, v17.s[1]", + "sxtl v3.8h, v17.8b", + "sxtl v16.4s, v3.4h", + "sxtl v2.8h, v2.8b", + "sxtl v2.4s, v2.4h", + "str q2, [x28, #16]" + ] + }, + "vpmovsxbq xmm0, xmm1": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x22 128-bit" + ], + "ExpectedArm64ASM": [ + "sxtl v2.8h, v17.8b", + "sxtl v2.4s, v2.4h", + "sxtl v16.2d, v2.2s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpmovsxbq ymm0, xmm1": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0x22 256-bit" + ], + "ExpectedArm64ASM": [ + "dup v2.8h, v17.h[1]", + "sxtl v3.8h, v17.8b", + "sxtl v3.4s, v3.4h", + "sxtl v16.2d, v3.2s", + "sxtl v2.8h, v2.8b", + "sxtl v2.4s, v2.4h", + "sxtl v2.2d, v2.2s", + "str q2, [x28, #16]" + ] + }, + "vpmovsxwd xmm0, xmm1": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0x23 128-bit" + ], + "ExpectedArm64ASM": [ + "sxtl v16.4s, v17.4h", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpmovsxwd ymm0, xmm1": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 2 0b01 0x23 256-bit" + ], + "ExpectedArm64ASM": [ + "dup v2.2d, v17.d[1]", + "sxtl v16.4s, v17.4h", + "sxtl v2.4s, v2.4h", + "str q2, [x28, #16]" + ] + }, + "vpmovsxwq xmm0, xmm1": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 2 0b01 0x24 128-bit" + ], + "ExpectedArm64ASM": [ + "sxtl v2.4s, v17.4h", + "sxtl v16.2d, v2.2s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpmovsxwq ymm0, xmm1": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0x24 256-bit" + ], + "ExpectedArm64ASM": [ + "dup v2.4s, v17.s[1]", + "sxtl v3.4s, v17.4h", + "sxtl v16.2d, v3.2s", + "sxtl v2.4s, v2.4h", + "sxtl v2.2d, v2.2s", + "str q2, [x28, #16]" + ] + }, + "vpmovsxdq xmm0, xmm1": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0x25 128-bit" + ], + "ExpectedArm64ASM": [ + "sxtl v16.2d, v17.2s", + "movi v2.2d, #0x0", + "str 
q2, [x28, #16]" + ] + }, + "vpmovsxdq ymm0, xmm1": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 2 0b01 0x25 256-bit" + ], + "ExpectedArm64ASM": [ + "dup v2.2d, v17.d[1]", + "sxtl v16.2d, v17.2s", + "sxtl v2.2d, v2.2s", + "str q2, [x28, #16]" + ] + }, + "vpmuldq xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x28 128-bit" + ], + "ExpectedArm64ASM": [ + "uzp1 v2.4s, v17.4s, v17.4s", + "uzp1 v3.4s, v18.4s, v18.4s", + "smull v16.2d, v2.2s, v3.2s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpmuldq ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 2 0b01 0x28 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "uzp1 v4.4s, v17.4s, v17.4s", + "uzp1 v5.4s, v18.4s, v18.4s", + "smull v16.2d, v4.2s, v5.2s", + "uzp1 v2.4s, v2.4s, v2.4s", + "uzp1 v3.4s, v3.4s, v3.4s", + "smull v2.2d, v2.2s, v3.2s", + "str q2, [x28, #16]" + ] + }, + "vpcmpeqq xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0x29 128-bit" + ], + "ExpectedArm64ASM": [ + "cmeq v16.2d, v17.2d, v18.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpcmpeqq ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x29 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "cmeq v16.2d, v17.2d, v18.2d", + "cmeq v2.2d, v2.2d, v3.2d", + "str q2, [x28, #16]" + ] + }, + "vmovntdqa xmm0, [rax]": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 2 0b01 0x2a 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x4]" + ] + }, + "vmovntdqa ymm0, [rax]": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0x2a 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x4]", + "ldr q2, [x4, #16]", + "str q2, [x28, #16]" + ] + }, + "vpackusdw xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 2 0b01 0x2b 128-bit" + ], + "ExpectedArm64ASM": [ + "sqxtun v16.4h, v17.4s", + 
"sqxtun2 v16.8h, v18.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpackusdw ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 2 0b01 0x2b 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "sqxtun v16.4h, v17.4s", + "sqxtun2 v16.8h, v18.4s", + "sqxtun v2.4h, v2.4s", + "sqxtun2 v2.8h, v3.4s", + "str q2, [x28, #16]" + ] + }, + "vmaskmovps xmm0, xmm1, [rax]": { + "ExpectedInstructionCount": 19, + "Comment": [ + "Map 2 0b01 0x2c 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "movi v0.2d, #0x0", + "mov x0, v17.d[0]", + "tbz x0, #63, #+0x8", + "ld1 {v0.s}[0], [x4]", + "add x1, x4, #0x4 (4)", + "tbz w0, #31, #+0x8", + "ld1 {v0.s}[1], [x1]", + "add x1, x1, #0x4 (4)", + "mov x0, v17.d[1]", + "tbz x0, #63, #+0x8", + "ld1 {v0.s}[2], [x1]", + "add x1, x1, #0x4 (4)", + "tbz w0, #31, #+0x8", + "ld1 {v0.s}[3], [x1]", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]", + "msr nzcv, x20" + ] + }, + "vmaskmovps ymm0, ymm1, [rax]": { + "ExpectedInstructionCount": 35, + "Comment": [ + "Map 2 0b01 0x2c 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mrs x20, nzcv", + "movi v0.2d, #0x0", + "mov x0, v17.d[0]", + "tbz x0, #63, #+0x8", + "ld1 {v0.s}[0], [x4]", + "add x1, x4, #0x4 (4)", + "tbz w0, #31, #+0x8", + "ld1 {v0.s}[1], [x1]", + "add x1, x1, #0x4 (4)", + "mov x0, v17.d[1]", + "tbz x0, #63, #+0x8", + "ld1 {v0.s}[2], [x1]", + "add x1, x1, #0x4 (4)", + "tbz w0, #31, #+0x8", + "ld1 {v0.s}[3], [x1]", + "mov v16.16b, v0.16b", + "add x21, x4, #0x10 (16)", + "movi v0.2d, #0x0", + "mov x0, v2.d[0]", + "tbz x0, #63, #+0x8", + "ld1 {v0.s}[0], [x21]", + "add x1, x21, #0x4 (4)", + "tbz w0, #31, #+0x8", + "ld1 {v0.s}[1], [x1]", + "add x1, x1, #0x4 (4)", + "mov x0, v2.d[1]", + "tbz x0, #63, #+0x8", + "ld1 {v0.s}[2], [x1]", + "add x1, x1, #0x4 (4)", + "tbz w0, #31, #+0x8", + "ld1 {v0.s}[3], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "msr nzcv, x20" + ] + }, + 
"vmaskmovpd xmm0, xmm1, [rax]": { + "ExpectedInstructionCount": 13, + "Comment": [ + "Map 2 0b01 0x2d 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "movi v0.2d, #0x0", + "mov x0, v17.d[0]", + "tbz x0, #63, #+0x8", + "ld1 {v0.d}[0], [x4]", + "add x1, x4, #0x8 (8)", + "mov x0, v17.d[1]", + "tbz x0, #63, #+0x8", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]", + "msr nzcv, x20" + ] + }, + "vmaskmovpd ymm0, ymm1, [rax]": { + "ExpectedInstructionCount": 23, + "Comment": [ + "Map 2 0b01 0x2d 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mrs x20, nzcv", + "movi v0.2d, #0x0", + "mov x0, v17.d[0]", + "tbz x0, #63, #+0x8", + "ld1 {v0.d}[0], [x4]", + "add x1, x4, #0x8 (8)", + "mov x0, v17.d[1]", + "tbz x0, #63, #+0x8", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "add x21, x4, #0x10 (16)", + "movi v0.2d, #0x0", + "mov x0, v2.d[0]", + "tbz x0, #63, #+0x8", + "ld1 {v0.d}[0], [x21]", + "add x1, x21, #0x8 (8)", + "mov x0, v2.d[1]", + "tbz x0, #63, #+0x8", + "ld1 {v0.d}[1], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "msr nzcv, x20" + ] + }, + "vmaskmovps [rax], xmm0, xmm1": { + "ExpectedInstructionCount": 15, + "Comment": [ + "Map 2 0b01 0x2e 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov x0, v16.d[0]", + "tbz x0, #63, #+0x8", + "st1 {v17.s}[0], [x4]", + "add x1, x4, #0x4 (4)", + "tbz w0, #31, #+0x8", + "st1 {v17.s}[1], [x1]", + "add x1, x1, #0x4 (4)", + "mov x0, v16.d[1]", + "tbz x0, #63, #+0x8", + "st1 {v17.s}[2], [x1]", + "add x1, x1, #0x4 (4)", + "tbz w0, #31, #+0x8", + "st1 {v17.s}[3], [x1]", + "msr nzcv, x20" + ] + }, + "vmaskmovps [rax], ymm0, ymm1": { + "ExpectedInstructionCount": 31, + "Comment": [ + "Map 2 0b01 0x2e 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "mrs x20, nzcv", + "mov x0, v16.d[0]", + "tbz x0, #63, #+0x8", + "st1 {v17.s}[0], [x4]", + "add x1, x4, #0x4 (4)", + "tbz w0, #31, #+0x8", + "st1 
{v17.s}[1], [x1]", + "add x1, x1, #0x4 (4)", + "mov x0, v16.d[1]", + "tbz x0, #63, #+0x8", + "st1 {v17.s}[2], [x1]", + "add x1, x1, #0x4 (4)", + "tbz w0, #31, #+0x8", + "st1 {v17.s}[3], [x1]", + "add x21, x4, #0x10 (16)", + "mov x0, v2.d[0]", + "tbz x0, #63, #+0x8", + "st1 {v3.s}[0], [x21]", + "add x1, x21, #0x4 (4)", + "tbz w0, #31, #+0x8", + "st1 {v3.s}[1], [x1]", + "add x1, x1, #0x4 (4)", + "mov x0, v2.d[1]", + "tbz x0, #63, #+0x8", + "st1 {v3.s}[2], [x1]", + "add x1, x1, #0x4 (4)", + "tbz w0, #31, #+0x8", + "st1 {v3.s}[3], [x1]", + "msr nzcv, x20" + ] + }, + "vmaskmovpd [rax], xmm0, xmm1": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 2 0b01 0x2f 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov x0, v16.d[0]", + "tbz x0, #63, #+0x8", + "st1 {v17.d}[0], [x4]", + "add x1, x4, #0x8 (8)", + "mov x0, v16.d[1]", + "tbz x0, #63, #+0x8", + "st1 {v17.d}[1], [x1]", + "msr nzcv, x20" + ] + }, + "vmaskmovpd [rax], ymm0, ymm1": { + "ExpectedInstructionCount": 19, + "Comment": [ + "Map 2 0b01 0x2f 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "mrs x20, nzcv", + "mov x0, v16.d[0]", + "tbz x0, #63, #+0x8", + "st1 {v17.d}[0], [x4]", + "add x1, x4, #0x8 (8)", + "mov x0, v16.d[1]", + "tbz x0, #63, #+0x8", + "st1 {v17.d}[1], [x1]", + "add x21, x4, #0x10 (16)", + "mov x0, v2.d[0]", + "tbz x0, #63, #+0x8", + "st1 {v3.d}[0], [x21]", + "add x1, x21, #0x8 (8)", + "mov x0, v2.d[1]", + "tbz x0, #63, #+0x8", + "st1 {v3.d}[1], [x1]", + "msr nzcv, x20" + ] + }, + "vpmovzxbw xmm0, xmm1": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0x30 128-bit" + ], + "ExpectedArm64ASM": [ + "uxtl v16.8h, v17.8b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpmovzxbw ymm0, xmm1": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 2 0b01 0x30 256-bit" + ], + "ExpectedArm64ASM": [ + "dup v2.2d, v17.d[1]", + "uxtl v16.8h, v17.8b", + "uxtl v2.8h, v2.8b", + "str q2, [x28, #16]" + ] + }, + "vpmovzxbd xmm0, 
xmm1": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 2 0b01 0x31 128-bit" + ], + "ExpectedArm64ASM": [ + "uxtl v2.8h, v17.8b", + "uxtl v16.4s, v2.4h", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpmovzxbd ymm0, xmm1": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0x31 256-bit" + ], + "ExpectedArm64ASM": [ + "dup v2.4s, v17.s[1]", + "uxtl v3.8h, v17.8b", + "uxtl v16.4s, v3.4h", + "uxtl v2.8h, v2.8b", + "uxtl v2.4s, v2.4h", + "str q2, [x28, #16]" + ] + }, + "vpmovzxbq xmm0, xmm1": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x32 128-bit" + ], + "ExpectedArm64ASM": [ + "uxtl v2.8h, v17.8b", + "uxtl v2.4s, v2.4h", + "uxtl v16.2d, v2.2s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpmovzxbq ymm0, xmm1": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0x32 256-bit" + ], + "ExpectedArm64ASM": [ + "dup v2.8h, v17.h[1]", + "uxtl v3.8h, v17.8b", + "uxtl v3.4s, v3.4h", + "uxtl v16.2d, v3.2s", + "uxtl v2.8h, v2.8b", + "uxtl v2.4s, v2.4h", + "uxtl v2.2d, v2.2s", + "str q2, [x28, #16]" + ] + }, + "vpmovzxwd xmm0, xmm1": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0x33 128-bit" + ], + "ExpectedArm64ASM": [ + "uxtl v16.4s, v17.4h", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpmovzxwd ymm0, xmm1": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 2 0b01 0x33 256-bit" + ], + "ExpectedArm64ASM": [ + "dup v2.2d, v17.d[1]", + "uxtl v16.4s, v17.4h", + "uxtl v2.4s, v2.4h", + "str q2, [x28, #16]" + ] + }, + "vpmovzxwq xmm0, xmm1": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 2 0b01 0x34 128-bit" + ], + "ExpectedArm64ASM": [ + "uxtl v2.4s, v17.4h", + "uxtl v16.2d, v2.2s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpmovzxwq ymm0, xmm1": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0x34 256-bit" + ], + "ExpectedArm64ASM": [ + "dup v2.4s, v17.s[1]", + "uxtl v3.4s, v17.4h", + "uxtl v16.2d, v3.2s", + "uxtl v2.4s, 
v2.4h", + "uxtl v2.2d, v2.2s", + "str q2, [x28, #16]" + ] + }, + "vpmovzxdq xmm0, xmm1": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0x35 128-bit" + ], + "ExpectedArm64ASM": [ + "uxtl v16.2d, v17.2s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpmovzxdq ymm0, xmm1": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 2 0b01 0x35 256-bit" + ], + "ExpectedArm64ASM": [ + "dup v2.2d, v17.d[1]", + "uxtl v16.2d, v17.2s", + "uxtl v2.2d, v2.2s", + "str q2, [x28, #16]" + ] + }, + "vpermd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 23, + "Comment": [ + "Map 2 0b01 0x36 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "movi v4.4s, #0x7, lsl #0", + "mov w20, #0x100", + "movk w20, #0x302, lsl #16", + "dup v5.4s, w20", + "and v6.16b, v17.16b, v4.16b", + "trn1 v6.16b, v6.16b, v6.16b", + "trn1 v6.8h, v6.8h, v6.8h", + "shl v6.16b, v6.16b, #2", + "add v6.16b, v6.16b, v5.16b", + "mov v0.16b, v18.16b", + "mov v1.16b, v3.16b", + "tbl v16.16b, {v0.16b, v1.16b}, v6.16b", + "and v2.16b, v2.16b, v4.16b", + "trn1 v2.16b, v2.16b, v2.16b", + "trn1 v2.8h, v2.8h, v2.8h", + "shl v2.16b, v2.16b, #2", + "add v2.16b, v2.16b, v5.16b", + "mov v0.16b, v18.16b", + "mov v1.16b, v3.16b", + "tbl v2.16b, {v0.16b, v1.16b}, v2.16b", + "str q2, [x28, #16]" + ] + }, + "vpcmpgtq xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0x37 128-bit" + ], + "ExpectedArm64ASM": [ + "cmgt v16.2d, v17.2d, v18.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpcmpgtq ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x37 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "cmgt v16.2d, v17.2d, v18.2d", + "cmgt v2.2d, v2.2d, v3.2d", + "str q2, [x28, #16]" + ] + }, + "vpminsb xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0x38 128-bit" + ], + "ExpectedArm64ASM": [ + "smin v16.16b, v17.16b, 
v18.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpminsb ymm0, ymm1, ymm0": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Aliasing source and destination", + "Map 2 0b01 0x38 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #16]", + "smin v16.16b, v17.16b, v16.16b", + "smin v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vpminsb ymm0, ymm0, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Aliasing source and destination", + "Map 2 0b01 0x38 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #48]", + "smin v16.16b, v16.16b, v18.16b", + "smin v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vpminsb ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x38 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "smin v16.16b, v17.16b, v18.16b", + "smin v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vpminsd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0x39 128-bit" + ], + "ExpectedArm64ASM": [ + "smin v16.4s, v17.4s, v18.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpminsd ymm0, ymm1, ymm0": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Aliasing source and destination", + "Map 2 0b01 0x39 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #16]", + "smin v16.4s, v17.4s, v16.4s", + "smin v2.4s, v2.4s, v3.4s", + "str q2, [x28, #16]" + ] + }, + "vpminsd ymm0, ymm0, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Aliasing source and destination", + "Map 2 0b01 0x39 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #48]", + "smin v16.4s, v16.4s, v18.4s", + "smin v2.4s, v2.4s, v3.4s", + "str q2, [x28, #16]" + ] + }, + "vpminsd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x39 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr 
q2, [x28, #32]", + "ldr q3, [x28, #48]", + "smin v16.4s, v17.4s, v18.4s", + "smin v2.4s, v2.4s, v3.4s", + "str q2, [x28, #16]" + ] + }, + "vpminuw xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0x3a 128-bit" + ], + "ExpectedArm64ASM": [ + "umin v16.8h, v17.8h, v18.8h", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpminuw ymm0, ymm1, ymm0": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Aliasing source and destination", + "Map 2 0b01 0x3a 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #16]", + "umin v16.8h, v17.8h, v16.8h", + "umin v2.8h, v2.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vpminuw ymm0, ymm0, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Aliasing source and destination", + "Map 2 0b01 0x3a 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #48]", + "umin v16.8h, v16.8h, v18.8h", + "umin v2.8h, v2.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vpminuw ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x3a 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "umin v16.8h, v17.8h, v18.8h", + "umin v2.8h, v2.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vpminud xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0x3b 128-bit" + ], + "ExpectedArm64ASM": [ + "umin v16.4s, v17.4s, v18.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpminud ymm0, ymm1, ymm0": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Aliasing source and destination", + "Map 2 0b01 0x3b 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #16]", + "umin v16.4s, v17.4s, v16.4s", + "umin v2.4s, v2.4s, v3.4s", + "str q2, [x28, #16]" + ] + }, + "vpminud ymm0, ymm0, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Aliasing source and destination", + "Map 2 0b01 0x3b 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, 
[x28, #16]", + "ldr q3, [x28, #48]", + "umin v16.4s, v16.4s, v18.4s", + "umin v2.4s, v2.4s, v3.4s", + "str q2, [x28, #16]" + ] + }, + "vpminud ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x3b 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "umin v16.4s, v17.4s, v18.4s", + "umin v2.4s, v2.4s, v3.4s", + "str q2, [x28, #16]" + ] + }, + "vpmaxsb xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0x3c 128-bit" + ], + "ExpectedArm64ASM": [ + "smax v16.16b, v17.16b, v18.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpmaxsb ymm0, ymm0, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Aliasing source and destination", + "Map 2 0b01 0x3c 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #48]", + "smax v16.16b, v16.16b, v18.16b", + "smax v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vpmaxsb ymm0, ymm1, ymm0": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x3c 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #16]", + "smax v16.16b, v17.16b, v16.16b", + "smax v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vpmaxsb ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x3c 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "smax v16.16b, v17.16b, v18.16b", + "smax v2.16b, v2.16b, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vpmaxsd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0x3d 128-bit" + ], + "ExpectedArm64ASM": [ + "smax v16.4s, v17.4s, v18.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpmaxsd ymm0, ymm1, ymm0": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Aliasing source and destination", + "Map 2 0b01 0x3d 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #16]", + "smax v16.4s, 
v17.4s, v16.4s", + "smax v2.4s, v2.4s, v3.4s", + "str q2, [x28, #16]" + ] + }, + "vpmaxsd ymm0, ymm0, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Aliasing source and destination", + "Map 2 0b01 0x3d 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #48]", + "smax v16.4s, v16.4s, v18.4s", + "smax v2.4s, v2.4s, v3.4s", + "str q2, [x28, #16]" + ] + }, + "vpmaxsd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x3d 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "smax v16.4s, v17.4s, v18.4s", + "smax v2.4s, v2.4s, v3.4s", + "str q2, [x28, #16]" + ] + }, + "vpmaxuw xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0x3e 128-bit" + ], + "ExpectedArm64ASM": [ + "umax v16.8h, v17.8h, v18.8h", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpmaxuw ymm0, ymm1, ymm0": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Aliasing source and destination", + "Map 2 0b01 0x3e 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #16]", + "umax v16.8h, v17.8h, v16.8h", + "umax v2.8h, v2.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vpmaxuw ymm0, ymm0, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Aliasing source and destination", + "Map 2 0b01 0x3e 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #48]", + "umax v16.8h, v16.8h, v18.8h", + "umax v2.8h, v2.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vpmaxuw ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x3e 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "umax v16.8h, v17.8h, v18.8h", + "umax v2.8h, v2.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vpmaxud xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0x3f 128-bit" + ], + "ExpectedArm64ASM": [ + "umax v16.4s, v17.4s, v18.4s", + "movi v2.2d, 
#0x0", + "str q2, [x28, #16]" + ] + }, + "vpmaxud ymm0, ymm0, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Aliasing source and destination", + "Map 2 0b01 0x3f 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #48]", + "umax v16.4s, v16.4s, v18.4s", + "umax v2.4s, v2.4s, v3.4s", + "str q2, [x28, #16]" + ] + }, + "vpmaxud ymm0, ymm1, ymm0": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x3f 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #16]", + "umax v16.4s, v17.4s, v16.4s", + "umax v2.4s, v2.4s, v3.4s", + "str q2, [x28, #16]" + ] + }, + "vpmaxud ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x3f 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "umax v16.4s, v17.4s, v18.4s", + "umax v2.4s, v2.4s, v3.4s", + "str q2, [x28, #16]" + ] + }, + "vpmulld xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0x40 128-bit" + ], + "ExpectedArm64ASM": [ + "mul v16.4s, v17.4s, v18.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpmulld ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x40 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "mul v16.4s, v17.4s, v18.4s", + "mul v2.4s, v2.4s, v3.4s", + "str q2, [x28, #16]" + ] + }, + "vphminposuw xmm0, xmm1": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0x41 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #2352]", + "zip1 v3.8h, v2.8h, v17.8h", + "zip2 v2.8h, v2.8h, v17.8h", + "umin v2.4s, v3.4s, v2.4s", + "uminv s2, v2.4s", + "rev32 v16.8h, v2.8h", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsrlvd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0x45 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v0.4s, #0x20, lsl #0", + "umin v0.4s, v0.4s, v18.4s", + "neg v0.4s, v0.4s", + "ushl
v16.4s, v17.4s, v0.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsrlvd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 11, + "Comment": [ + "Map 2 0b01 0x45 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "movi v0.4s, #0x20, lsl #0", + "umin v0.4s, v0.4s, v18.4s", + "neg v0.4s, v0.4s", + "ushl v16.4s, v17.4s, v0.4s", + "movi v0.4s, #0x20, lsl #0", + "umin v0.4s, v0.4s, v3.4s", + "neg v0.4s, v0.4s", + "ushl v2.4s, v2.4s, v0.4s", + "str q2, [x28, #16]" + ] + }, + "vpsrlvq xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0x45 128-bit" + ], + "ExpectedArm64ASM": [ + "mov w0, #0x40", + "dup v0.2d, x0", + "cmhi v1.2d, v18.2d, v0.2d", + "bif v0.16b, v18.16b, v1.16b", + "neg v0.2d, v0.2d", + "ushl v16.2d, v17.2d, v0.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsrlvq ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 15, + "Comment": [ + "Map 2 0b01 0x45 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "mov w0, #0x40", + "dup v0.2d, x0", + "cmhi v1.2d, v18.2d, v0.2d", + "bif v0.16b, v18.16b, v1.16b", + "neg v0.2d, v0.2d", + "ushl v16.2d, v17.2d, v0.2d", + "mov w0, #0x40", + "dup v0.2d, x0", + "cmhi v1.2d, v3.2d, v0.2d", + "bif v0.16b, v3.16b, v1.16b", + "neg v0.2d, v0.2d", + "ushl v2.2d, v2.2d, v0.2d", + "str q2, [x28, #16]" + ] + }, + "vpsravd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0x46 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v0.4s, #0x1f, lsl #0", + "umin v0.4s, v0.4s, v18.4s", + "neg v0.4s, v0.4s", + "sshl v16.4s, v17.4s, v0.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsravd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 11, + "Comment": [ + "Map 2 0b01 0x46 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "movi v0.4s, #0x1f, lsl #0", + "umin v0.4s, v0.4s, v18.4s", + "neg v0.4s, v0.4s", + "sshl v16.4s, v17.4s, 
v0.4s", + "movi v0.4s, #0x1f, lsl #0", + "umin v0.4s, v0.4s, v3.4s", + "neg v0.4s, v0.4s", + "sshl v2.4s, v2.4s, v0.4s", + "str q2, [x28, #16]" + ] + }, + "vpsllvd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x47 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v0.4s, #0x20, lsl #0", + "umin v0.4s, v0.4s, v18.4s", + "ushl v16.4s, v17.4s, v0.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsllvd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 2 0b01 0x47 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "movi v0.4s, #0x20, lsl #0", + "umin v0.4s, v0.4s, v18.4s", + "ushl v16.4s, v17.4s, v0.4s", + "movi v0.4s, #0x20, lsl #0", + "umin v0.4s, v0.4s, v3.4s", + "ushl v2.4s, v2.4s, v0.4s", + "str q2, [x28, #16]" + ] + }, + "vpsllvq xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 2 0b01 0x47 128-bit" + ], + "ExpectedArm64ASM": [ + "mov w0, #0x40", + "dup v0.2d, x0", + "cmhi v1.2d, v18.2d, v0.2d", + "bif v0.16b, v18.16b, v1.16b", + "ushl v16.2d, v17.2d, v0.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsllvq ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 13, + "Comment": [ + "Map 2 0b01 0x47 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "mov w0, #0x40", + "dup v0.2d, x0", + "cmhi v1.2d, v18.2d, v0.2d", + "bif v0.16b, v18.16b, v1.16b", + "ushl v16.2d, v17.2d, v0.2d", + "mov w0, #0x40", + "dup v0.2d, x0", + "cmhi v1.2d, v3.2d, v0.2d", + "bif v0.16b, v3.16b, v1.16b", + "ushl v2.2d, v2.2d, v0.2d", + "str q2, [x28, #16]" + ] + }, + "vpbroadcastd xmm0, xmm1": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0x58 128-bit" + ], + "ExpectedArm64ASM": [ + "dup v16.4s, v17.s[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpbroadcastd xmm0, [rax]": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0x58 128-bit" + ], + "ExpectedArm64ASM": [ 
+ "ld1r {v16.4s}, [x4]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpbroadcastd ymm0, xmm1": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 2 0b01 0x58 256-bit" + ], + "ExpectedArm64ASM": [ + "dup v16.4s, v17.s[0]", + "str q16, [x28, #16]" + ] + }, + "vpbroadcastd ymm0, [rax]": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 2 0b01 0x58 256-bit" + ], + "ExpectedArm64ASM": [ + "ld1r {v16.4s}, [x4]", + "str q16, [x28, #16]" + ] + }, + "vpbroadcastq xmm0, xmm1": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0x59 128-bit" + ], + "ExpectedArm64ASM": [ + "dup v16.2d, v17.d[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpbroadcastq xmm0, [rax]": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0x59 128-bit" + ], + "ExpectedArm64ASM": [ + "ld1r {v16.2d}, [x4]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpbroadcastq ymm0, xmm1": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 2 0b01 0x59 256-bit" + ], + "ExpectedArm64ASM": [ + "dup v16.2d, v17.d[0]", + "str q16, [x28, #16]" + ] + }, + "vpbroadcastq ymm0, [rax]": { + "ExpectedInstructiqonCount": -1, + "Comment": [ + "Map 2 0b01 0x59 256-bit" + ], + "ExpectedInstructionCount": 2, + "ExpectedArm64ASM": [ + "ld1r {v16.2d}, [x4]", + "str q16, [x28, #16]" + ] + }, + "vbroadcasti128 ymm0, [rax]": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 2 0b01 0x5a 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x4]", + "str q16, [x28, #16]" + ] + }, + "vpbroadcastb xmm0, xmm1": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0x78 128-bit" + ], + "ExpectedArm64ASM": [ + "dup v16.16b, v17.b[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpbroadcastb xmm0, [rax]": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0x78 128-bit" + ], + "ExpectedArm64ASM": [ + "ld1r {v16.16b}, [x4]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpbroadcastb ymm0, xmm1": { + 
"ExpectedInstructionCount": 2, + "Comment": [ + "Map 2 0b01 0x78 256-bit" + ], + "ExpectedArm64ASM": [ + "dup v16.16b, v17.b[0]", + "str q16, [x28, #16]" + ] + }, + "vpbroadcastb ymm0, [rax]": { + "ExpectedInstructiqonCount": -1, + "Comment": [ + "Map 2 0b01 0x78 256-bit" + ], + "ExpectedInstructionCount": 2, + "ExpectedArm64ASM": [ + "ld1r {v16.16b}, [x4]", + "str q16, [x28, #16]" + ] + }, + "vpbroadcastw xmm0, xmm1": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0x79 128-bit" + ], + "ExpectedArm64ASM": [ + "dup v16.8h, v17.h[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpbroadcastw xmm0, [rax]": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0x79 128-bit" + ], + "ExpectedArm64ASM": [ + "ld1r {v16.8h}, [x4]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpbroadcastw ymm0, xmm1": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 2 0b01 0x79 256-bit" + ], + "ExpectedArm64ASM": [ + "dup v16.8h, v17.h[0]", + "str q16, [x28, #16]" + ] + }, + "vpbroadcastw ymm0, [rax]": { + "ExpectedInstructiqonCount": -1, + "Comment": [ + "Map 2 0b01 0x79 256-bit" + ], + "ExpectedInstructionCount": 2, + "ExpectedArm64ASM": [ + "ld1r {v16.8h}, [x4]", + "str q16, [x28, #16]" + ] + }, + "vpmaskmovd xmm0, xmm1, [rax]": { + "ExpectedInstructionCount": 19, + "Comment": [ + "Map 2 0b01 0x8c 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "movi v0.2d, #0x0", + "mov x0, v17.d[0]", + "tbz x0, #63, #+0x8", + "ld1 {v0.s}[0], [x4]", + "add x1, x4, #0x4 (4)", + "tbz w0, #31, #+0x8", + "ld1 {v0.s}[1], [x1]", + "add x1, x1, #0x4 (4)", + "mov x0, v17.d[1]", + "tbz x0, #63, #+0x8", + "ld1 {v0.s}[2], [x1]", + "add x1, x1, #0x4 (4)", + "tbz w0, #31, #+0x8", + "ld1 {v0.s}[3], [x1]", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]", + "msr nzcv, x20" + ] + }, + "vpmaskmovd ymm0, ymm1, [rax]": { + "ExpectedInstructionCount": 35, + "Comment": [ + "Map 2 0b01 0x8c 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr 
q2, [x28, #32]", + "mrs x20, nzcv", + "movi v0.2d, #0x0", + "mov x0, v17.d[0]", + "tbz x0, #63, #+0x8", + "ld1 {v0.s}[0], [x4]", + "add x1, x4, #0x4 (4)", + "tbz w0, #31, #+0x8", + "ld1 {v0.s}[1], [x1]", + "add x1, x1, #0x4 (4)", + "mov x0, v17.d[1]", + "tbz x0, #63, #+0x8", + "ld1 {v0.s}[2], [x1]", + "add x1, x1, #0x4 (4)", + "tbz w0, #31, #+0x8", + "ld1 {v0.s}[3], [x1]", + "mov v16.16b, v0.16b", + "add x21, x4, #0x10 (16)", + "movi v0.2d, #0x0", + "mov x0, v2.d[0]", + "tbz x0, #63, #+0x8", + "ld1 {v0.s}[0], [x21]", + "add x1, x21, #0x4 (4)", + "tbz w0, #31, #+0x8", + "ld1 {v0.s}[1], [x1]", + "add x1, x1, #0x4 (4)", + "mov x0, v2.d[1]", + "tbz x0, #63, #+0x8", + "ld1 {v0.s}[2], [x1]", + "add x1, x1, #0x4 (4)", + "tbz w0, #31, #+0x8", + "ld1 {v0.s}[3], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "msr nzcv, x20" + ] + }, + "vpmaskmovq xmm0, xmm1, [rax]": { + "ExpectedInstructionCount": 13, + "Comment": [ + "Map 2 0b01 0x8c 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "movi v0.2d, #0x0", + "mov x0, v17.d[0]", + "tbz x0, #63, #+0x8", + "ld1 {v0.d}[0], [x4]", + "add x1, x4, #0x8 (8)", + "mov x0, v17.d[1]", + "tbz x0, #63, #+0x8", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]", + "msr nzcv, x20" + ] + }, + "vpmaskmovq ymm0, ymm1, [rax]": { + "ExpectedInstructionCount": 23, + "Comment": [ + "Map 2 0b01 0x8c 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mrs x20, nzcv", + "movi v0.2d, #0x0", + "mov x0, v17.d[0]", + "tbz x0, #63, #+0x8", + "ld1 {v0.d}[0], [x4]", + "add x1, x4, #0x8 (8)", + "mov x0, v17.d[1]", + "tbz x0, #63, #+0x8", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "add x21, x4, #0x10 (16)", + "movi v0.2d, #0x0", + "mov x0, v2.d[0]", + "tbz x0, #63, #+0x8", + "ld1 {v0.d}[0], [x21]", + "add x1, x21, #0x8 (8)", + "mov x0, v2.d[1]", + "tbz x0, #63, #+0x8", + "ld1 {v0.d}[1], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "msr nzcv, x20" + ] + }, + 
"vpmaskmovd [rax], xmm0, xmm1": { + "ExpectedInstructionCount": 15, + "Comment": [ + "Map 2 0b01 0x8e 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov x0, v16.d[0]", + "tbz x0, #63, #+0x8", + "st1 {v17.s}[0], [x4]", + "add x1, x4, #0x4 (4)", + "tbz w0, #31, #+0x8", + "st1 {v17.s}[1], [x1]", + "add x1, x1, #0x4 (4)", + "mov x0, v16.d[1]", + "tbz x0, #63, #+0x8", + "st1 {v17.s}[2], [x1]", + "add x1, x1, #0x4 (4)", + "tbz w0, #31, #+0x8", + "st1 {v17.s}[3], [x1]", + "msr nzcv, x20" + ] + }, + "vpmaskmovd [rax], ymm0, ymm1": { + "ExpectedInstructionCount": 31, + "Comment": [ + "Map 2 0b01 0x8e 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "mrs x20, nzcv", + "mov x0, v16.d[0]", + "tbz x0, #63, #+0x8", + "st1 {v17.s}[0], [x4]", + "add x1, x4, #0x4 (4)", + "tbz w0, #31, #+0x8", + "st1 {v17.s}[1], [x1]", + "add x1, x1, #0x4 (4)", + "mov x0, v16.d[1]", + "tbz x0, #63, #+0x8", + "st1 {v17.s}[2], [x1]", + "add x1, x1, #0x4 (4)", + "tbz w0, #31, #+0x8", + "st1 {v17.s}[3], [x1]", + "add x21, x4, #0x10 (16)", + "mov x0, v2.d[0]", + "tbz x0, #63, #+0x8", + "st1 {v3.s}[0], [x21]", + "add x1, x21, #0x4 (4)", + "tbz w0, #31, #+0x8", + "st1 {v3.s}[1], [x1]", + "add x1, x1, #0x4 (4)", + "mov x0, v2.d[1]", + "tbz x0, #63, #+0x8", + "st1 {v3.s}[2], [x1]", + "add x1, x1, #0x4 (4)", + "tbz w0, #31, #+0x8", + "st1 {v3.s}[3], [x1]", + "msr nzcv, x20" + ] + }, + "vpmaskmovq [rax], xmm0, xmm1": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 2 0b01 0x8e 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov x0, v16.d[0]", + "tbz x0, #63, #+0x8", + "st1 {v17.d}[0], [x4]", + "add x1, x4, #0x8 (8)", + "mov x0, v16.d[1]", + "tbz x0, #63, #+0x8", + "st1 {v17.d}[1], [x1]", + "msr nzcv, x20" + ] + }, + "vpmaskmovq [rax], ymm0, ymm1": { + "ExpectedInstructionCount": 19, + "Comment": [ + "Map 2 0b01 0x8e 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "mrs x20, nzcv", + "mov x0, 
v16.d[0]", + "tbz x0, #63, #+0x8", + "st1 {v17.d}[0], [x4]", + "add x1, x4, #0x8 (8)", + "mov x0, v16.d[1]", + "tbz x0, #63, #+0x8", + "st1 {v17.d}[1], [x1]", + "add x21, x4, #0x10 (16)", + "mov x0, v2.d[0]", + "tbz x0, #63, #+0x8", + "st1 {v3.d}[0], [x21]", + "add x1, x21, #0x8 (8)", + "mov x0, v2.d[1]", + "tbz x0, #63, #+0x8", + "st1 {v3.d}[1], [x1]", + "msr nzcv, x20" + ] + }, + "vpgatherdd xmm0, [xmm1*2 + rax], xmm2": { + "ExpectedInstructionCount": 27, + "Comment": [ + "Map 2 0b01 0x90 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[3], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherdd xmm0, [xmm1*4 + rax], xmm2": { + "ExpectedInstructionCount": 27, + "Comment": [ + "Map 2 0b01 0x90 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.s}[3], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr 
nzcv, x20" + ] + }, + "vpgatherdd xmm0, [xmm1*8 + rax], xmm2": { + "ExpectedInstructionCount": 27, + "Comment": [ + "Map 2 0b01 0x90 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[3], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherdd ymm0, [ymm1*1 + rax], ymm2": { + "ExpectedInstructionCount": 52, + "Comment": [ + "Map 2 0b01 0x90 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw", + "ld1 {v0.s}[3], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov w0, v4.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[0]", + "add x1, x4, w0, sxtw", + "ld1 {v0.s}[0], [x1]", + "mov w0, v4.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[1]", + "add x1, x4, w0, sxtw", + "ld1 {v0.s}[1], [x1]", + "mov w0, v4.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[2]", + "add x1, x4, w0, sxtw", + "ld1 {v0.s}[2], [x1]", + "mov 
w0, v4.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[3]", + "add x1, x4, w0, sxtw", + "ld1 {v0.s}[3], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherdd ymm0, [ymm1*2 + rax], ymm2": { + "ExpectedInstructionCount": 52, + "Comment": [ + "Map 2 0b01 0x90 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[3], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov w0, v4.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[0]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[0], [x1]", + "mov w0, v4.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[1]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[1], [x1]", + "mov w0, v4.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[2]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[2], [x1]", + "mov w0, v4.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[3]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[3], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherdd ymm0, [ymm1*4 + rax], ymm2": { + "ExpectedInstructionCount": 52, + "Comment": [ + "Map 2 0b01 0x90 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[0]", + 
"add x1, x4, w0, sxtw #2", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.s}[3], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov w0, v4.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[0]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.s}[0], [x1]", + "mov w0, v4.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[1]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.s}[1], [x1]", + "mov w0, v4.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[2]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.s}[2], [x1]", + "mov w0, v4.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[3]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.s}[3], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherdd ymm0, [ymm1*8 + rax], ymm2": { + "ExpectedInstructionCount": 52, + "Comment": [ + "Map 2 0b01 0x90 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[3], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov w0, v4.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[0]", + "add x1, x4, w0, sxtw #3", + "ld1 
{v0.s}[0], [x1]", + "mov w0, v4.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[1]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[1], [x1]", + "mov w0, v4.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[2]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[2], [x1]", + "mov w0, v4.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[3]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[3], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherdq xmm0, [xmm1*1 + rax], xmm2": { + "ExpectedInstructionCount": 17, + "Comment": [ + "Map 2 0b01 0x90 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherdq xmm0, [xmm1*2 + rax], xmm2": { + "ExpectedInstructionCount": 17, + "Comment": [ + "Map 2 0b01 0x90 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherdq xmm0, [xmm1*4 + rax], xmm2": { + "ExpectedInstructionCount": 17, + "Comment": [ + "Map 2 0b01 0x90 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, 
#+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherdq xmm0, [xmm1*8 + rax], xmm2": { + "ExpectedInstructionCount": 17, + "Comment": [ + "Map 2 0b01 0x90 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherdq ymm0, [xmm1*1 + rax], ymm2": { + "ExpectedInstructionCount": 32, + "Comment": [ + "Map 2 0b01 0x90 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov x0, v4.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw", + "ld1 {v0.d}[0], [x1]", + "mov x0, v4.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw", + "ld1 {v0.d}[1], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherdq ymm0, [xmm1*2 + rax], ymm2": { + "ExpectedInstructionCount": 32, + "Comment": [ + "Map 2 0b01 0x90 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, 
#63, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov x0, v4.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.d}[0], [x1]", + "mov x0, v4.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.d}[1], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherdq ymm0, [xmm1*4 + rax], ymm2": { + "ExpectedInstructionCount": 32, + "Comment": [ + "Map 2 0b01 0x90 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov x0, v4.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.d}[0], [x1]", + "mov x0, v4.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.d}[1], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherdq ymm0, [xmm1*8 + rax], ymm2": { + "ExpectedInstructionCount": 32, + "Comment": [ + "Map 2 0b01 0x90 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, 
#+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov x0, v4.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.d}[0], [x1]", + "mov x0, v4.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.d}[1], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherqd xmm0, [xmm1*1 + rax], xmm2": { + "ExpectedInstructionCount": 18, + "Comment": [ + "Map 2 0b01 0x91 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0", + "ld1 {v0.s}[1], [x1]", + "mov v2.16b, v0.16b", + "movi v18.2d, #0x0", + "zip1 v16.2d, v2.2d, v18.2d", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherqd xmm0, [xmm1*2 + rax], xmm2": { + "ExpectedInstructionCount": 18, + "Comment": [ + "Map 2 0b01 0x91 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.s}[1], [x1]", + "mov v2.16b, v0.16b", + "movi v18.2d, #0x0", + "zip1 v16.2d, v2.2d, v18.2d", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherqd xmm0, [xmm1*4 + rax], xmm2": { + "ExpectedInstructionCount": 18, + "Comment": [ + "Map 2 0b01 0x91 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.s}[0], [x1]", + 
"mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.s}[1], [x1]", + "mov v2.16b, v0.16b", + "movi v18.2d, #0x0", + "zip1 v16.2d, v2.2d, v18.2d", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherqd xmm0, [xmm1*8 + rax], xmm2": { + "ExpectedInstructionCount": 18, + "Comment": [ + "Map 2 0b01 0x91 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.s}[1], [x1]", + "mov v2.16b, v0.16b", + "movi v18.2d, #0x0", + "zip1 v16.2d, v2.2d, v18.2d", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherqd xmm0, [ymm1*1 + rax], xmm2": { + "ExpectedInstructionCount": 30, + "Comment": [ + "Map 2 0b01 0x91 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "mov x0, v2.d[0]", + "add x1, x4, x0", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "mov x0, v2.d[1]", + "add x1, x4, x0", + "ld1 {v0.s}[3], [x1]", + "mov v3.16b, v0.16b", + "mov v0.16b, v3.16b", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherqd xmm0, [ymm1*2 + rax], xmm2": { + "ExpectedInstructionCount": 30, + "Comment": [ + "Map 2 0b01 0x91 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "mov x0, 
v17.d[0]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "mov x0, v2.d[0]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "mov x0, v2.d[1]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.s}[3], [x1]", + "mov v3.16b, v0.16b", + "mov v0.16b, v3.16b", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherqd xmm0, [ymm1*4 + rax], xmm2": { + "ExpectedInstructionCount": 30, + "Comment": [ + "Map 2 0b01 0x91 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "mov x0, v2.d[0]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "mov x0, v2.d[1]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.s}[3], [x1]", + "mov v3.16b, v0.16b", + "mov v0.16b, v3.16b", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherqd xmm0, [ymm1*8 + rax], xmm2": { + "ExpectedInstructionCount": 30, + "Comment": [ + "Map 2 0b01 0x91 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "mov x0, 
v2.d[0]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "mov x0, v2.d[1]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.s}[3], [x1]", + "mov v3.16b, v0.16b", + "mov v0.16b, v3.16b", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherqq xmm0, [xmm1*1 + rax], xmm2": { + "ExpectedInstructionCount": 17, + "Comment": [ + "Map 2 0b01 0x91 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherqq xmm0, [xmm1*2 + rax], xmm2": { + "ExpectedInstructionCount": 17, + "Comment": [ + "Map 2 0b01 0x91 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherqq xmm0, [xmm1*4 + rax], xmm2": { + "ExpectedInstructionCount": 17, + "Comment": [ + "Map 2 0b01 0x91 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, 
#48]", + "msr nzcv, x20" + ] + }, + "vpgatherqq xmm0, [xmm1*8 + rax], xmm2": { + "ExpectedInstructionCount": 17, + "Comment": [ + "Map 2 0b01 0x91 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherqq ymm0, [ymm1*1 + rax], ymm2": { + "ExpectedInstructionCount": 32, + "Comment": [ + "Map 2 0b01 0x91 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov x0, v4.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v3.d[0]", + "add x1, x4, x0", + "ld1 {v0.d}[0], [x1]", + "mov x0, v4.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v3.d[1]", + "add x1, x4, x0", + "ld1 {v0.d}[1], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherqq ymm0, [ymm1*2 + rax], ymm2": { + "ExpectedInstructionCount": 32, + "Comment": [ + "Map 2 0b01 0x91 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.d}[1], [x1]", 
+ "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov x0, v4.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v3.d[0]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.d}[0], [x1]", + "mov x0, v4.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v3.d[1]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.d}[1], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherqq ymm0, [ymm1*4 + rax], ymm2": { + "ExpectedInstructionCount": 32, + "Comment": [ + "Map 2 0b01 0x91 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov x0, v4.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v3.d[0]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.d}[0], [x1]", + "mov x0, v4.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v3.d[1]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.d}[1], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherqq ymm0, [ymm1*8 + rax], ymm2": { + "ExpectedInstructionCount": 32, + "Comment": [ + "Map 2 0b01 0x91 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov x0, v4.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v3.d[0]", + "add x1, x4, x0, lsl #3", + "ld1 
{v0.d}[0], [x1]", + "mov x0, v4.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v3.d[1]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.d}[1], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherdps xmm0, [xmm1*1 + rax], xmm2": { + "ExpectedInstructionCount": 27, + "Comment": [ + "Map 2 0b01 0x92 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw", + "ld1 {v0.s}[3], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherdps xmm0, [xmm1*2 + rax], xmm2": { + "ExpectedInstructionCount": 27, + "Comment": [ + "Map 2 0b01 0x92 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[3], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherdps xmm0, [xmm1*4 + rax], xmm2": { + "ExpectedInstructionCount": 27, + "Comment": [ + "Map 2 0b01 0x92 
128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.s}[3], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherdps xmm0, [xmm1*8 + rax], xmm2": { + "ExpectedInstructionCount": 27, + "Comment": [ + "Map 2 0b01 0x92 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[3], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherdps ymm0, [ymm1*1 + rax], ymm2": { + "ExpectedInstructionCount": 52, + "Comment": [ + "Map 2 0b01 0x92 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, 
sxtw", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw", + "ld1 {v0.s}[3], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov w0, v4.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[0]", + "add x1, x4, w0, sxtw", + "ld1 {v0.s}[0], [x1]", + "mov w0, v4.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[1]", + "add x1, x4, w0, sxtw", + "ld1 {v0.s}[1], [x1]", + "mov w0, v4.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[2]", + "add x1, x4, w0, sxtw", + "ld1 {v0.s}[2], [x1]", + "mov w0, v4.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[3]", + "add x1, x4, w0, sxtw", + "ld1 {v0.s}[3], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherdps ymm0, [ymm1*2 + rax], ymm2": { + "ExpectedInstructionCount": 52, + "Comment": [ + "Map 2 0b01 0x92 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[3], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov w0, v4.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[0]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[0], [x1]", + "mov w0, v4.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[1]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[1], [x1]", + "mov w0, v4.s[2]", 
+ "tbz w0, #31, #+0x10", + "smov x0, v3.s[2]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[2], [x1]", + "mov w0, v4.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[3]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[3], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherdps ymm0, [ymm1*4 + rax], ymm2": { + "ExpectedInstructionCount": 52, + "Comment": [ + "Map 2 0b01 0x92 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.s}[3], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov w0, v4.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[0]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.s}[0], [x1]", + "mov w0, v4.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[1]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.s}[1], [x1]", + "mov w0, v4.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[2]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.s}[2], [x1]", + "mov w0, v4.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[3]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.s}[3], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherdps ymm0, [ymm1*8 + rax], ymm2": { + "ExpectedInstructionCount": 52, + "Comment": [ + "Map 2 0b01 0x92 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs 
x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[3], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov w0, v4.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[0]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[0], [x1]", + "mov w0, v4.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[1]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[1], [x1]", + "mov w0, v4.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[2]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[2], [x1]", + "mov w0, v4.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[3]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[3], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherdpd xmm0, [xmm1*1 + rax], xmm2": { + "ExpectedInstructionCount": 17, + "Comment": [ + "Map 2 0b01 0x92 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherdpd xmm0, [xmm1*2 + rax], xmm2": { + "ExpectedInstructionCount": 17, + "Comment": [ + "Map 2 0b01 0x92 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + 
"smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherdpd xmm0, [xmm1*4 + rax], xmm2": { + "ExpectedInstructionCount": 17, + "Comment": [ + "Map 2 0b01 0x92 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherdpd xmm0, [xmm1*8 + rax], xmm2": { + "ExpectedInstructionCount": 17, + "Comment": [ + "Map 2 0b01 0x92 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherdpd ymm0, [xmm1*1 + rax], ymm2": { + "ExpectedInstructionCount": 32, + "Comment": [ + "Map 2 0b01 0x92 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + 
"mov v0.16b, v2.16b", + "mov x0, v4.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw", + "ld1 {v0.d}[0], [x1]", + "mov x0, v4.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw", + "ld1 {v0.d}[1], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherdpd ymm0, [xmm1*2 + rax], ymm2": { + "ExpectedInstructionCount": 32, + "Comment": [ + "Map 2 0b01 0x92 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov x0, v4.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.d}[0], [x1]", + "mov x0, v4.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.d}[1], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherdpd ymm0, [xmm1*4 + rax], ymm2": { + "ExpectedInstructionCount": 32, + "Comment": [ + "Map 2 0b01 0x92 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov x0, v4.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.d}[0], 
[x1]", + "mov x0, v4.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.d}[1], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherdpd ymm0, [xmm1*8 + rax], ymm2": { + "ExpectedInstructionCount": 32, + "Comment": [ + "Map 2 0b01 0x92 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov x0, v4.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.d}[0], [x1]", + "mov x0, v4.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.d}[1], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherqps xmm0, [xmm1*1 + rax], xmm2": { + "ExpectedInstructionCount": 18, + "Comment": [ + "Map 2 0b01 0x93 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0", + "ld1 {v0.s}[1], [x1]", + "mov v2.16b, v0.16b", + "movi v18.2d, #0x0", + "zip1 v16.2d, v2.2d, v18.2d", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherqps xmm0, [xmm1*2 + rax], xmm2": { + "ExpectedInstructionCount": 18, + "Comment": [ + "Map 2 0b01 0x93 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, 
#+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.s}[1], [x1]", + "mov v2.16b, v0.16b", + "movi v18.2d, #0x0", + "zip1 v16.2d, v2.2d, v18.2d", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherqps xmm0, [xmm1*4 + rax], xmm2": { + "ExpectedInstructionCount": 18, + "Comment": [ + "Map 2 0b01 0x93 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.s}[1], [x1]", + "mov v2.16b, v0.16b", + "movi v18.2d, #0x0", + "zip1 v16.2d, v2.2d, v18.2d", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherqps xmm0, [xmm1*8 + rax], xmm2": { + "ExpectedInstructionCount": 18, + "Comment": [ + "Map 2 0b01 0x93 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.s}[1], [x1]", + "mov v2.16b, v0.16b", + "movi v18.2d, #0x0", + "zip1 v16.2d, v2.2d, v18.2d", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherqps xmm0, [ymm1*1 + rax], xmm2": { + "ExpectedInstructionCount": 30, + "Comment": [ + "Map 2 0b01 0x93 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0", + "ld1 {v0.s}[1], [x1]", 
+ "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "mov x0, v2.d[0]", + "add x1, x4, x0", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "mov x0, v2.d[1]", + "add x1, x4, x0", + "ld1 {v0.s}[3], [x1]", + "mov v3.16b, v0.16b", + "mov v0.16b, v3.16b", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherqps xmm0, [ymm1*2 + rax], xmm2": { + "ExpectedInstructionCount": 30, + "Comment": [ + "Map 2 0b01 0x93 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "mov x0, v2.d[0]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "mov x0, v2.d[1]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.s}[3], [x1]", + "mov v3.16b, v0.16b", + "mov v0.16b, v3.16b", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherqps xmm0, [ymm1*4 + rax], xmm2": { + "ExpectedInstructionCount": 30, + "Comment": [ + "Map 2 0b01 0x93 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "mov x0, v2.d[0]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "mov x0, v2.d[1]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.s}[3], [x1]", + "mov v3.16b, 
v0.16b", + "mov v0.16b, v3.16b", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherqps xmm0, [ymm1*8 + rax], xmm2": { + "ExpectedInstructionCount": 30, + "Comment": [ + "Map 2 0b01 0x93 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "mov x0, v2.d[0]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "mov x0, v2.d[1]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.s}[3], [x1]", + "mov v3.16b, v0.16b", + "mov v0.16b, v3.16b", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherqpd xmm0, [xmm1*1 + rax], xmm2": { + "ExpectedInstructionCount": 17, + "Comment": [ + "Map 2 0b01 0x93 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherqpd xmm0, [xmm1*2 + rax], xmm2": { + "ExpectedInstructionCount": 17, + "Comment": [ + "Map 2 0b01 0x93 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #1", + 
"ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherqpd xmm0, [xmm1*4 + rax], xmm2": { + "ExpectedInstructionCount": 17, + "Comment": [ + "Map 2 0b01 0x93 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherqpd xmm0, [xmm1*8 + rax], xmm2": { + "ExpectedInstructionCount": 17, + "Comment": [ + "Map 2 0b01 0x93 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherqpd ymm0, [ymm1*1 + rax], ymm2": { + "ExpectedInstructionCount": 32, + "Comment": [ + "Map 2 0b01 0x93 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov x0, v4.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v3.d[0]", + "add x1, x4, x0", + "ld1 {v0.d}[0], [x1]", + "mov x0, v4.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v3.d[1]", 
+ "add x1, x4, x0", + "ld1 {v0.d}[1], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherqpd ymm0, [ymm1*2 + rax], ymm2": { + "ExpectedInstructionCount": 32, + "Comment": [ + "Map 2 0b01 0x93 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov x0, v4.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v3.d[0]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.d}[0], [x1]", + "mov x0, v4.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v3.d[1]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.d}[1], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherqpd ymm0, [ymm1*4 + rax], ymm2": { + "ExpectedInstructionCount": 32, + "Comment": [ + "Map 2 0b01 0x93 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov x0, v4.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v3.d[0]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.d}[0], [x1]", + "mov x0, v4.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v3.d[1]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.d}[1], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, 
x20" + ] + }, + "vgatherqpd ymm0, [ymm1*8 + rax], ymm2": { + "ExpectedInstructionCount": 32, + "Comment": [ + "Map 2 0b01 0x93 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov x0, v4.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v3.d[0]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.d}[0], [x1]", + "mov x0, v4.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v3.d[1]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.d}[1], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vfmaddsub132ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 2 0b01 0x96 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #2384]", + "eor v2.16b, v17.16b, v2.16b", + "mov v0.16b, v2.16b", + "fmla v0.4s, v16.4s, v18.4s", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmaddsub132ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 11, + "Comment": [ + "Map 2 0b01 0x96 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "ldr q5, [x28, #2384]", + "eor v6.16b, v17.16b, v5.16b", + "mov v0.16b, v6.16b", + "fmla v0.4s, v16.4s, v18.4s", + "mov v16.16b, v0.16b", + "eor v3.16b, v3.16b, v5.16b", + "fmla v3.4s, v2.4s, v4.4s", + "str q3, [x28, #16]" + ] + }, + "vfmaddsub132pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 2 0b01 0x96 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #2416]", + "eor v2.16b, v17.16b, v2.16b", + "mov v0.16b, v2.16b", + "fmla v0.2d, v16.2d, v18.2d", + "mov 
v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmaddsub132pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 11, + "Comment": [ + "Map 2 0b01 0x96 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "ldr q5, [x28, #2416]", + "eor v6.16b, v17.16b, v5.16b", + "mov v0.16b, v6.16b", + "fmla v0.2d, v16.2d, v18.2d", + "mov v16.16b, v0.16b", + "eor v3.16b, v3.16b, v5.16b", + "fmla v3.2d, v2.2d, v4.2d", + "str q3, [x28, #16]" + ] + }, + "vfmsubadd132ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 2 0b01 0x97 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #2448]", + "eor v2.16b, v17.16b, v2.16b", + "mov v0.16b, v2.16b", + "fmla v0.4s, v16.4s, v18.4s", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmsubadd132ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 11, + "Comment": [ + "Map 2 0b01 0x97 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "ldr q5, [x28, #2448]", + "eor v6.16b, v17.16b, v5.16b", + "mov v0.16b, v6.16b", + "fmla v0.4s, v16.4s, v18.4s", + "mov v16.16b, v0.16b", + "eor v3.16b, v3.16b, v5.16b", + "fmla v3.4s, v2.4s, v4.4s", + "str q3, [x28, #16]" + ] + }, + "vfmsubadd132pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 2 0b01 0x97 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #2480]", + "eor v2.16b, v17.16b, v2.16b", + "mov v0.16b, v2.16b", + "fmla v0.2d, v16.2d, v18.2d", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmsubadd132pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 11, + "Comment": [ + "Map 2 0b01 0x97 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "ldr q5, [x28, #2480]", + "eor v6.16b, v17.16b, v5.16b", + "mov v0.16b, v6.16b", + "fmla v0.2d, v16.2d, v18.2d", + "mov v16.16b, 
v0.16b", + "eor v3.16b, v3.16b, v5.16b", + "fmla v3.2d, v2.2d, v4.2d", + "str q3, [x28, #16]" + ] + }, + "vfmadd132ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x98 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v17.16b", + "fmla v0.4s, v16.4s, v18.4s", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmadd132ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0x98 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mov v0.16b, v17.16b", + "fmla v0.4s, v16.4s, v18.4s", + "mov v16.16b, v0.16b", + "fmla v3.4s, v2.4s, v4.4s", + "str q3, [x28, #16]" + ] + }, + "vfmadd132pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x98 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v17.16b", + "fmla v0.2d, v16.2d, v18.2d", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmadd132pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0x98 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mov v0.16b, v17.16b", + "fmla v0.2d, v16.2d, v18.2d", + "mov v16.16b, v0.16b", + "fmla v3.2d, v2.2d, v4.2d", + "str q3, [x28, #16]" + ] + }, + "vfmadd132ss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0x99 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v17.16b", + "fmla v0.4s, v16.4s, v18.4s", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.s[0], v2.s[0]", + "str q3, [x28, #16]" + ] + }, + "vfmadd132sd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0x99 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v17.16b", + "fmla v0.2d, v16.2d, v18.2d", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.d[0], v2.d[0]", + "str q3, [x28, #16]" + ] + }, + "vfmsub132ps xmm0, 
xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x9a 128-bit" + ], + "ExpectedArm64ASM": [ + "fneg v0.4s, v17.4s", + "fmla v0.4s, v16.4s, v18.4s", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmsub132ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 10, + "Comment": [ + "Map 2 0b01 0x9a 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "fneg v0.4s, v17.4s", + "fmla v0.4s, v16.4s, v18.4s", + "mov v16.16b, v0.16b", + "fneg v0.4s, v3.4s", + "fmla v0.4s, v2.4s, v4.4s", + "mov v3.16b, v0.16b", + "str q3, [x28, #16]" + ] + }, + "vfmsub132pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x9a 128-bit" + ], + "ExpectedArm64ASM": [ + "fneg v0.2d, v17.2d", + "fmla v0.2d, v16.2d, v18.2d", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmsub132pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 10, + "Comment": [ + "Map 2 0b01 0x9a 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "fneg v0.2d, v17.2d", + "fmla v0.2d, v16.2d, v18.2d", + "mov v16.16b, v0.16b", + "fneg v0.2d, v3.2d", + "fmla v0.2d, v2.2d, v4.2d", + "mov v3.16b, v0.16b", + "str q3, [x28, #16]" + ] + }, + "vfmsub132ss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0x9b 128-bit" + ], + "ExpectedArm64ASM": [ + "fneg v0.4s, v17.4s", + "fmla v0.4s, v16.4s, v18.4s", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.s[0], v2.s[0]", + "str q3, [x28, #16]" + ] + }, + "vfmsub132sd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0x9b 128-bit" + ], + "ExpectedArm64ASM": [ + "fneg v0.2d, v17.2d", + "fmla v0.2d, v16.2d, v18.2d", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.d[0], v2.d[0]", + "str q3, [x28, #16]" + ] + }, + "vfnmadd132ps xmm0, xmm1, xmm2": { + 
"ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x9c 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v17.16b", + "fmls v0.4s, v16.4s, v18.4s", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfnmadd132ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0x9c 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mov v0.16b, v17.16b", + "fmls v0.4s, v16.4s, v18.4s", + "mov v16.16b, v0.16b", + "fmls v3.4s, v2.4s, v4.4s", + "str q3, [x28, #16]" + ] + }, + "vfnmadd132pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x9c 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v17.16b", + "fmls v0.2d, v16.2d, v18.2d", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfnmadd132pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0x9c 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mov v0.16b, v17.16b", + "fmls v0.2d, v16.2d, v18.2d", + "mov v16.16b, v0.16b", + "fmls v3.2d, v2.2d, v4.2d", + "str q3, [x28, #16]" + ] + }, + "vfnmadd132ss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0x9d 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v17.16b", + "fmls v0.4s, v16.4s, v18.4s", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.s[0], v2.s[0]", + "str q3, [x28, #16]" + ] + }, + "vfnmadd132sd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0x9d 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v17.16b", + "fmls v0.2d, v16.2d, v18.2d", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.d[0], v2.d[0]", + "str q3, [x28, #16]" + ] + }, + "vfnmsub132ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x9e 128-bit" + ], + "ExpectedArm64ASM": [ + "fneg v0.4s, 
v17.4s", + "fmls v0.4s, v16.4s, v18.4s", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfnmsub132ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 10, + "Comment": [ + "Map 2 0b01 0x9e 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "fneg v0.4s, v17.4s", + "fmls v0.4s, v16.4s, v18.4s", + "mov v16.16b, v0.16b", + "fneg v0.4s, v3.4s", + "fmls v0.4s, v2.4s, v4.4s", + "mov v3.16b, v0.16b", + "str q3, [x28, #16]" + ] + }, + "vfnmsub132pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x9e 128-bit" + ], + "ExpectedArm64ASM": [ + "fneg v0.2d, v17.2d", + "fmls v0.2d, v16.2d, v18.2d", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfnmsub132pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 10, + "Comment": [ + "Map 2 0b01 0x9e 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "fneg v0.2d, v17.2d", + "fmls v0.2d, v16.2d, v18.2d", + "mov v16.16b, v0.16b", + "fneg v0.2d, v3.2d", + "fmls v0.2d, v2.2d, v4.2d", + "mov v3.16b, v0.16b", + "str q3, [x28, #16]" + ] + }, + "vfnmsub132ss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0x9f 128-bit" + ], + "ExpectedArm64ASM": [ + "fneg v0.4s, v17.4s", + "fmls v0.4s, v16.4s, v18.4s", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.s[0], v2.s[0]", + "str q3, [x28, #16]" + ] + }, + "vfnmsub132sd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0x9f 128-bit" + ], + "ExpectedArm64ASM": [ + "fneg v0.2d, v17.2d", + "fmls v0.2d, v16.2d, v18.2d", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.d[0], v2.d[0]", + "str q3, [x28, #16]" + ] + }, + "vfmadd213ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0xa8 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v18.16b", + "fmla v0.4s, 
v17.4s, v16.4s", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmadd213ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0xa8 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mov v0.16b, v18.16b", + "fmla v0.4s, v17.4s, v16.4s", + "mov v16.16b, v0.16b", + "fmla v4.4s, v3.4s, v2.4s", + "str q4, [x28, #16]" + ] + }, + "vfmadd213pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0xa8 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v18.16b", + "fmla v0.2d, v17.2d, v16.2d", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmadd213pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0xa8 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mov v0.16b, v18.16b", + "fmla v0.2d, v17.2d, v16.2d", + "mov v16.16b, v0.16b", + "fmla v4.2d, v3.2d, v2.2d", + "str q4, [x28, #16]" + ] + }, + "vfmadd213ss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xa9 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v18.16b", + "fmla v0.4s, v17.4s, v16.4s", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.s[0], v2.s[0]", + "str q3, [x28, #16]" + ] + }, + "vfmadd213sd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xa9 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v18.16b", + "fmla v0.2d, v17.2d, v16.2d", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.d[0], v2.d[0]", + "str q3, [x28, #16]" + ] + }, + "vfmsub213ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0xaa 128-bit" + ], + "ExpectedArm64ASM": [ + "fneg v0.4s, v18.4s", + "fmla v0.4s, v17.4s, v16.4s", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmsub213ps ymm0, ymm1, 
ymm2": { + "ExpectedInstructionCount": 10, + "Comment": [ + "Map 2 0b01 0xaa 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "fneg v0.4s, v18.4s", + "fmla v0.4s, v17.4s, v16.4s", + "mov v16.16b, v0.16b", + "fneg v0.4s, v4.4s", + "fmla v0.4s, v3.4s, v2.4s", + "mov v4.16b, v0.16b", + "str q4, [x28, #16]" + ] + }, + "vfmsub213pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0xaa 128-bit" + ], + "ExpectedArm64ASM": [ + "fneg v0.2d, v18.2d", + "fmla v0.2d, v17.2d, v16.2d", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmsub213pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 10, + "Comment": [ + "Map 2 0b01 0xaa 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "fneg v0.2d, v18.2d", + "fmla v0.2d, v17.2d, v16.2d", + "mov v16.16b, v0.16b", + "fneg v0.2d, v4.2d", + "fmla v0.2d, v3.2d, v2.2d", + "mov v4.16b, v0.16b", + "str q4, [x28, #16]" + ] + }, + "vfmsub213ss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xab 128-bit" + ], + "ExpectedArm64ASM": [ + "fneg v0.4s, v18.4s", + "fmla v0.4s, v17.4s, v16.4s", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.s[0], v2.s[0]", + "str q3, [x28, #16]" + ] + }, + "vfmsub213sd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xab 128-bit" + ], + "ExpectedArm64ASM": [ + "fneg v0.2d, v18.2d", + "fmla v0.2d, v17.2d, v16.2d", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.d[0], v2.d[0]", + "str q3, [x28, #16]" + ] + }, + "vfnmadd213ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0xac 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v18.16b", + "fmls v0.4s, v17.4s, v16.4s", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfnmadd213ps ymm0, ymm1, ymm2": { + 
"ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0xac 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mov v0.16b, v18.16b", + "fmls v0.4s, v17.4s, v16.4s", + "mov v16.16b, v0.16b", + "fmls v4.4s, v3.4s, v2.4s", + "str q4, [x28, #16]" + ] + }, + "vfnmadd213pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0xac 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v18.16b", + "fmls v0.2d, v17.2d, v16.2d", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfnmadd213pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0xac 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mov v0.16b, v18.16b", + "fmls v0.2d, v17.2d, v16.2d", + "mov v16.16b, v0.16b", + "fmls v4.2d, v3.2d, v2.2d", + "str q4, [x28, #16]" + ] + }, + "vfnmadd213ss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xad 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v18.16b", + "fmls v0.4s, v17.4s, v16.4s", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.s[0], v2.s[0]", + "str q3, [x28, #16]" + ] + }, + "vfnmadd213sd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xad 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v18.16b", + "fmls v0.2d, v17.2d, v16.2d", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.d[0], v2.d[0]", + "str q3, [x28, #16]" + ] + }, + "vfnmsub213ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0xae 128-bit" + ], + "ExpectedArm64ASM": [ + "fneg v0.4s, v18.4s", + "fmls v0.4s, v17.4s, v16.4s", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfnmsub213ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 10, + "Comment": [ + "Map 2 0b01 0xae 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, 
#16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "fneg v0.4s, v18.4s", + "fmls v0.4s, v17.4s, v16.4s", + "mov v16.16b, v0.16b", + "fneg v0.4s, v4.4s", + "fmls v0.4s, v3.4s, v2.4s", + "mov v4.16b, v0.16b", + "str q4, [x28, #16]" + ] + }, + "vfnmsub213pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0xae 128-bit" + ], + "ExpectedArm64ASM": [ + "fneg v0.2d, v18.2d", + "fmls v0.2d, v17.2d, v16.2d", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfnmsub213pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 10, + "Comment": [ + "Map 2 0b01 0xae 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "fneg v0.2d, v18.2d", + "fmls v0.2d, v17.2d, v16.2d", + "mov v16.16b, v0.16b", + "fneg v0.2d, v4.2d", + "fmls v0.2d, v3.2d, v2.2d", + "mov v4.16b, v0.16b", + "str q4, [x28, #16]" + ] + }, + "vfnmsub213ss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xaf 128-bit" + ], + "ExpectedArm64ASM": [ + "fneg v0.4s, v18.4s", + "fmls v0.4s, v17.4s, v16.4s", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.s[0], v2.s[0]", + "str q3, [x28, #16]" + ] + }, + "vfnmsub213sd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xaf 128-bit" + ], + "ExpectedArm64ASM": [ + "fneg v0.2d, v18.2d", + "fmls v0.2d, v17.2d, v16.2d", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.d[0], v2.d[0]", + "str q3, [x28, #16]" + ] + }, + "vfmadd231ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0xb8 128-bit" + ], + "ExpectedArm64ASM": [ + "fmla v16.4s, v17.4s, v18.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmadd231ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xb8 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "fmla v16.4s, v17.4s, 
v18.4s", + "fmla v2.4s, v3.4s, v4.4s", + "str q2, [x28, #16]" + ] + }, + "vfmadd231pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0xb8 128-bit" + ], + "ExpectedArm64ASM": [ + "fmla v16.2d, v17.2d, v18.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmadd231pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xb8 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "fmla v16.2d, v17.2d, v18.2d", + "fmla v2.2d, v3.2d, v4.2d", + "str q2, [x28, #16]" + ] + }, + "vfmadd231ss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xb9 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v16.16b", + "fmla v0.4s, v17.4s, v18.4s", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.s[0], v2.s[0]", + "str q3, [x28, #16]" + ] + }, + "vfmadd231sd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xb9 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v16.16b", + "fmla v0.2d, v17.2d, v18.2d", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.d[0], v2.d[0]", + "str q3, [x28, #16]" + ] + }, + "vfmsub231ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0xba 128-bit" + ], + "ExpectedArm64ASM": [ + "fneg v0.4s, v16.4s", + "fmla v0.4s, v17.4s, v18.4s", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmsub231ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 10, + "Comment": [ + "Map 2 0b01 0xba 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "fneg v0.4s, v16.4s", + "fmla v0.4s, v17.4s, v18.4s", + "mov v16.16b, v0.16b", + "fneg v0.4s, v2.4s", + "fmla v0.4s, v3.4s, v4.4s", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]" + ] + }, + "vfmsub231pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0xba 
128-bit" + ], + "ExpectedArm64ASM": [ + "fneg v0.2d, v16.2d", + "fmla v0.2d, v17.2d, v18.2d", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmsub231pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 10, + "Comment": [ + "Map 2 0b01 0xba 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "fneg v0.2d, v16.2d", + "fmla v0.2d, v17.2d, v18.2d", + "mov v16.16b, v0.16b", + "fneg v0.2d, v2.2d", + "fmla v0.2d, v3.2d, v4.2d", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]" + ] + }, + "vfmsub231ss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xbb 128-bit" + ], + "ExpectedArm64ASM": [ + "fneg v0.4s, v16.4s", + "fmla v0.4s, v17.4s, v18.4s", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.s[0], v2.s[0]", + "str q3, [x28, #16]" + ] + }, + "vfmsub231sd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xbb 128-bit" + ], + "ExpectedArm64ASM": [ + "fneg v0.2d, v16.2d", + "fmla v0.2d, v17.2d, v18.2d", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.d[0], v2.d[0]", + "str q3, [x28, #16]" + ] + }, + "vfnmadd231ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0xbc 128-bit" + ], + "ExpectedArm64ASM": [ + "fmls v16.4s, v17.4s, v18.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfnmadd231ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xbc 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "fmls v16.4s, v17.4s, v18.4s", + "fmls v2.4s, v3.4s, v4.4s", + "str q2, [x28, #16]" + ] + }, + "vfnmadd231pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0xbc 128-bit" + ], + "ExpectedArm64ASM": [ + "fmls v16.2d, v17.2d, v18.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfnmadd231pd ymm0, ymm1, ymm2": { + 
"ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xbc 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "fmls v16.2d, v17.2d, v18.2d", + "fmls v2.2d, v3.2d, v4.2d", + "str q2, [x28, #16]" + ] + }, + "vfnmadd231ss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xbd 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v16.16b", + "fmls v0.4s, v17.4s, v18.4s", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.s[0], v2.s[0]", + "str q3, [x28, #16]" + ] + }, + "vfnmadd231sd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xbd 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v16.16b", + "fmls v0.2d, v17.2d, v18.2d", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.d[0], v2.d[0]", + "str q3, [x28, #16]" + ] + }, + "vfnmsub231ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0xbe 128-bit" + ], + "ExpectedArm64ASM": [ + "fneg v0.4s, v16.4s", + "fmls v0.4s, v17.4s, v18.4s", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfnmsub231ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 10, + "Comment": [ + "Map 2 0b01 0xbe 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "fneg v0.4s, v16.4s", + "fmls v0.4s, v17.4s, v18.4s", + "mov v16.16b, v0.16b", + "fneg v0.4s, v2.4s", + "fmls v0.4s, v3.4s, v4.4s", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]" + ] + }, + "vfnmsub231pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0xbe 128-bit" + ], + "ExpectedArm64ASM": [ + "fneg v0.2d, v16.2d", + "fmls v0.2d, v17.2d, v18.2d", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfnmsub231pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 10, + "Comment": [ + "Map 2 0b01 0xbe 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, 
#16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "fneg v0.2d, v16.2d", + "fmls v0.2d, v17.2d, v18.2d", + "mov v16.16b, v0.16b", + "fneg v0.2d, v2.2d", + "fmls v0.2d, v3.2d, v4.2d", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]" + ] + }, + "vfnmsub231ss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xbf 128-bit" + ], + "ExpectedArm64ASM": [ + "fneg v0.4s, v16.4s", + "fmls v0.4s, v17.4s, v18.4s", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.s[0], v2.s[0]", + "str q3, [x28, #16]" + ] + }, + "vfnmsub231sd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xbf 128-bit" + ], + "ExpectedArm64ASM": [ + "fneg v0.2d, v16.2d", + "fmls v0.2d, v17.2d, v18.2d", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.d[0], v2.d[0]", + "str q3, [x28, #16]" + ] + }, + "vfmaddsub213ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 2 0b01 0xa6 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #2384]", + "eor v2.16b, v18.16b, v2.16b", + "mov v0.16b, v2.16b", + "fmla v0.4s, v17.4s, v16.4s", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmaddsub213ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 11, + "Comment": [ + "Map 2 0b01 0xa6 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "ldr q5, [x28, #2384]", + "eor v6.16b, v18.16b, v5.16b", + "mov v0.16b, v6.16b", + "fmla v0.4s, v17.4s, v16.4s", + "mov v16.16b, v0.16b", + "eor v4.16b, v4.16b, v5.16b", + "fmla v4.4s, v3.4s, v2.4s", + "str q4, [x28, #16]" + ] + }, + "vfmaddsub213pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 2 0b01 0xa6 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #2416]", + "eor v2.16b, v18.16b, v2.16b", + "mov v0.16b, v2.16b", + "fmla v0.2d, v17.2d, v16.2d", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmaddsub213pd 
ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 11, + "Comment": [ + "Map 2 0b01 0xa6 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "ldr q5, [x28, #2416]", + "eor v6.16b, v18.16b, v5.16b", + "mov v0.16b, v6.16b", + "fmla v0.2d, v17.2d, v16.2d", + "mov v16.16b, v0.16b", + "eor v4.16b, v4.16b, v5.16b", + "fmla v4.2d, v3.2d, v2.2d", + "str q4, [x28, #16]" + ] + }, + "vfmsubadd213ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 2 0b01 0xa7 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #2448]", + "eor v2.16b, v18.16b, v2.16b", + "mov v0.16b, v2.16b", + "fmla v0.4s, v17.4s, v16.4s", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmsubadd213ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 11, + "Comment": [ + "Map 2 0b01 0xa7 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "ldr q5, [x28, #2448]", + "eor v6.16b, v18.16b, v5.16b", + "mov v0.16b, v6.16b", + "fmla v0.4s, v17.4s, v16.4s", + "mov v16.16b, v0.16b", + "eor v4.16b, v4.16b, v5.16b", + "fmla v4.4s, v3.4s, v2.4s", + "str q4, [x28, #16]" + ] + }, + "vfmsubadd213pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 2 0b01 0xa7 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #2480]", + "eor v2.16b, v18.16b, v2.16b", + "mov v0.16b, v2.16b", + "fmla v0.2d, v17.2d, v16.2d", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmsubadd213pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 11, + "Comment": [ + "Map 2 0b01 0xa7 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "ldr q5, [x28, #2480]", + "eor v6.16b, v18.16b, v5.16b", + "mov v0.16b, v6.16b", + "fmla v0.2d, v17.2d, v16.2d", + "mov v16.16b, v0.16b", + "eor v4.16b, v4.16b, v5.16b", + "fmla v4.2d, v3.2d, v2.2d", + "str q4, [x28, 
#16]" + ] + }, + "vfmaddsub231ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 2 0b01 0xb6 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #2384]", + "eor v2.16b, v16.16b, v2.16b", + "mov v0.16b, v2.16b", + "fmla v0.4s, v17.4s, v18.4s", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmaddsub231ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 11, + "Comment": [ + "Map 2 0b01 0xb6 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "ldr q5, [x28, #2384]", + "eor v6.16b, v16.16b, v5.16b", + "mov v0.16b, v6.16b", + "fmla v0.4s, v17.4s, v18.4s", + "mov v16.16b, v0.16b", + "eor v2.16b, v2.16b, v5.16b", + "fmla v2.4s, v3.4s, v4.4s", + "str q2, [x28, #16]" + ] + }, + "vfmaddsub231pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 2 0b01 0xb6 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #2416]", + "eor v2.16b, v16.16b, v2.16b", + "mov v0.16b, v2.16b", + "fmla v0.2d, v17.2d, v18.2d", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmaddsub231pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 11, + "Comment": [ + "Map 2 0b01 0xb6 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "ldr q5, [x28, #2416]", + "eor v6.16b, v16.16b, v5.16b", + "mov v0.16b, v6.16b", + "fmla v0.2d, v17.2d, v18.2d", + "mov v16.16b, v0.16b", + "eor v2.16b, v2.16b, v5.16b", + "fmla v2.2d, v3.2d, v4.2d", + "str q2, [x28, #16]" + ] + }, + "vfmsubadd231ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 2 0b01 0xb7 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #2448]", + "eor v2.16b, v16.16b, v2.16b", + "mov v0.16b, v2.16b", + "fmla v0.4s, v17.4s, v18.4s", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmsubadd231ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 
11, + "Comment": [ + "Map 2 0b01 0xb7 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "ldr q5, [x28, #2448]", + "eor v6.16b, v16.16b, v5.16b", + "mov v0.16b, v6.16b", + "fmla v0.4s, v17.4s, v18.4s", + "mov v16.16b, v0.16b", + "eor v2.16b, v2.16b, v5.16b", + "fmla v2.4s, v3.4s, v4.4s", + "str q2, [x28, #16]" + ] + }, + "vfmsubadd231pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 2 0b01 0xb7 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #2480]", + "eor v2.16b, v16.16b, v2.16b", + "mov v0.16b, v2.16b", + "fmla v0.2d, v17.2d, v18.2d", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmsubadd231pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 11, + "Comment": [ + "Map 2 0b01 0xb7 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "ldr q5, [x28, #2480]", + "eor v6.16b, v16.16b, v5.16b", + "mov v0.16b, v6.16b", + "fmla v0.2d, v17.2d, v18.2d", + "mov v16.16b, v0.16b", + "eor v2.16b, v2.16b, v5.16b", + "fmla v2.2d, v3.2d, v4.2d", + "str q2, [x28, #16]" + ] + }, + "vaesimc xmm0, xmm1": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0xdb 128-bit" + ], + "ExpectedArm64ASM": [ + "unimplemented (Unimplemented)", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vaesenc xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xdc 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v0.16b, v17.16b", + "unimplemented (Unimplemented)", + "unimplemented (Unimplemented)", + "eor v16.16b, v0.16b, v18.16b", + "str q2, [x28, #16]" + ] + }, + "vaesenc ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 12, + "Comment": [ + "Map 2 0b01 0xdc 256-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mov v0.16b, v17.16b", + "unimplemented (Unimplemented)", + "unimplemented 
(Unimplemented)", + "eor v16.16b, v0.16b, v18.16b", + "mov v0.16b, v3.16b", + "unimplemented (Unimplemented)", + "unimplemented (Unimplemented)", + "eor v2.16b, v0.16b, v4.16b", + "str q2, [x28, #16]" + ] + }, + "vaesenclast xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0xdd 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v0.16b, v17.16b", + "unimplemented (Unimplemented)", + "eor v16.16b, v0.16b, v18.16b", + "str q2, [x28, #16]" + ] + }, + "vaesenclast ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 10, + "Comment": [ + "Map 2 0b01 0xdd 256-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mov v0.16b, v17.16b", + "unimplemented (Unimplemented)", + "eor v16.16b, v0.16b, v18.16b", + "mov v0.16b, v3.16b", + "unimplemented (Unimplemented)", + "eor v2.16b, v0.16b, v4.16b", + "str q2, [x28, #16]" + ] + }, + "vaesdec xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xde 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v0.16b, v17.16b", + "unimplemented (Unimplemented)", + "unimplemented (Unimplemented)", + "eor v16.16b, v0.16b, v18.16b", + "str q2, [x28, #16]" + ] + }, + "vaesdec ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 12, + "Comment": [ + "Map 2 0b01 0xde 256-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mov v0.16b, v17.16b", + "unimplemented (Unimplemented)", + "unimplemented (Unimplemented)", + "eor v16.16b, v0.16b, v18.16b", + "mov v0.16b, v3.16b", + "unimplemented (Unimplemented)", + "unimplemented (Unimplemented)", + "eor v2.16b, v0.16b, v4.16b", + "str q2, [x28, #16]" + ] + }, + "vaesdeclast xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0xdf 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v0.16b, v17.16b", + "unimplemented (Unimplemented)", + "eor v16.16b, v0.16b, v18.16b", 
+ "str q2, [x28, #16]" + ] + }, + "vaesdeclast ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 10, + "Comment": [ + "Map 2 0b01 0xdf 256-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mov v0.16b, v17.16b", + "unimplemented (Unimplemented)", + "eor v16.16b, v0.16b, v18.16b", + "mov v0.16b, v3.16b", + "unimplemented (Unimplemented)", + "eor v2.16b, v0.16b, v4.16b", + "str q2, [x28, #16]" + ] + } + } +} diff --git a/unittests/InstructionCountCI/AVX128/VEX_map2_SVE128.json b/unittests/InstructionCountCI/AVX128/VEX_map2_SVE128.json new file mode 100644 index 0000000000..9d7b062db5 --- /dev/null +++ b/unittests/InstructionCountCI/AVX128/VEX_map2_SVE128.json @@ -0,0 +1,3505 @@ +{ + "Features": { + "Bitness": 64, + "EnabledHostFeatures": [ + "SVE128", + "AVX" + ], + "DisabledHostFeatures": [ + "AFP", + "FLAGM", + "FLAGM2", + "SVE256" + ] + }, + "Instructions": { + "vmaskmovps xmm0, xmm1, [rax]": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0x2c 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "cmplt p0.s, p6/z, z17.s, #0", + "ld1w {z16.s}, p0/z, [x4]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]", + "msr nzcv, x20" + ] + }, + "vmaskmovps ymm0, ymm1, [rax]": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 2 0b01 0x2c 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mrs x20, nzcv", + "cmplt p0.s, p6/z, z17.s, #0", + "ld1w {z16.s}, p0/z, [x4]", + "add x21, x4, #0x10 (16)", + "cmplt p0.s, p6/z, z2.s, #0", + "ld1w {z2.s}, p0/z, [x21]", + "str q2, [x28, #16]", + "msr nzcv, x20" + ] + }, + "vmaskmovpd xmm0, xmm1, [rax]": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0x2d 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "cmplt p0.d, p6/z, z17.d, #0", + "ld1d {z16.d}, p0/z, [x4]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]", + "msr nzcv, x20" + ] + }, + "vmaskmovpd ymm0, ymm1, [rax]": { + "ExpectedInstructionCount": 9, + "Comment": [ + 
"Map 2 0b01 0x2d 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mrs x20, nzcv", + "cmplt p0.d, p6/z, z17.d, #0", + "ld1d {z16.d}, p0/z, [x4]", + "add x21, x4, #0x10 (16)", + "cmplt p0.d, p6/z, z2.d, #0", + "ld1d {z2.d}, p0/z, [x21]", + "str q2, [x28, #16]", + "msr nzcv, x20" + ] + }, + "vmaskmovps [rax], xmm0, xmm1": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 2 0b01 0x2e 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "cmplt p0.s, p6/z, z16.s, #0", + "st1w {z17.s}, p0, [x4]", + "msr nzcv, x20" + ] + }, + "vmaskmovps [rax], ymm0, ymm1": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 2 0b01 0x2e 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "mrs x20, nzcv", + "cmplt p0.s, p6/z, z16.s, #0", + "st1w {z17.s}, p0, [x4]", + "add x21, x4, #0x10 (16)", + "cmplt p0.s, p6/z, z2.s, #0", + "st1w {z3.s}, p0, [x21]", + "msr nzcv, x20" + ] + }, + "vmaskmovpd [rax], xmm0, xmm1": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 2 0b01 0x2f 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "cmplt p0.d, p6/z, z16.d, #0", + "st1d {z17.d}, p0, [x4]", + "msr nzcv, x20" + ] + }, + "vmaskmovpd [rax], ymm0, ymm1": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 2 0b01 0x2f 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "mrs x20, nzcv", + "cmplt p0.d, p6/z, z16.d, #0", + "st1d {z17.d}, p0, [x4]", + "add x21, x4, #0x10 (16)", + "cmplt p0.d, p6/z, z2.d, #0", + "st1d {z3.d}, p0, [x21]", + "msr nzcv, x20" + ] + }, + "vpgatherdd xmm0, [xmm1*2 + rax], xmm2": { + "ExpectedInstructionCount": 27, + "Comment": [ + "Map 2 0b01 0x90 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #1", + "ld1 
{v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[3], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherdd xmm0, [xmm1*4 + rax], xmm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0x90 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "cmplt p0.s, p6/z, z18.s, #0", + "ld1w {z0.s}, p0/z, [x4, z17.s, sxtw #2]", + "mov z16.s, p0/m, z0.s", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherdd xmm0, [xmm1*8 + rax], xmm2": { + "ExpectedInstructionCount": 27, + "Comment": [ + "Map 2 0b01 0x90 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[3], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherdd ymm0, [ymm1*1 + rax], ymm2": { + "ExpectedInstructionCount": 14, + "Comment": [ + "Map 2 0b01 0x90 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "cmplt p0.s, p6/z, z18.s, #0", + "ld1w {z0.s}, p0/z, [x4, z17.s, sxtw]", + "mov z16.s, p0/m, z0.s", + "cmplt p0.s, p6/z, z4.s, #0", + "ld1w {z0.s}, p0/z, [x4, z3.s, sxtw]", + 
"mov z2.s, p0/m, z0.s", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherdd ymm0, [ymm1*2 + rax], ymm2": { + "ExpectedInstructionCount": 52, + "Comment": [ + "Map 2 0b01 0x90 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[3], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov w0, v4.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[0]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[0], [x1]", + "mov w0, v4.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[1]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[1], [x1]", + "mov w0, v4.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[2]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[2], [x1]", + "mov w0, v4.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[3]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[3], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherdd ymm0, [ymm1*4 + rax], ymm2": { + "ExpectedInstructionCount": 14, + "Comment": [ + "Map 2 0b01 0x90 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "cmplt p0.s, p6/z, z18.s, #0", + "ld1w {z0.s}, p0/z, [x4, z17.s, sxtw #2]", + "mov z16.s, p0/m, z0.s", + "cmplt p0.s, p6/z, z4.s, #0", + "ld1w {z0.s}, p0/z, [x4, z3.s, sxtw #2]", + "mov z2.s, p0/m, z0.s", + 
"str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherdd ymm0, [ymm1*8 + rax], ymm2": { + "ExpectedInstructionCount": 52, + "Comment": [ + "Map 2 0b01 0x90 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[3], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov w0, v4.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[0]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[0], [x1]", + "mov w0, v4.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[1]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[1], [x1]", + "mov w0, v4.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[2]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[2], [x1]", + "mov w0, v4.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[3]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[3], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherdq xmm0, [xmm1*1 + rax], xmm2": { + "ExpectedInstructionCount": 17, + "Comment": [ + "Map 2 0b01 0x90 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, 
#0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherdq xmm0, [xmm1*2 + rax], xmm2": { + "ExpectedInstructionCount": 17, + "Comment": [ + "Map 2 0b01 0x90 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherdq xmm0, [xmm1*4 + rax], xmm2": { + "ExpectedInstructionCount": 17, + "Comment": [ + "Map 2 0b01 0x90 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherdq xmm0, [xmm1*8 + rax], xmm2": { + "ExpectedInstructionCount": 17, + "Comment": [ + "Map 2 0b01 0x90 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherdq ymm0, [xmm1*1 + rax], ymm2": { + "ExpectedInstructionCount": 32, + "Comment": [ + "Map 2 0b01 0x90 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, 
[x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov x0, v4.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw", + "ld1 {v0.d}[0], [x1]", + "mov x0, v4.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw", + "ld1 {v0.d}[1], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherdq ymm0, [xmm1*2 + rax], ymm2": { + "ExpectedInstructionCount": 32, + "Comment": [ + "Map 2 0b01 0x90 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov x0, v4.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.d}[0], [x1]", + "mov x0, v4.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.d}[1], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherdq ymm0, [xmm1*4 + rax], ymm2": { + "ExpectedInstructionCount": 32, + "Comment": [ + "Map 2 0b01 0x90 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, 
sxtw #2", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov x0, v4.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.d}[0], [x1]", + "mov x0, v4.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.d}[1], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherdq ymm0, [xmm1*8 + rax], ymm2": { + "ExpectedInstructionCount": 32, + "Comment": [ + "Map 2 0b01 0x90 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov x0, v4.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.d}[0], [x1]", + "mov x0, v4.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.d}[1], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherqd xmm0, [xmm1*1 + rax], xmm2": { + "ExpectedInstructionCount": 18, + "Comment": [ + "Map 2 0b01 0x91 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0", + "ld1 {v0.s}[1], [x1]", + "mov v2.16b, v0.16b", + "movi v18.2d, #0x0", + "zip1 v16.2d, 
v2.2d, v18.2d", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherqd xmm0, [xmm1*2 + rax], xmm2": { + "ExpectedInstructionCount": 18, + "Comment": [ + "Map 2 0b01 0x91 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.s}[1], [x1]", + "mov v2.16b, v0.16b", + "movi v18.2d, #0x0", + "zip1 v16.2d, v2.2d, v18.2d", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherqd xmm0, [xmm1*4 + rax], xmm2": { + "ExpectedInstructionCount": 18, + "Comment": [ + "Map 2 0b01 0x91 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.s}[1], [x1]", + "mov v2.16b, v0.16b", + "movi v18.2d, #0x0", + "zip1 v16.2d, v2.2d, v18.2d", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherqd xmm0, [xmm1*8 + rax], xmm2": { + "ExpectedInstructionCount": 18, + "Comment": [ + "Map 2 0b01 0x91 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.s}[1], [x1]", + "mov v2.16b, v0.16b", + "movi v18.2d, #0x0", + "zip1 v16.2d, v2.2d, v18.2d", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherqd xmm0, [ymm1*1 + rax], xmm2": { + "ExpectedInstructionCount": 30, + "Comment": [ + "Map 2 0b01 0x91 256-bit" 
+ ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "mov x0, v2.d[0]", + "add x1, x4, x0", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "mov x0, v2.d[1]", + "add x1, x4, x0", + "ld1 {v0.s}[3], [x1]", + "mov v3.16b, v0.16b", + "mov v0.16b, v3.16b", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherqd xmm0, [ymm1*2 + rax], xmm2": { + "ExpectedInstructionCount": 30, + "Comment": [ + "Map 2 0b01 0x91 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "mov x0, v2.d[0]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "mov x0, v2.d[1]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.s}[3], [x1]", + "mov v3.16b, v0.16b", + "mov v0.16b, v3.16b", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherqd xmm0, [ymm1*4 + rax], xmm2": { + "ExpectedInstructionCount": 30, + "Comment": [ + "Map 2 0b01 0x91 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "mov 
x0, v17.d[1]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "mov x0, v2.d[0]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "mov x0, v2.d[1]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.s}[3], [x1]", + "mov v3.16b, v0.16b", + "mov v0.16b, v3.16b", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherqd xmm0, [ymm1*8 + rax], xmm2": { + "ExpectedInstructionCount": 30, + "Comment": [ + "Map 2 0b01 0x91 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "mov x0, v2.d[0]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "mov x0, v2.d[1]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.s}[3], [x1]", + "mov v3.16b, v0.16b", + "mov v0.16b, v3.16b", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherqq xmm0, [xmm1*1 + rax], xmm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0x91 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "cmplt p0.d, p6/z, z18.d, #0", + "ld1d {z0.d}, p0/z, [x4, z17.d]", + "mov z16.d, p0/m, z0.d", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherqq xmm0, [xmm1*2 + rax], xmm2": { + "ExpectedInstructionCount": 17, + "Comment": [ + "Map 2 0b01 0x91 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[0]", + "add 
x1, x4, x0, lsl #1", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherqq xmm0, [xmm1*4 + rax], xmm2": { + "ExpectedInstructionCount": 17, + "Comment": [ + "Map 2 0b01 0x91 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherqq xmm0, [xmm1*8 + rax], xmm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0x91 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "cmplt p0.d, p6/z, z18.d, #0", + "ld1d {z0.d}, p0/z, [x4, z17.d, lsl #3]", + "mov z16.d, p0/m, z0.d", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherqq ymm0, [ymm1*1 + rax], ymm2": { + "ExpectedInstructionCount": 14, + "Comment": [ + "Map 2 0b01 0x91 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "cmplt p0.d, p6/z, z18.d, #0", + "ld1d {z0.d}, p0/z, [x4, z17.d]", + "mov z16.d, p0/m, z0.d", + "cmplt p0.d, p6/z, z4.d, #0", + "ld1d {z0.d}, p0/z, [x4, z3.d]", + "mov z2.d, p0/m, z0.d", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherqq ymm0, [ymm1*2 + rax], ymm2": { + "ExpectedInstructionCount": 32, + "Comment": [ + "Map 2 0b01 0x91 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, 
nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov x0, v4.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v3.d[0]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.d}[0], [x1]", + "mov x0, v4.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v3.d[1]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.d}[1], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherqq ymm0, [ymm1*4 + rax], ymm2": { + "ExpectedInstructionCount": 32, + "Comment": [ + "Map 2 0b01 0x91 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov x0, v4.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v3.d[0]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.d}[0], [x1]", + "mov x0, v4.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v3.d[1]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.d}[1], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vpgatherqq ymm0, [ymm1*8 + rax], ymm2": { + "ExpectedInstructionCount": 14, + "Comment": [ + "Map 2 0b01 0x91 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "cmplt p0.d, p6/z, z18.d, #0", + "ld1d {z0.d}, p0/z, [x4, z17.d, lsl #3]", + "mov z16.d, p0/m, z0.d", + "cmplt p0.d, p6/z, z4.d, #0", + "ld1d 
{z0.d}, p0/z, [x4, z3.d, lsl #3]", + "mov z2.d, p0/m, z0.d", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherdps xmm0, [xmm1*1 + rax], xmm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0x92 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "cmplt p0.s, p6/z, z18.s, #0", + "ld1w {z0.s}, p0/z, [x4, z17.s, sxtw]", + "mov z16.s, p0/m, z0.s", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherdps xmm0, [xmm1*2 + rax], xmm2": { + "ExpectedInstructionCount": 27, + "Comment": [ + "Map 2 0b01 0x92 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[3], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherdps xmm0, [xmm1*4 + rax], xmm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0x92 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "cmplt p0.s, p6/z, z18.s, #0", + "ld1w {z0.s}, p0/z, [x4, z17.s, sxtw #2]", + "mov z16.s, p0/m, z0.s", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherdps xmm0, [xmm1*8 + rax], xmm2": { + "ExpectedInstructionCount": 27, + "Comment": [ + "Map 2 0b01 0x92 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[0]", + "add x1, 
x4, w0, sxtw #3", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[3], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherdps ymm0, [ymm1*1 + rax], ymm2": { + "ExpectedInstructionCount": 14, + "Comment": [ + "Map 2 0b01 0x92 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "cmplt p0.s, p6/z, z18.s, #0", + "ld1w {z0.s}, p0/z, [x4, z17.s, sxtw]", + "mov z16.s, p0/m, z0.s", + "cmplt p0.s, p6/z, z4.s, #0", + "ld1w {z0.s}, p0/z, [x4, z3.s, sxtw]", + "mov z2.s, p0/m, z0.s", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherdps ymm0, [ymm1*2 + rax], ymm2": { + "ExpectedInstructionCount": 52, + "Comment": [ + "Map 2 0b01 0x92 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[3], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov w0, v4.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[0]", + "add x1, x4, w0, sxtw #1", + "ld1 
{v0.s}[0], [x1]", + "mov w0, v4.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[1]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[1], [x1]", + "mov w0, v4.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[2]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[2], [x1]", + "mov w0, v4.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[3]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.s}[3], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherdps ymm0, [ymm1*4 + rax], ymm2": { + "ExpectedInstructionCount": 14, + "Comment": [ + "Map 2 0b01 0x92 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "cmplt p0.s, p6/z, z18.s, #0", + "ld1w {z0.s}, p0/z, [x4, z17.s, sxtw #2]", + "mov z16.s, p0/m, z0.s", + "cmplt p0.s, p6/z, z4.s, #0", + "ld1w {z0.s}, p0/z, [x4, z3.s, sxtw #2]", + "mov z2.s, p0/m, z0.s", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherdps ymm0, [ymm1*8 + rax], ymm2": { + "ExpectedInstructionCount": 52, + "Comment": [ + "Map 2 0b01 0x92 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[3], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov w0, v4.s[0]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[0]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[0], [x1]", + "mov w0, 
v4.s[1]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[1]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[1], [x1]", + "mov w0, v4.s[2]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[2]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[2], [x1]", + "mov w0, v4.s[3]", + "tbz w0, #31, #+0x10", + "smov x0, v3.s[3]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.s}[3], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherdpd xmm0, [xmm1*1 + rax], xmm2": { + "ExpectedInstructionCount": 17, + "Comment": [ + "Map 2 0b01 0x92 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherdpd xmm0, [xmm1*2 + rax], xmm2": { + "ExpectedInstructionCount": 17, + "Comment": [ + "Map 2 0b01 0x92 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherdpd xmm0, [xmm1*4 + rax], xmm2": { + "ExpectedInstructionCount": 17, + "Comment": [ + "Map 2 0b01 0x92 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[1]", + 
"add x1, x4, w0, sxtw #2", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherdpd xmm0, [xmm1*8 + rax], xmm2": { + "ExpectedInstructionCount": 17, + "Comment": [ + "Map 2 0b01 0x92 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherdpd ymm0, [xmm1*1 + rax], ymm2": { + "ExpectedInstructionCount": 32, + "Comment": [ + "Map 2 0b01 0x92 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov x0, v4.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw", + "ld1 {v0.d}[0], [x1]", + "mov x0, v4.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw", + "ld1 {v0.d}[1], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherdpd ymm0, [xmm1*2 + rax], ymm2": { + "ExpectedInstructionCount": 32, + "Comment": [ + "Map 2 0b01 0x92 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, 
v17.s[0]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov x0, v4.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.d}[0], [x1]", + "mov x0, v4.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw #1", + "ld1 {v0.d}[1], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherdpd ymm0, [xmm1*4 + rax], ymm2": { + "ExpectedInstructionCount": 32, + "Comment": [ + "Map 2 0b01 0x92 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[1]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov x0, v4.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.d}[0], [x1]", + "mov x0, v4.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw #2", + "ld1 {v0.d}[1], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherdpd ymm0, [xmm1*8 + rax], ymm2": { + "ExpectedInstructionCount": 32, + "Comment": [ + "Map 2 0b01 0x92 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[0]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, 
v17.s[1]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov x0, v4.d[0]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[2]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.d}[0], [x1]", + "mov x0, v4.d[1]", + "tbz x0, #63, #+0x10", + "smov x0, v17.s[3]", + "add x1, x4, w0, sxtw #3", + "ld1 {v0.d}[1], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherqps xmm0, [xmm1*1 + rax], xmm2": { + "ExpectedInstructionCount": 18, + "Comment": [ + "Map 2 0b01 0x93 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0", + "ld1 {v0.s}[1], [x1]", + "mov v2.16b, v0.16b", + "movi v18.2d, #0x0", + "zip1 v16.2d, v2.2d, v18.2d", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherqps xmm0, [xmm1*2 + rax], xmm2": { + "ExpectedInstructionCount": 18, + "Comment": [ + "Map 2 0b01 0x93 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.s}[1], [x1]", + "mov v2.16b, v0.16b", + "movi v18.2d, #0x0", + "zip1 v16.2d, v2.2d, v18.2d", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherqps xmm0, [xmm1*4 + rax], xmm2": { + "ExpectedInstructionCount": 18, + "Comment": [ + "Map 2 0b01 0x93 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + 
"tbz w0, #31, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.s}[1], [x1]", + "mov v2.16b, v0.16b", + "movi v18.2d, #0x0", + "zip1 v16.2d, v2.2d, v18.2d", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherqps xmm0, [xmm1*8 + rax], xmm2": { + "ExpectedInstructionCount": 18, + "Comment": [ + "Map 2 0b01 0x93 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.s}[1], [x1]", + "mov v2.16b, v0.16b", + "movi v18.2d, #0x0", + "zip1 v16.2d, v2.2d, v18.2d", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherqps xmm0, [ymm1*1 + rax], xmm2": { + "ExpectedInstructionCount": 30, + "Comment": [ + "Map 2 0b01 0x93 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "mov x0, v2.d[0]", + "add x1, x4, x0", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "mov x0, v2.d[1]", + "add x1, x4, x0", + "ld1 {v0.s}[3], [x1]", + "mov v3.16b, v0.16b", + "mov v0.16b, v3.16b", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherqps xmm0, [ymm1*2 + rax], xmm2": { + "ExpectedInstructionCount": 30, + "Comment": [ + "Map 2 0b01 0x93 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, 
x0, lsl #1", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "mov x0, v2.d[0]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "mov x0, v2.d[1]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.s}[3], [x1]", + "mov v3.16b, v0.16b", + "mov v0.16b, v3.16b", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherqps xmm0, [ymm1*4 + rax], xmm2": { + "ExpectedInstructionCount": 30, + "Comment": [ + "Map 2 0b01 0x93 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "mov x0, v2.d[0]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "mov x0, v2.d[1]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.s}[3], [x1]", + "mov v3.16b, v0.16b", + "mov v0.16b, v3.16b", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherqps xmm0, [ymm1*8 + rax], xmm2": { + "ExpectedInstructionCount": 30, + "Comment": [ + "Map 2 0b01 0x93 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov w0, v18.s[0]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.s}[0], [x1]", + "mov w0, v18.s[1]", + "tbz w0, #31, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.s}[1], [x1]", + "mov w0, v18.s[2]", + "tbz w0, #31, #+0x10", + "mov x0, v2.d[0]", + "add x1, x4, 
x0, lsl #3", + "ld1 {v0.s}[2], [x1]", + "mov w0, v18.s[3]", + "tbz w0, #31, #+0x10", + "mov x0, v2.d[1]", + "add x1, x4, x0, lsl #3", + "ld1 {v0.s}[3], [x1]", + "mov v3.16b, v0.16b", + "mov v0.16b, v3.16b", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherqpd xmm0, [xmm1*1 + rax], xmm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0x93 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "cmplt p0.d, p6/z, z18.d, #0", + "ld1d {z0.d}, p0/z, [x4, z17.d]", + "mov z16.d, p0/m, z0.d", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherqpd xmm0, [xmm1*2 + rax], xmm2": { + "ExpectedInstructionCount": 17, + "Comment": [ + "Map 2 0b01 0x93 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherqpd xmm0, [xmm1*4 + rax], xmm2": { + "ExpectedInstructionCount": 17, + "Comment": [ + "Map 2 0b01 0x93 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherqpd xmm0, [xmm1*8 + rax], xmm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0x93 128-bit" + ], + "ExpectedArm64ASM": [ + "mrs x20, nzcv", + 
"cmplt p0.d, p6/z, z18.d, #0", + "ld1d {z0.d}, p0/z, [x4, z17.d, lsl #3]", + "mov z16.d, p0/m, z0.d", + "movi v18.2d, #0x0", + "str q18, [x28, #16]", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherqpd ymm0, [ymm1*1 + rax], ymm2": { + "ExpectedInstructionCount": 14, + "Comment": [ + "Map 2 0b01 0x93 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "cmplt p0.d, p6/z, z18.d, #0", + "ld1d {z0.d}, p0/z, [x4, z17.d]", + "mov z16.d, p0/m, z0.d", + "cmplt p0.d, p6/z, z4.d, #0", + "ld1d {z0.d}, p0/z, [x4, z3.d]", + "mov z2.d, p0/m, z0.d", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherqpd ymm0, [ymm1*2 + rax], ymm2": { + "ExpectedInstructionCount": 32, + "Comment": [ + "Map 2 0b01 0x93 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[0]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov x0, v4.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v3.d[0]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.d}[0], [x1]", + "mov x0, v4.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v3.d[1]", + "add x1, x4, x0, lsl #1", + "ld1 {v0.d}[1], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherqpd ymm0, [ymm1*4 + rax], ymm2": { + "ExpectedInstructionCount": 32, + "Comment": [ + "Map 2 0b01 0x93 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "mov v0.16b, v16.16b", + "mov x0, v18.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[0]", + "add 
x1, x4, x0, lsl #2", + "ld1 {v0.d}[0], [x1]", + "mov x0, v18.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v17.d[1]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.d}[1], [x1]", + "mov v16.16b, v0.16b", + "mov v0.16b, v2.16b", + "mov x0, v4.d[0]", + "tbz x0, #63, #+0x10", + "mov x0, v3.d[0]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.d}[0], [x1]", + "mov x0, v4.d[1]", + "tbz x0, #63, #+0x10", + "mov x0, v3.d[1]", + "add x1, x4, x0, lsl #2", + "ld1 {v0.d}[1], [x1]", + "mov v2.16b, v0.16b", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vgatherqpd ymm0, [ymm1*8 + rax], ymm2": { + "ExpectedInstructionCount": 14, + "Comment": [ + "Map 2 0b01 0x93 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mrs x20, nzcv", + "cmplt p0.d, p6/z, z18.d, #0", + "ld1d {z0.d}, p0/z, [x4, z17.d, lsl #3]", + "mov z16.d, p0/m, z0.d", + "cmplt p0.d, p6/z, z4.d, #0", + "ld1d {z0.d}, p0/z, [x4, z3.d, lsl #3]", + "mov z2.d, p0/m, z0.d", + "str q2, [x28, #16]", + "movi v18.2d, #0x0", + "str q18, [x28, #48]", + "msr nzcv, x20" + ] + }, + "vfmaddsub132ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 2 0b01 0x96 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #2384]", + "eor v2.16b, v17.16b, v2.16b", + "mov v0.16b, v2.16b", + "fmla v0.4s, v16.4s, v18.4s", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmaddsub132ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 11, + "Comment": [ + "Map 2 0b01 0x96 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "ldr q5, [x28, #2384]", + "eor v6.16b, v17.16b, v5.16b", + "mov v0.16b, v6.16b", + "fmla v0.4s, v16.4s, v18.4s", + "mov v16.16b, v0.16b", + "eor v3.16b, v3.16b, v5.16b", + "fmla v3.4s, v2.4s, v4.4s", + "str q3, [x28, #16]" + ] + }, + "vfmaddsub132pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 7, + "Comment": 
[ + "Map 2 0b01 0x96 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #2416]", + "eor v2.16b, v17.16b, v2.16b", + "mov v0.16b, v2.16b", + "fmla v0.2d, v16.2d, v18.2d", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmaddsub132pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 11, + "Comment": [ + "Map 2 0b01 0x96 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "ldr q5, [x28, #2416]", + "eor v6.16b, v17.16b, v5.16b", + "mov v0.16b, v6.16b", + "fmla v0.2d, v16.2d, v18.2d", + "mov v16.16b, v0.16b", + "eor v3.16b, v3.16b, v5.16b", + "fmla v3.2d, v2.2d, v4.2d", + "str q3, [x28, #16]" + ] + }, + "vfmsubadd132ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 2 0b01 0x97 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #2448]", + "eor v2.16b, v17.16b, v2.16b", + "mov v0.16b, v2.16b", + "fmla v0.4s, v16.4s, v18.4s", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmsubadd132ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 11, + "Comment": [ + "Map 2 0b01 0x97 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "ldr q5, [x28, #2448]", + "eor v6.16b, v17.16b, v5.16b", + "mov v0.16b, v6.16b", + "fmla v0.4s, v16.4s, v18.4s", + "mov v16.16b, v0.16b", + "eor v3.16b, v3.16b, v5.16b", + "fmla v3.4s, v2.4s, v4.4s", + "str q3, [x28, #16]" + ] + }, + "vfmsubadd132pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 2 0b01 0x97 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #2480]", + "eor v2.16b, v17.16b, v2.16b", + "mov v0.16b, v2.16b", + "fmla v0.2d, v16.2d, v18.2d", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmsubadd132pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 11, + "Comment": [ + "Map 2 0b01 0x97 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", 
+ "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "ldr q5, [x28, #2480]", + "eor v6.16b, v17.16b, v5.16b", + "mov v0.16b, v6.16b", + "fmla v0.2d, v16.2d, v18.2d", + "mov v16.16b, v0.16b", + "eor v3.16b, v3.16b, v5.16b", + "fmla v3.2d, v2.2d, v4.2d", + "str q3, [x28, #16]" + ] + }, + "vfmadd132ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x98 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v17.16b", + "fmla v0.4s, v16.4s, v18.4s", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmadd132ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0x98 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mov v0.16b, v17.16b", + "fmla v0.4s, v16.4s, v18.4s", + "mov v16.16b, v0.16b", + "fmla v3.4s, v2.4s, v4.4s", + "str q3, [x28, #16]" + ] + }, + "vfmadd132pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x98 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v17.16b", + "fmla v0.2d, v16.2d, v18.2d", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmadd132pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0x98 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mov v0.16b, v17.16b", + "fmla v0.2d, v16.2d, v18.2d", + "mov v16.16b, v0.16b", + "fmla v3.2d, v2.2d, v4.2d", + "str q3, [x28, #16]" + ] + }, + "vfmadd132ss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0x99 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v17.16b", + "fmla v0.4s, v16.4s, v18.4s", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.s[0], v2.s[0]", + "str q3, [x28, #16]" + ] + }, + "vfmadd132sd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0x99 128-bit" + ], + "ExpectedArm64ASM": [ + 
"mov v0.16b, v17.16b", + "fmla v0.2d, v16.2d, v18.2d", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.d[0], v2.d[0]", + "str q3, [x28, #16]" + ] + }, + "vfmsub132ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x9a 128-bit" + ], + "ExpectedArm64ASM": [ + "mov z0.d, z17.d", + "fnmls z0.s, p6/m, z16.s, z18.s", + "mov z16.d, z0.d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmsub132ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0x9a 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mov z0.d, z17.d", + "fnmls z0.s, p6/m, z16.s, z18.s", + "mov z16.d, z0.d", + "fnmls z3.s, p6/m, z2.s, z4.s", + "str q3, [x28, #16]" + ] + }, + "vfmsub132pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x9a 128-bit" + ], + "ExpectedArm64ASM": [ + "mov z0.d, z17.d", + "fnmls z0.d, p6/m, z16.d, z18.d", + "mov z16.d, z0.d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmsub132pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0x9a 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mov z0.d, z17.d", + "fnmls z0.d, p6/m, z16.d, z18.d", + "mov z16.d, z0.d", + "fnmls z3.d, p6/m, z2.d, z4.d", + "str q3, [x28, #16]" + ] + }, + "vfmsub132ss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0x9b 128-bit" + ], + "ExpectedArm64ASM": [ + "mov z0.d, z17.d", + "fnmls z0.s, p6/m, z16.s, z18.s", + "mov z2.d, z0.d", + "movi v3.2d, #0x0", + "mov v16.s[0], v2.s[0]", + "str q3, [x28, #16]" + ] + }, + "vfmsub132sd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0x9b 128-bit" + ], + "ExpectedArm64ASM": [ + "mov z0.d, z17.d", + "fnmls z0.d, p6/m, z16.d, z18.d", + "mov z2.d, z0.d", + "movi v3.2d, #0x0", + "mov v16.d[0], v2.d[0]", + "str q3, 
[x28, #16]" + ] + }, + "vfnmadd132ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x9c 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v17.16b", + "fmls v0.4s, v16.4s, v18.4s", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfnmadd132ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0x9c 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mov v0.16b, v17.16b", + "fmls v0.4s, v16.4s, v18.4s", + "mov v16.16b, v0.16b", + "fmls v3.4s, v2.4s, v4.4s", + "str q3, [x28, #16]" + ] + }, + "vfnmadd132pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x9c 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v17.16b", + "fmls v0.2d, v16.2d, v18.2d", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfnmadd132pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0x9c 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mov v0.16b, v17.16b", + "fmls v0.2d, v16.2d, v18.2d", + "mov v16.16b, v0.16b", + "fmls v3.2d, v2.2d, v4.2d", + "str q3, [x28, #16]" + ] + }, + "vfnmadd132ss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0x9d 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v17.16b", + "fmls v0.4s, v16.4s, v18.4s", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.s[0], v2.s[0]", + "str q3, [x28, #16]" + ] + }, + "vfnmadd132sd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0x9d 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v17.16b", + "fmls v0.2d, v16.2d, v18.2d", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.d[0], v2.d[0]", + "str q3, [x28, #16]" + ] + }, + "vfnmsub132ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 
0x9e 128-bit" + ], + "ExpectedArm64ASM": [ + "mov z0.d, z17.d", + "fnmla z0.s, p6/m, z16.s, z18.s", + "mov z16.d, z0.d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfnmsub132ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0x9e 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mov z0.d, z17.d", + "fnmla z0.s, p6/m, z16.s, z18.s", + "mov z16.d, z0.d", + "fnmla z3.s, p6/m, z2.s, z4.s", + "str q3, [x28, #16]" + ] + }, + "vfnmsub132pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0x9e 128-bit" + ], + "ExpectedArm64ASM": [ + "mov z0.d, z17.d", + "fnmla z0.d, p6/m, z16.d, z18.d", + "mov z16.d, z0.d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfnmsub132pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0x9e 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mov z0.d, z17.d", + "fnmla z0.d, p6/m, z16.d, z18.d", + "mov z16.d, z0.d", + "fnmla z3.d, p6/m, z2.d, z4.d", + "str q3, [x28, #16]" + ] + }, + "vfnmsub132ss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0x9f 128-bit" + ], + "ExpectedArm64ASM": [ + "mov z0.d, z17.d", + "fnmla z0.s, p6/m, z16.s, z18.s", + "mov z2.d, z0.d", + "movi v3.2d, #0x0", + "mov v16.s[0], v2.s[0]", + "str q3, [x28, #16]" + ] + }, + "vfnmsub132sd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0x9f 128-bit" + ], + "ExpectedArm64ASM": [ + "mov z0.d, z17.d", + "fnmla z0.d, p6/m, z16.d, z18.d", + "mov z2.d, z0.d", + "movi v3.2d, #0x0", + "mov v16.d[0], v2.d[0]", + "str q3, [x28, #16]" + ] + }, + "vfmadd213ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0xa8 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v18.16b", + "fmla v0.4s, v17.4s, v16.4s", + "mov v16.16b, v0.16b", + "movi 
v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmadd213ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0xa8 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mov v0.16b, v18.16b", + "fmla v0.4s, v17.4s, v16.4s", + "mov v16.16b, v0.16b", + "fmla v4.4s, v3.4s, v2.4s", + "str q4, [x28, #16]" + ] + }, + "vfmadd213pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0xa8 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v18.16b", + "fmla v0.2d, v17.2d, v16.2d", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmadd213pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0xa8 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mov v0.16b, v18.16b", + "fmla v0.2d, v17.2d, v16.2d", + "mov v16.16b, v0.16b", + "fmla v4.2d, v3.2d, v2.2d", + "str q4, [x28, #16]" + ] + }, + "vfmadd213ss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xa9 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v18.16b", + "fmla v0.4s, v17.4s, v16.4s", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.s[0], v2.s[0]", + "str q3, [x28, #16]" + ] + }, + "vfmadd213sd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xa9 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v18.16b", + "fmla v0.2d, v17.2d, v16.2d", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.d[0], v2.d[0]", + "str q3, [x28, #16]" + ] + }, + "vfmsub213ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0xaa 128-bit" + ], + "ExpectedArm64ASM": [ + "mov z0.d, z18.d", + "fnmls z0.s, p6/m, z17.s, z16.s", + "mov z16.d, z0.d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmsub213ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 8, + "Comment": [ 
+ "Map 2 0b01 0xaa 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mov z0.d, z18.d", + "fnmls z0.s, p6/m, z17.s, z16.s", + "mov z16.d, z0.d", + "fnmls z4.s, p6/m, z3.s, z2.s", + "str q4, [x28, #16]" + ] + }, + "vfmsub213pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0xaa 128-bit" + ], + "ExpectedArm64ASM": [ + "mov z0.d, z18.d", + "fnmls z0.d, p6/m, z17.d, z16.d", + "mov z16.d, z0.d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmsub213pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0xaa 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mov z0.d, z18.d", + "fnmls z0.d, p6/m, z17.d, z16.d", + "mov z16.d, z0.d", + "fnmls z4.d, p6/m, z3.d, z2.d", + "str q4, [x28, #16]" + ] + }, + "vfmsub213ss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xab 128-bit" + ], + "ExpectedArm64ASM": [ + "mov z0.d, z18.d", + "fnmls z0.s, p6/m, z17.s, z16.s", + "mov z2.d, z0.d", + "movi v3.2d, #0x0", + "mov v16.s[0], v2.s[0]", + "str q3, [x28, #16]" + ] + }, + "vfmsub213sd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xab 128-bit" + ], + "ExpectedArm64ASM": [ + "mov z0.d, z18.d", + "fnmls z0.d, p6/m, z17.d, z16.d", + "mov z2.d, z0.d", + "movi v3.2d, #0x0", + "mov v16.d[0], v2.d[0]", + "str q3, [x28, #16]" + ] + }, + "vfnmadd213ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0xac 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v18.16b", + "fmls v0.4s, v17.4s, v16.4s", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfnmadd213ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0xac 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mov 
v0.16b, v18.16b", + "fmls v0.4s, v17.4s, v16.4s", + "mov v16.16b, v0.16b", + "fmls v4.4s, v3.4s, v2.4s", + "str q4, [x28, #16]" + ] + }, + "vfnmadd213pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0xac 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v18.16b", + "fmls v0.2d, v17.2d, v16.2d", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfnmadd213pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0xac 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mov v0.16b, v18.16b", + "fmls v0.2d, v17.2d, v16.2d", + "mov v16.16b, v0.16b", + "fmls v4.2d, v3.2d, v2.2d", + "str q4, [x28, #16]" + ] + }, + "vfnmadd213ss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xad 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v18.16b", + "fmls v0.4s, v17.4s, v16.4s", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.s[0], v2.s[0]", + "str q3, [x28, #16]" + ] + }, + "vfnmadd213sd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xad 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v18.16b", + "fmls v0.2d, v17.2d, v16.2d", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.d[0], v2.d[0]", + "str q3, [x28, #16]" + ] + }, + "vfnmsub213ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0xae 128-bit" + ], + "ExpectedArm64ASM": [ + "mov z0.d, z18.d", + "fnmla z0.s, p6/m, z17.s, z16.s", + "mov z16.d, z0.d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfnmsub213ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0xae 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mov z0.d, z18.d", + "fnmla z0.s, p6/m, z17.s, z16.s", + "mov z16.d, z0.d", + "fnmla z4.s, p6/m, z3.s, z2.s", + "str q4, [x28, 
#16]" + ] + }, + "vfnmsub213pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 2 0b01 0xae 128-bit" + ], + "ExpectedArm64ASM": [ + "mov z0.d, z18.d", + "fnmla z0.d, p6/m, z17.d, z16.d", + "mov z16.d, z0.d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfnmsub213pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 2 0b01 0xae 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "mov z0.d, z18.d", + "fnmla z0.d, p6/m, z17.d, z16.d", + "mov z16.d, z0.d", + "fnmla z4.d, p6/m, z3.d, z2.d", + "str q4, [x28, #16]" + ] + }, + "vfnmsub213ss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xaf 128-bit" + ], + "ExpectedArm64ASM": [ + "mov z0.d, z18.d", + "fnmla z0.s, p6/m, z17.s, z16.s", + "mov z2.d, z0.d", + "movi v3.2d, #0x0", + "mov v16.s[0], v2.s[0]", + "str q3, [x28, #16]" + ] + }, + "vfnmsub213sd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xaf 128-bit" + ], + "ExpectedArm64ASM": [ + "mov z0.d, z18.d", + "fnmla z0.d, p6/m, z17.d, z16.d", + "mov z2.d, z0.d", + "movi v3.2d, #0x0", + "mov v16.d[0], v2.d[0]", + "str q3, [x28, #16]" + ] + }, + "vfmadd231ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0xb8 128-bit" + ], + "ExpectedArm64ASM": [ + "fmla v16.4s, v17.4s, v18.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmadd231ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xb8 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "fmla v16.4s, v17.4s, v18.4s", + "fmla v2.4s, v3.4s, v4.4s", + "str q2, [x28, #16]" + ] + }, + "vfmadd231pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0xb8 128-bit" + ], + "ExpectedArm64ASM": [ + "fmla v16.2d, v17.2d, v18.2d", + "movi v2.2d, #0x0", + "str q2, [x28, 
#16]" + ] + }, + "vfmadd231pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xb8 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "fmla v16.2d, v17.2d, v18.2d", + "fmla v2.2d, v3.2d, v4.2d", + "str q2, [x28, #16]" + ] + }, + "vfmadd231ss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xb9 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v16.16b", + "fmla v0.4s, v17.4s, v18.4s", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.s[0], v2.s[0]", + "str q3, [x28, #16]" + ] + }, + "vfmadd231sd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xb9 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v16.16b", + "fmla v0.2d, v17.2d, v18.2d", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.d[0], v2.d[0]", + "str q3, [x28, #16]" + ] + }, + "vfmsub231ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0xba 128-bit" + ], + "ExpectedArm64ASM": [ + "fnmls z16.s, p6/m, z17.s, z18.s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmsub231ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xba 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "fnmls z16.s, p6/m, z17.s, z18.s", + "fnmls z2.s, p6/m, z3.s, z4.s", + "str q2, [x28, #16]" + ] + }, + "vfmsub231pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0xba 128-bit" + ], + "ExpectedArm64ASM": [ + "fnmls z16.d, p6/m, z17.d, z18.d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmsub231pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xba 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "fnmls z16.d, p6/m, z17.d, z18.d", + "fnmls z2.d, p6/m, z3.d, z4.d", + "str 
q2, [x28, #16]" + ] + }, + "vfmsub231ss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xbb 128-bit" + ], + "ExpectedArm64ASM": [ + "mov z0.d, z16.d", + "fnmls z0.s, p6/m, z17.s, z18.s", + "mov z2.d, z0.d", + "movi v3.2d, #0x0", + "mov v16.s[0], v2.s[0]", + "str q3, [x28, #16]" + ] + }, + "vfmsub231sd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xbb 128-bit" + ], + "ExpectedArm64ASM": [ + "mov z0.d, z16.d", + "fnmls z0.d, p6/m, z17.d, z18.d", + "mov z2.d, z0.d", + "movi v3.2d, #0x0", + "mov v16.d[0], v2.d[0]", + "str q3, [x28, #16]" + ] + }, + "vfnmadd231ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0xbc 128-bit" + ], + "ExpectedArm64ASM": [ + "fmls v16.4s, v17.4s, v18.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfnmadd231ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xbc 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "fmls v16.4s, v17.4s, v18.4s", + "fmls v2.4s, v3.4s, v4.4s", + "str q2, [x28, #16]" + ] + }, + "vfnmadd231pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0xbc 128-bit" + ], + "ExpectedArm64ASM": [ + "fmls v16.2d, v17.2d, v18.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfnmadd231pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xbc 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "fmls v16.2d, v17.2d, v18.2d", + "fmls v2.2d, v3.2d, v4.2d", + "str q2, [x28, #16]" + ] + }, + "vfnmadd231ss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xbd 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v16.16b", + "fmls v0.4s, v17.4s, v18.4s", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.s[0], v2.s[0]", + "str q3, [x28, #16]" + ] + 
}, + "vfnmadd231sd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xbd 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v0.16b, v16.16b", + "fmls v0.2d, v17.2d, v18.2d", + "mov v2.16b, v0.16b", + "movi v3.2d, #0x0", + "mov v16.d[0], v2.d[0]", + "str q3, [x28, #16]" + ] + }, + "vfnmsub231ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0xbe 128-bit" + ], + "ExpectedArm64ASM": [ + "fnmla z16.s, p6/m, z17.s, z18.s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfnmsub231ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xbe 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "fnmla z16.s, p6/m, z17.s, z18.s", + "fnmla z2.s, p6/m, z3.s, z4.s", + "str q2, [x28, #16]" + ] + }, + "vfnmsub231pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 2 0b01 0xbe 128-bit" + ], + "ExpectedArm64ASM": [ + "fnmla z16.d, p6/m, z17.d, z18.d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfnmsub231pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xbe 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "fnmla z16.d, p6/m, z17.d, z18.d", + "fnmla z2.d, p6/m, z3.d, z4.d", + "str q2, [x28, #16]" + ] + }, + "vfnmsub231ss xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xbf 128-bit" + ], + "ExpectedArm64ASM": [ + "mov z0.d, z16.d", + "fnmla z0.s, p6/m, z17.s, z18.s", + "mov z2.d, z0.d", + "movi v3.2d, #0x0", + "mov v16.s[0], v2.s[0]", + "str q3, [x28, #16]" + ] + }, + "vfnmsub231sd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 2 0b01 0xbf 128-bit" + ], + "ExpectedArm64ASM": [ + "mov z0.d, z16.d", + "fnmla z0.d, p6/m, z17.d, z18.d", + "mov z2.d, z0.d", + "movi v3.2d, #0x0", + "mov v16.d[0], v2.d[0]", + "str q3, [x28, #16]" + 
] + }, + "vfmaddsub213ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 2 0b01 0xa6 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #2384]", + "eor v2.16b, v18.16b, v2.16b", + "mov v0.16b, v2.16b", + "fmla v0.4s, v17.4s, v16.4s", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmaddsub213ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 11, + "Comment": [ + "Map 2 0b01 0xa6 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "ldr q5, [x28, #2384]", + "eor v6.16b, v18.16b, v5.16b", + "mov v0.16b, v6.16b", + "fmla v0.4s, v17.4s, v16.4s", + "mov v16.16b, v0.16b", + "eor v4.16b, v4.16b, v5.16b", + "fmla v4.4s, v3.4s, v2.4s", + "str q4, [x28, #16]" + ] + }, + "vfmaddsub213pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 2 0b01 0xa6 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #2416]", + "eor v2.16b, v18.16b, v2.16b", + "mov v0.16b, v2.16b", + "fmla v0.2d, v17.2d, v16.2d", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmaddsub213pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 11, + "Comment": [ + "Map 2 0b01 0xa6 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "ldr q5, [x28, #2416]", + "eor v6.16b, v18.16b, v5.16b", + "mov v0.16b, v6.16b", + "fmla v0.2d, v17.2d, v16.2d", + "mov v16.16b, v0.16b", + "eor v4.16b, v4.16b, v5.16b", + "fmla v4.2d, v3.2d, v2.2d", + "str q4, [x28, #16]" + ] + }, + "vfmsubadd213ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 2 0b01 0xa7 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #2448]", + "eor v2.16b, v18.16b, v2.16b", + "mov v0.16b, v2.16b", + "fmla v0.4s, v17.4s, v16.4s", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmsubadd213ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 11, + 
"Comment": [ + "Map 2 0b01 0xa7 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "ldr q5, [x28, #2448]", + "eor v6.16b, v18.16b, v5.16b", + "mov v0.16b, v6.16b", + "fmla v0.4s, v17.4s, v16.4s", + "mov v16.16b, v0.16b", + "eor v4.16b, v4.16b, v5.16b", + "fmla v4.4s, v3.4s, v2.4s", + "str q4, [x28, #16]" + ] + }, + "vfmsubadd213pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 2 0b01 0xa7 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #2480]", + "eor v2.16b, v18.16b, v2.16b", + "mov v0.16b, v2.16b", + "fmla v0.2d, v17.2d, v16.2d", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmsubadd213pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 11, + "Comment": [ + "Map 2 0b01 0xa7 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "ldr q5, [x28, #2480]", + "eor v6.16b, v18.16b, v5.16b", + "mov v0.16b, v6.16b", + "fmla v0.2d, v17.2d, v16.2d", + "mov v16.16b, v0.16b", + "eor v4.16b, v4.16b, v5.16b", + "fmla v4.2d, v3.2d, v2.2d", + "str q4, [x28, #16]" + ] + }, + "vfmaddsub231ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 2 0b01 0xb6 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #2384]", + "eor v2.16b, v16.16b, v2.16b", + "mov v0.16b, v2.16b", + "fmla v0.4s, v17.4s, v18.4s", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmaddsub231ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 11, + "Comment": [ + "Map 2 0b01 0xb6 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "ldr q5, [x28, #2384]", + "eor v6.16b, v16.16b, v5.16b", + "mov v0.16b, v6.16b", + "fmla v0.4s, v17.4s, v18.4s", + "mov v16.16b, v0.16b", + "eor v2.16b, v2.16b, v5.16b", + "fmla v2.4s, v3.4s, v4.4s", + "str q2, [x28, #16]" + ] + }, + "vfmaddsub231pd xmm0, xmm1, xmm2": { + 
"ExpectedInstructionCount": 7, + "Comment": [ + "Map 2 0b01 0xb6 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #2416]", + "eor v2.16b, v16.16b, v2.16b", + "mov v0.16b, v2.16b", + "fmla v0.2d, v17.2d, v18.2d", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmaddsub231pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 11, + "Comment": [ + "Map 2 0b01 0xb6 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "ldr q5, [x28, #2416]", + "eor v6.16b, v16.16b, v5.16b", + "mov v0.16b, v6.16b", + "fmla v0.2d, v17.2d, v18.2d", + "mov v16.16b, v0.16b", + "eor v2.16b, v2.16b, v5.16b", + "fmla v2.2d, v3.2d, v4.2d", + "str q2, [x28, #16]" + ] + }, + "vfmsubadd231ps xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 2 0b01 0xb7 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #2448]", + "eor v2.16b, v16.16b, v2.16b", + "mov v0.16b, v2.16b", + "fmla v0.4s, v17.4s, v18.4s", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmsubadd231ps ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 11, + "Comment": [ + "Map 2 0b01 0xb7 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "ldr q5, [x28, #2448]", + "eor v6.16b, v16.16b, v5.16b", + "mov v0.16b, v6.16b", + "fmla v0.4s, v17.4s, v18.4s", + "mov v16.16b, v0.16b", + "eor v2.16b, v2.16b, v5.16b", + "fmla v2.4s, v3.4s, v4.4s", + "str q2, [x28, #16]" + ] + }, + "vfmsubadd231pd xmm0, xmm1, xmm2": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 2 0b01 0xb7 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #2480]", + "eor v2.16b, v16.16b, v2.16b", + "mov v0.16b, v2.16b", + "fmla v0.2d, v17.2d, v18.2d", + "mov v16.16b, v0.16b", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vfmsubadd231pd ymm0, ymm1, ymm2": { + "ExpectedInstructionCount": 11, + "Comment": [ + "Map 2 0b01 0xb7 256-bit" + ], + 
"ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "ldr q4, [x28, #48]", + "ldr q5, [x28, #2480]", + "eor v6.16b, v16.16b, v5.16b", + "mov v0.16b, v6.16b", + "fmla v0.2d, v17.2d, v18.2d", + "mov v16.16b, v0.16b", + "eor v2.16b, v2.16b, v5.16b", + "fmla v2.2d, v3.2d, v4.2d", + "str q2, [x28, #16]" + ] + } + } +} diff --git a/unittests/InstructionCountCI/AVX128/VEX_map3.json b/unittests/InstructionCountCI/AVX128/VEX_map3.json new file mode 100644 index 0000000000..d126f6e282 --- /dev/null +++ b/unittests/InstructionCountCI/AVX128/VEX_map3.json @@ -0,0 +1,4009 @@ +{ + "Features": { + "Bitness": 64, + "EnabledHostFeatures": [ + "AVX" + ], + "DisabledHostFeatures": [ + "AFP", + "SVE256", + "SVE128" + ] + }, + "Instructions": { + "vpermq ymm0, ymm1, 00000000b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x00 256-bit" + ], + "ExpectedArm64ASM": [ + "dup v16.2d, v17.d[0]", + "str q16, [x28, #16]" + ] + }, + "vpermq ymm0, ymm1, 01010101b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x00 256-bit" + ], + "ExpectedArm64ASM": [ + "dup v16.2d, v17.d[1]", + "str q16, [x28, #16]" + ] + }, + "vpermq ymm0, ymm1, 10101010b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x00 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "dup v16.2d, v2.d[0]", + "str q16, [x28, #16]" + ] + }, + "vpermq ymm0, ymm1, 11111111b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x00 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "dup v16.2d, v2.d[1]", + "str q16, [x28, #16]" + ] + }, + "vpermpd ymm0, ymm1, 00000000b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x01 256-bit" + ], + "ExpectedArm64ASM": [ + "dup v16.2d, v17.d[0]", + "str q16, [x28, #16]" + ] + }, + "vpermpd ymm0, ymm1, 01010101b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x01 256-bit" + ], + "ExpectedArm64ASM": [ + "dup v16.2d, v17.d[1]", + "str q16, [x28, #16]" + ] + 
}, + "vpermpd ymm0, ymm1, 10101010b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x01 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "dup v16.2d, v2.d[0]", + "str q16, [x28, #16]" + ] + }, + "vpermpd ymm0, ymm1, 11111111b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x01 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "dup v16.2d, v2.d[1]", + "str q16, [x28, #16]" + ] + }, + "vpblendd xmm0, xmm1, 0000b": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x02 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + "mov v3.s[0], v16.s[0]", + "mov v3.s[1], v16.s[1]", + "mov v3.s[2], v16.s[2]", + "mov v0.16b, v3.16b", + "mov v0.s[3], v16.s[3]", + "mov v16.16b, v0.16b", + "str q2, [x28, #16]" + ] + }, + "vpblendd xmm0, xmm1, 0001b": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x02 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + "mov v3.s[0], v17.s[0]", + "mov v3.s[1], v16.s[1]", + "mov v3.s[2], v16.s[2]", + "mov v0.16b, v3.16b", + "mov v0.s[3], v16.s[3]", + "mov v16.16b, v0.16b", + "str q2, [x28, #16]" + ] + }, + "vpblendd xmm0, xmm1, 0010b": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x02 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + "mov v3.s[0], v16.s[0]", + "mov v3.s[1], v17.s[1]", + "mov v3.s[2], v16.s[2]", + "mov v0.16b, v3.16b", + "mov v0.s[3], v16.s[3]", + "mov v16.16b, v0.16b", + "str q2, [x28, #16]" + ] + }, + "vpblendd xmm0, xmm1, 0011b": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x02 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + "mov v3.s[0], v17.s[0]", + "mov v3.s[1], v17.s[1]", + "mov v3.s[2], v16.s[2]", + "mov v0.16b, v3.16b", + "mov v0.s[3], v16.s[3]", + "mov v16.16b, v0.16b", + "str q2, [x28, #16]" + ] + }, + "vpblendd xmm0, xmm1, 0100b": { + 
"ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x02 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + "mov v3.s[0], v16.s[0]", + "mov v3.s[1], v16.s[1]", + "mov v3.s[2], v17.s[2]", + "mov v0.16b, v3.16b", + "mov v0.s[3], v16.s[3]", + "mov v16.16b, v0.16b", + "str q2, [x28, #16]" + ] + }, + "vpblendd xmm0, xmm1, 0101b": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x02 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + "mov v3.s[0], v17.s[0]", + "mov v3.s[1], v16.s[1]", + "mov v3.s[2], v17.s[2]", + "mov v0.16b, v3.16b", + "mov v0.s[3], v16.s[3]", + "mov v16.16b, v0.16b", + "str q2, [x28, #16]" + ] + }, + "vpblendd xmm0, xmm1, 0110b": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x02 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + "mov v3.s[0], v16.s[0]", + "mov v3.s[1], v17.s[1]", + "mov v3.s[2], v17.s[2]", + "mov v0.16b, v3.16b", + "mov v0.s[3], v16.s[3]", + "mov v16.16b, v0.16b", + "str q2, [x28, #16]" + ] + }, + "vpblendd xmm0, xmm1, 0111b": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x02 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + "mov v3.s[0], v17.s[0]", + "mov v3.s[1], v17.s[1]", + "mov v3.s[2], v17.s[2]", + "mov v0.16b, v3.16b", + "mov v0.s[3], v16.s[3]", + "mov v16.16b, v0.16b", + "str q2, [x28, #16]" + ] + }, + "vpblendd xmm0, xmm1, 1000b": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 3 0b01 0x02 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + "mov v3.s[0], v16.s[0]", + "mov v3.s[1], v16.s[1]", + "mov v3.s[2], v16.s[2]", + "mov v16.16b, v3.16b", + "mov v16.s[3], v17.s[3]", + "str q2, [x28, #16]" + ] + }, + "vpblendd xmm0, xmm1, 1001b": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 3 0b01 0x02 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + 
"mov v3.s[0], v17.s[0]", + "mov v3.s[1], v16.s[1]", + "mov v3.s[2], v16.s[2]", + "mov v16.16b, v3.16b", + "mov v16.s[3], v17.s[3]", + "str q2, [x28, #16]" + ] + }, + "vpblendd xmm0, xmm1, 1010b": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 3 0b01 0x02 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + "mov v3.s[0], v16.s[0]", + "mov v3.s[1], v17.s[1]", + "mov v3.s[2], v16.s[2]", + "mov v16.16b, v3.16b", + "mov v16.s[3], v17.s[3]", + "str q2, [x28, #16]" + ] + }, + "vpblendd xmm0, xmm1, 1011b": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 3 0b01 0x02 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + "mov v3.s[0], v17.s[0]", + "mov v3.s[1], v17.s[1]", + "mov v3.s[2], v16.s[2]", + "mov v16.16b, v3.16b", + "mov v16.s[3], v17.s[3]", + "str q2, [x28, #16]" + ] + }, + "vpblendd xmm0, xmm1, 1100b": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 3 0b01 0x02 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + "mov v3.s[0], v16.s[0]", + "mov v3.s[1], v16.s[1]", + "mov v3.s[2], v17.s[2]", + "mov v16.16b, v3.16b", + "mov v16.s[3], v17.s[3]", + "str q2, [x28, #16]" + ] + }, + "vpblendd xmm0, xmm1, 1101b": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 3 0b01 0x02 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + "mov v3.s[0], v17.s[0]", + "mov v3.s[1], v16.s[1]", + "mov v3.s[2], v17.s[2]", + "mov v16.16b, v3.16b", + "mov v16.s[3], v17.s[3]", + "str q2, [x28, #16]" + ] + }, + "vpblendd xmm0, xmm1, 1110b": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 3 0b01 0x02 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + "mov v3.s[0], v16.s[0]", + "mov v3.s[1], v17.s[1]", + "mov v3.s[2], v17.s[2]", + "mov v16.16b, v3.16b", + "mov v16.s[3], v17.s[3]", + "str q2, [x28, #16]" + ] + }, + "vpblendd xmm0, xmm1, 1111b": { + "ExpectedInstructionCount": 8, + "Comment": 
[ + "Map 3 0b01 0x02 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + "mov v3.s[0], v17.s[0]", + "mov v3.s[1], v17.s[1]", + "mov v3.s[2], v17.s[2]", + "mov v16.16b, v3.16b", + "mov v16.s[3], v17.s[3]", + "str q2, [x28, #16]" + ] + }, + "vpblendd ymm0, ymm1, 00000000b": { + "ExpectedInstructionCount": 14, + "Comment": [ + "Map 3 0b01 0x02 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.s[0], v16.s[0]", + "mov v4.s[1], v16.s[1]", + "mov v4.s[2], v16.s[2]", + "mov v0.16b, v4.16b", + "mov v0.s[3], v16.s[3]", + "mov v16.16b, v0.16b", + "mov v3.s[0], v2.s[0]", + "mov v3.s[1], v2.s[1]", + "mov v3.s[2], v2.s[2]", + "mov v3.s[3], v2.s[3]", + "str q3, [x28, #16]" + ] + }, + "vpblendd ymm0, ymm1, 01010101b": { + "ExpectedInstructionCount": 15, + "Comment": [ + "Map 3 0b01 0x02 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "movi v4.2d, #0x0", + "mov v5.16b, v4.16b", + "mov v5.s[0], v17.s[0]", + "mov v5.s[1], v16.s[1]", + "mov v5.s[2], v17.s[2]", + "mov v0.16b, v5.16b", + "mov v0.s[3], v16.s[3]", + "mov v16.16b, v0.16b", + "mov v4.s[0], v3.s[0]", + "mov v4.s[1], v2.s[1]", + "mov v4.s[2], v3.s[2]", + "mov v4.s[3], v2.s[3]", + "str q4, [x28, #16]" + ] + }, + "vpblendd ymm0, ymm1, 10101010b": { + "ExpectedInstructionCount": 14, + "Comment": [ + "Map 3 0b01 0x02 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #16]", + "ldr q3, [x28, #32]", + "movi v4.2d, #0x0", + "mov v5.16b, v4.16b", + "mov v5.s[0], v16.s[0]", + "mov v5.s[1], v17.s[1]", + "mov v5.s[2], v16.s[2]", + "mov v16.16b, v5.16b", + "mov v16.s[3], v17.s[3]", + "mov v4.s[0], v2.s[0]", + "mov v4.s[1], v3.s[1]", + "mov v4.s[2], v2.s[2]", + "mov v4.s[3], v3.s[3]", + "str q4, [x28, #16]" + ] + }, + "vpblendd ymm0, ymm1, 11111111b": { + "ExpectedInstructionCount": 13, + "Comment": [ + "Map 3 0b01 0x02 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v3.2d, 
#0x0", + "mov v4.16b, v3.16b", + "mov v4.s[0], v17.s[0]", + "mov v4.s[1], v17.s[1]", + "mov v4.s[2], v17.s[2]", + "mov v16.16b, v4.16b", + "mov v16.s[3], v17.s[3]", + "mov v3.s[0], v2.s[0]", + "mov v3.s[1], v2.s[1]", + "mov v3.s[2], v2.s[2]", + "mov v3.s[3], v2.s[3]", + "str q3, [x28, #16]" + ] + }, + "vpermilps xmm0, xmm1, 00000000b": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 3 0b01 0x03 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + "mov v3.s[0], v17.s[0]", + "mov v3.s[1], v17.s[0]", + "mov v3.s[2], v17.s[0]", + "mov v16.16b, v3.16b", + "mov v16.s[3], v17.s[0]", + "str q2, [x28, #16]" + ] + }, + "vpermilps xmm0, xmm1, 01010101b": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 3 0b01 0x03 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + "mov v3.s[0], v17.s[1]", + "mov v3.s[1], v17.s[1]", + "mov v3.s[2], v17.s[1]", + "mov v16.16b, v3.16b", + "mov v16.s[3], v17.s[1]", + "str q2, [x28, #16]" + ] + }, + "vpermilps xmm0, xmm1, 10101010b": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 3 0b01 0x03 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + "mov v3.s[0], v17.s[2]", + "mov v3.s[1], v17.s[2]", + "mov v3.s[2], v17.s[2]", + "mov v16.16b, v3.16b", + "mov v16.s[3], v17.s[2]", + "str q2, [x28, #16]" + ] + }, + "vpermilps xmm0, xmm1, 11111111b": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 3 0b01 0x03 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + "mov v3.s[0], v17.s[3]", + "mov v3.s[1], v17.s[3]", + "mov v3.s[2], v17.s[3]", + "mov v16.16b, v3.16b", + "mov v16.s[3], v17.s[3]", + "str q2, [x28, #16]" + ] + }, + "vpermilps ymm0, ymm1, 00000000b": { + "ExpectedInstructionCount": 13, + "Comment": [ + "Map 3 0b01 0x03 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.s[0], v17.s[0]", + "mov v4.s[1], v17.s[0]", + 
"mov v4.s[2], v17.s[0]", + "mov v16.16b, v4.16b", + "mov v16.s[3], v17.s[0]", + "mov v3.s[0], v2.s[0]", + "mov v3.s[1], v2.s[0]", + "mov v3.s[2], v2.s[0]", + "mov v3.s[3], v2.s[0]", + "str q3, [x28, #16]" + ] + }, + "vpermilps ymm0, ymm1, 01010101b": { + "ExpectedInstructionCount": 13, + "Comment": [ + "Map 3 0b01 0x03 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.s[0], v17.s[1]", + "mov v4.s[1], v17.s[1]", + "mov v4.s[2], v17.s[1]", + "mov v16.16b, v4.16b", + "mov v16.s[3], v17.s[1]", + "mov v3.s[0], v2.s[1]", + "mov v3.s[1], v2.s[1]", + "mov v3.s[2], v2.s[1]", + "mov v3.s[3], v2.s[1]", + "str q3, [x28, #16]" + ] + }, + "vpermilps ymm0, ymm1, 10101010b": { + "ExpectedInstructionCount": 13, + "Comment": [ + "Map 3 0b01 0x03 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.s[0], v17.s[2]", + "mov v4.s[1], v17.s[2]", + "mov v4.s[2], v17.s[2]", + "mov v16.16b, v4.16b", + "mov v16.s[3], v17.s[2]", + "mov v3.s[0], v2.s[2]", + "mov v3.s[1], v2.s[2]", + "mov v3.s[2], v2.s[2]", + "mov v3.s[3], v2.s[2]", + "str q3, [x28, #16]" + ] + }, + "vpermilps ymm0, ymm1, 11111111b": { + "ExpectedInstructionCount": 13, + "Comment": [ + "Map 3 0b01 0x03 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.s[0], v17.s[3]", + "mov v4.s[1], v17.s[3]", + "mov v4.s[2], v17.s[3]", + "mov v16.16b, v4.16b", + "mov v16.s[3], v17.s[3]", + "mov v3.s[0], v2.s[3]", + "mov v3.s[1], v2.s[3]", + "mov v3.s[2], v2.s[3]", + "mov v3.s[3], v2.s[3]", + "str q3, [x28, #16]" + ] + }, + "vpermilpd xmm0, xmm1, 00b": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 3 0b01 0x05 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + "mov v3.d[0], v17.d[0]", + "mov v16.16b, v3.16b", + "mov v16.d[1], v17.d[0]", + "str q2, [x28, #16]" + ] + }, + "vpermilpd xmm0, xmm1, 01b": { + 
"ExpectedInstructionCount": 6, + "Comment": [ + "Map 3 0b01 0x05 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + "mov v3.d[0], v17.d[1]", + "mov v16.16b, v3.16b", + "mov v16.d[1], v17.d[0]", + "str q2, [x28, #16]" + ] + }, + "vpermilpd xmm0, xmm1, 10b": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 3 0b01 0x05 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + "mov v3.d[0], v17.d[0]", + "mov v16.16b, v3.16b", + "mov v16.d[1], v17.d[1]", + "str q2, [x28, #16]" + ] + }, + "vpermilpd xmm0, xmm1, 11b": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 3 0b01 0x05 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + "mov v3.d[0], v17.d[1]", + "mov v16.16b, v3.16b", + "mov v16.d[1], v17.d[1]", + "str q2, [x28, #16]" + ] + }, + "vpermilpd ymm0, ymm1, 0000b": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x05 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.d[0], v17.d[0]", + "mov v16.16b, v4.16b", + "mov v16.d[1], v17.d[0]", + "mov v3.d[0], v2.d[0]", + "mov v3.d[1], v2.d[0]", + "str q3, [x28, #16]" + ] + }, + "vpermilpd ymm0, ymm1, 0001b": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x05 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.d[0], v17.d[1]", + "mov v16.16b, v4.16b", + "mov v16.d[1], v17.d[0]", + "mov v3.d[0], v2.d[0]", + "mov v3.d[1], v2.d[0]", + "str q3, [x28, #16]" + ] + }, + "vpermilpd ymm0, ymm1, 0010b": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x05 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.d[0], v17.d[0]", + "mov v16.16b, v4.16b", + "mov v16.d[1], v17.d[1]", + "mov v3.d[0], v2.d[0]", + "mov v3.d[1], v2.d[0]", + "str q3, [x28, #16]" + ] + }, + "vpermilpd ymm0, ymm1, 
0011b": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x05 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.d[0], v17.d[1]", + "mov v16.16b, v4.16b", + "mov v16.d[1], v17.d[1]", + "mov v3.d[0], v2.d[0]", + "mov v3.d[1], v2.d[0]", + "str q3, [x28, #16]" + ] + }, + "vpermilpd ymm0, ymm1, 0100b": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x05 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.d[0], v17.d[0]", + "mov v16.16b, v4.16b", + "mov v16.d[1], v17.d[0]", + "mov v3.d[0], v2.d[1]", + "mov v3.d[1], v2.d[0]", + "str q3, [x28, #16]" + ] + }, + "vpermilpd ymm0, ymm1, 0101b": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x05 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.d[0], v17.d[1]", + "mov v16.16b, v4.16b", + "mov v16.d[1], v17.d[0]", + "mov v3.d[0], v2.d[1]", + "mov v3.d[1], v2.d[0]", + "str q3, [x28, #16]" + ] + }, + "vpermilpd ymm0, ymm1, 0110b": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x05 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.d[0], v17.d[0]", + "mov v16.16b, v4.16b", + "mov v16.d[1], v17.d[1]", + "mov v3.d[0], v2.d[1]", + "mov v3.d[1], v2.d[0]", + "str q3, [x28, #16]" + ] + }, + "vpermilpd ymm0, ymm1, 0111b": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x05 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.d[0], v17.d[1]", + "mov v16.16b, v4.16b", + "mov v16.d[1], v17.d[1]", + "mov v3.d[0], v2.d[1]", + "mov v3.d[1], v2.d[0]", + "str q3, [x28, #16]" + ] + }, + "vpermilpd ymm0, ymm1, 1000b": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x05 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, 
[x28, #32]", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.d[0], v17.d[0]", + "mov v16.16b, v4.16b", + "mov v16.d[1], v17.d[0]", + "mov v3.d[0], v2.d[0]", + "mov v3.d[1], v2.d[1]", + "str q3, [x28, #16]" + ] + }, + "vpermilpd ymm0, ymm1, 1001b": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x05 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.d[0], v17.d[1]", + "mov v16.16b, v4.16b", + "mov v16.d[1], v17.d[0]", + "mov v3.d[0], v2.d[0]", + "mov v3.d[1], v2.d[1]", + "str q3, [x28, #16]" + ] + }, + "vpermilpd ymm0, ymm1, 1010b": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x05 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.d[0], v17.d[0]", + "mov v16.16b, v4.16b", + "mov v16.d[1], v17.d[1]", + "mov v3.d[0], v2.d[0]", + "mov v3.d[1], v2.d[1]", + "str q3, [x28, #16]" + ] + }, + "vpermilpd ymm0, ymm1, 1011b": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x05 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.d[0], v17.d[1]", + "mov v16.16b, v4.16b", + "mov v16.d[1], v17.d[1]", + "mov v3.d[0], v2.d[0]", + "mov v3.d[1], v2.d[1]", + "str q3, [x28, #16]" + ] + }, + "vpermilpd ymm0, ymm1, 1100b": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x05 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.d[0], v17.d[0]", + "mov v16.16b, v4.16b", + "mov v16.d[1], v17.d[0]", + "mov v3.d[0], v2.d[1]", + "mov v3.d[1], v2.d[1]", + "str q3, [x28, #16]" + ] + }, + "vpermilpd ymm0, ymm1, 1101b": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x05 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.d[0], v17.d[1]", + "mov v16.16b, v4.16b", + "mov v16.d[1], 
v17.d[0]", + "mov v3.d[0], v2.d[1]", + "mov v3.d[1], v2.d[1]", + "str q3, [x28, #16]" + ] + }, + "vpermilpd ymm0, ymm1, 1110b": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x05 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.d[0], v17.d[0]", + "mov v16.16b, v4.16b", + "mov v16.d[1], v17.d[1]", + "mov v3.d[0], v2.d[1]", + "mov v3.d[1], v2.d[1]", + "str q3, [x28, #16]" + ] + }, + "vpermilpd ymm0, ymm1, 1111b": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x05 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.d[0], v17.d[1]", + "mov v16.16b, v4.16b", + "mov v16.d[1], v17.d[1]", + "mov v3.d[0], v2.d[1]", + "mov v3.d[1], v2.d[1]", + "str q3, [x28, #16]" + ] + }, + "vperm2f128 ymm0, ymm1, ymm2, 00000000b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x06 256-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "str q17, [x28, #16]" + ] + }, + "vperm2f128 ymm0, ymm1, ymm2, 00000001b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x06 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x28, #32]", + "str q17, [x28, #16]" + ] + }, + "vperm2f128 ymm0, ymm1, ymm2, 00000010b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x06 256-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v18.16b", + "str q17, [x28, #16]" + ] + }, + "vperm2f128 ymm0, ymm1, ymm2, 00000011b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x06 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x28, #48]", + "str q17, [x28, #16]" + ] + }, + "vperm2f128 ymm0, ymm1, ymm2, 00010000b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x06 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mov v16.16b, v17.16b", + "str q2, [x28, #16]" + ] + }, + "vperm2f128 ymm0, ymm1, ymm2, 00010001b": { + "ExpectedInstructionCount": 2, + 
"Comment": [ + "Map 3 0b01 0x06 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x28, #32]", + "str q16, [x28, #16]" + ] + }, + "vperm2f128 ymm0, ymm1, ymm2, 00010010b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x06 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mov v16.16b, v18.16b", + "str q2, [x28, #16]" + ] + }, + "vperm2f128 ymm0, ymm1, ymm2, 00010011b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x06 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q16, [x28, #48]", + "str q2, [x28, #16]" + ] + }, + "vperm2f128 ymm0, ymm1, ymm2, 00100000b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x06 256-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "str q18, [x28, #16]" + ] + }, + "vperm2f128 ymm0, ymm1, ymm2, 00100001b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x06 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x28, #32]", + "str q18, [x28, #16]" + ] + }, + "vperm2f128 ymm0, ymm1, ymm2, 00100010b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x06 256-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v18.16b", + "str q18, [x28, #16]" + ] + }, + "vperm2f128 ymm0, ymm1, ymm2, 00100011b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x06 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x28, #48]", + "str q18, [x28, #16]" + ] + }, + "vperm2f128 ymm0, ymm1, ymm2, 00110000b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x06 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #48]", + "mov v16.16b, v17.16b", + "str q2, [x28, #16]" + ] + }, + "vperm2f128 ymm0, ymm1, ymm2, 00110001b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x06 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x28, #32]", + "ldr q2, [x28, #48]", + "str q2, [x28, #16]" + ] + }, + "vperm2f128 ymm0, ymm1, ymm2, 00110010b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 
0b01 0x06 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #48]", + "mov v16.16b, v18.16b", + "str q2, [x28, #16]" + ] + }, + "vperm2f128 ymm0, ymm1, ymm2, 00110011b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x06 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x28, #48]", + "str q16, [x28, #16]" + ] + }, + "vperm2f128 ymm0, ymm1, ymm2, 00001000b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x06 256-bit" + ], + "ExpectedArm64ASM": [ + "movi v16.2d, #0x0", + "str q17, [x28, #16]" + ] + }, + "vperm2f128 ymm0, ymm1, ymm2, 00011000b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x06 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v16.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vperm2f128 ymm0, ymm1, ymm2, 00101000b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x06 256-bit" + ], + "ExpectedArm64ASM": [ + "movi v16.2d, #0x0", + "str q18, [x28, #16]" + ] + }, + "vperm2f128 ymm0, ymm1, ymm2, 00111000b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x06 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #48]", + "movi v16.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vperm2f128 ymm0, ymm1, ymm2, 10001000b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x06 256-bit" + ], + "ExpectedArm64ASM": [ + "movi v16.2d, #0x0", + "str q16, [x28, #16]" + ] + }, + "vperm2f128 ymm0, ymm1, ymm2, 10000000b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x06 256-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v16.16b, v17.16b", + "str q2, [x28, #16]" + ] + }, + "vperm2f128 ymm0, ymm1, ymm2, 10000001b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x06 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x28, #32]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vperm2f128 ymm0, ymm1, ymm2, 10000010b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x06 
256-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v16.16b, v18.16b", + "str q2, [x28, #16]" + ] + }, + "vperm2f128 ymm0, ymm1, ymm2, 10000011b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x06 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x28, #48]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vroundps xmm0, xmm1, 00000000b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "nearest rounding", + "Map 3 0b01 0x08 128-bit" + ], + "ExpectedArm64ASM": [ + "frintn v16.4s, v17.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vroundps xmm0, xmm1, 00000001b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "-inf rounding", + "Map 3 0b01 0x08 128-bit" + ], + "ExpectedArm64ASM": [ + "frintm v16.4s, v17.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vroundps xmm0, xmm1, 00000010b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "+inf rounding", + "Map 3 0b01 0x08 128-bit" + ], + "ExpectedArm64ASM": [ + "frintp v16.4s, v17.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vroundps xmm0, xmm1, 00000011b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "truncate rounding", + "Map 3 0b01 0x08 128-bit" + ], + "ExpectedArm64ASM": [ + "frintz v16.4s, v17.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vroundps xmm0, xmm1, 00000100b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "host mode rounding", + "Map 3 0b01 0x08 128-bit" + ], + "ExpectedArm64ASM": [ + "frinti v16.4s, v17.4s", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vroundps ymm0, ymm1, 00000000b": { + "ExpectedInstructionCount": 4, + "Comment": [ + "nearest rounding", + "Map 3 0b01 0x08 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "frintn v16.4s, v17.4s", + "frintn v2.4s, v2.4s", + "str q2, [x28, #16]" + ] + }, + "vroundps ymm0, ymm1, 00000001b": { + "ExpectedInstructionCount": 4, + "Comment": [ + "-inf rounding", + "Map 3 0b01 0x08 256-bit" + ], + 
"ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "frintm v16.4s, v17.4s", + "frintm v2.4s, v2.4s", + "str q2, [x28, #16]" + ] + }, + "vroundps ymm0, ymm1, 00000010b": { + "ExpectedInstructionCount": 4, + "Comment": [ + "+inf rounding", + "Map 3 0b01 0x08 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "frintp v16.4s, v17.4s", + "frintp v2.4s, v2.4s", + "str q2, [x28, #16]" + ] + }, + "vroundps ymm0, ymm1, 00000011b": { + "ExpectedInstructionCount": 4, + "Comment": [ + "truncate rounding", + "Map 3 0b01 0x08 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "frintz v16.4s, v17.4s", + "frintz v2.4s, v2.4s", + "str q2, [x28, #16]" + ] + }, + "vroundps ymm0, ymm1, 00000100b": { + "ExpectedInstructionCount": 4, + "Comment": [ + "host mode rounding", + "Map 3 0b01 0x08 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "frinti v16.4s, v17.4s", + "frinti v2.4s, v2.4s", + "str q2, [x28, #16]" + ] + }, + "vroundpd xmm0, xmm1, 00000000b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "nearest rounding", + "Map 3 0b01 0x09 128-bit" + ], + "ExpectedArm64ASM": [ + "frintn v16.2d, v17.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vroundpd xmm0, xmm1, 00000001b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "-inf rounding", + "Map 3 0b01 0x09 128-bit" + ], + "ExpectedArm64ASM": [ + "frintm v16.2d, v17.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vroundpd xmm0, xmm1, 00000010b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "+inf rounding", + "Map 3 0b01 0x09 128-bit" + ], + "ExpectedArm64ASM": [ + "frintp v16.2d, v17.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vroundpd xmm0, xmm1, 00000011b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "truncate rounding", + "Map 3 0b01 0x09 128-bit" + ], + "ExpectedArm64ASM": [ + "frintz v16.2d, v17.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vroundpd xmm0, xmm1, 00000100b": { + "ExpectedInstructionCount": 3, 
+ "Comment": [ + "host mode rounding", + "Map 3 0b01 0x09 128-bit" + ], + "ExpectedArm64ASM": [ + "frinti v16.2d, v17.2d", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vroundpd ymm0, ymm1, 00000000b": { + "ExpectedInstructionCount": 4, + "Comment": [ + "nearest rounding", + "Map 3 0b01 0x09 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "frintn v16.2d, v17.2d", + "frintn v2.2d, v2.2d", + "str q2, [x28, #16]" + ] + }, + "vroundpd ymm0, ymm1, 00000001b": { + "ExpectedInstructionCount": 4, + "Comment": [ + "-inf rounding", + "Map 3 0b01 0x09 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "frintm v16.2d, v17.2d", + "frintm v2.2d, v2.2d", + "str q2, [x28, #16]" + ] + }, + "vroundpd ymm0, ymm1, 00000010b": { + "ExpectedInstructionCount": 4, + "Comment": [ + "+inf rounding", + "Map 3 0b01 0x09 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "frintp v16.2d, v17.2d", + "frintp v2.2d, v2.2d", + "str q2, [x28, #16]" + ] + }, + "vroundpd ymm0, ymm1, 00000011b": { + "ExpectedInstructionCount": 4, + "Comment": [ + "truncate rounding", + "Map 3 0b01 0x09 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "frintz v16.2d, v17.2d", + "frintz v2.2d, v2.2d", + "str q2, [x28, #16]" + ] + }, + "vroundpd ymm0, ymm1, 00000100b": { + "ExpectedInstructionCount": 4, + "Comment": [ + "host mode rounding", + "Map 3 0b01 0x09 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "frinti v16.2d, v17.2d", + "frinti v2.2d, v2.2d", + "str q2, [x28, #16]" + ] + }, + "vroundss xmm0, xmm1, 00000000b": { + "ExpectedInstructionCount": 4, + "Comment": [ + "nearest rounding", + "Map 3 0b01 0x0a 128-bit" + ], + "ExpectedArm64ASM": [ + "frintn s0, s17", + "mov v16.s[0], v0.s[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vroundss xmm0, xmm1, 00000001b": { + "ExpectedInstructionCount": 4, + "Comment": [ + "-inf rounding", + "Map 3 0b01 0x0a 128-bit" + ], + "ExpectedArm64ASM": [ + "frintm s0, s17", + "mov 
v16.s[0], v0.s[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vroundss xmm0, xmm1, 00000010b": { + "ExpectedInstructionCount": 4, + "Comment": [ + "+inf rounding", + "Map 3 0b01 0x0a 128-bit" + ], + "ExpectedArm64ASM": [ + "frintp s0, s17", + "mov v16.s[0], v0.s[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vroundss xmm0, xmm1, 00000011b": { + "ExpectedInstructionCount": 4, + "Comment": [ + "truncate rounding", + "Map 3 0b01 0x0a 128-bit" + ], + "ExpectedArm64ASM": [ + "frintz s0, s17", + "mov v16.s[0], v0.s[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vroundss xmm0, xmm1, 00000100b": { + "ExpectedInstructionCount": 4, + "Comment": [ + "host mode rounding", + "Map 3 0b01 0x0a 128-bit" + ], + "ExpectedArm64ASM": [ + "frinti s0, s17", + "mov v16.s[0], v0.s[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vroundsd xmm0, xmm1, 00000000b": { + "ExpectedInstructionCount": 4, + "Comment": [ + "nearest rounding", + "Map 3 0b01 0x0b 128-bit" + ], + "ExpectedArm64ASM": [ + "frintn d0, d17", + "mov v16.d[0], v0.d[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vroundsd xmm0, xmm1, 00000001b": { + "ExpectedInstructionCount": 4, + "Comment": [ + "-inf rounding", + "Map 3 0b01 0x0b 128-bit" + ], + "ExpectedArm64ASM": [ + "frintm d0, d17", + "mov v16.d[0], v0.d[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vroundsd xmm0, xmm1, 00000010b": { + "ExpectedInstructionCount": 4, + "Comment": [ + "+inf rounding", + "Map 3 0b01 0x0b 128-bit" + ], + "ExpectedArm64ASM": [ + "frintp d0, d17", + "mov v16.d[0], v0.d[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vroundsd xmm0, xmm1, 00000011b": { + "ExpectedInstructionCount": 4, + "Comment": [ + "truncate rounding", + "Map 3 0b01 0x0b 128-bit" + ], + "ExpectedArm64ASM": [ + "frintz d0, d17", + "mov v16.d[0], v0.d[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vroundsd xmm0, xmm1, 00000100b": { + "ExpectedInstructionCount": 
4, + "Comment": [ + "host mode rounding", + "Map 3 0b01 0x0b 128-bit" + ], + "ExpectedArm64ASM": [ + "frinti d0, d17", + "mov v16.d[0], v0.d[0]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vblendps xmm0, xmm1, xmm2, 0000b": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 3 0b01 0x0c 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + "mov v3.s[0], v17.s[0]", + "mov v3.s[1], v17.s[1]", + "mov v3.s[2], v17.s[2]", + "mov v16.16b, v3.16b", + "mov v16.s[3], v17.s[3]", + "str q2, [x28, #16]" + ] + }, + "vblendps xmm0, xmm1, xmm2, 0001b": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 3 0b01 0x0c 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + "mov v3.s[0], v18.s[0]", + "mov v3.s[1], v17.s[1]", + "mov v3.s[2], v17.s[2]", + "mov v16.16b, v3.16b", + "mov v16.s[3], v17.s[3]", + "str q2, [x28, #16]" + ] + }, + "vblendps xmm0, xmm1, xmm2, 1111b": { + "ExpectedInstructionCount": 8, + "Comment": [ + "Map 3 0b01 0x0c 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + "mov v3.s[0], v18.s[0]", + "mov v3.s[1], v18.s[1]", + "mov v3.s[2], v18.s[2]", + "mov v16.16b, v3.16b", + "mov v16.s[3], v18.s[3]", + "str q2, [x28, #16]" + ] + }, + "vblendps ymm0, ymm1, ymm2, 00000000b": { + "ExpectedInstructionCount": 13, + "Comment": [ + "Map 3 0b01 0x0c 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.s[0], v17.s[0]", + "mov v4.s[1], v17.s[1]", + "mov v4.s[2], v17.s[2]", + "mov v16.16b, v4.16b", + "mov v16.s[3], v17.s[3]", + "mov v3.s[0], v2.s[0]", + "mov v3.s[1], v2.s[1]", + "mov v3.s[2], v2.s[2]", + "mov v3.s[3], v2.s[3]", + "str q3, [x28, #16]" + ] + }, + "vblendps ymm0, ymm1, ymm2, 10000001b": { + "ExpectedInstructionCount": 14, + "Comment": [ + "Map 3 0b01 0x0c 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "movi v4.2d, #0x0", + "mov v5.16b, 
v4.16b", + "mov v5.s[0], v18.s[0]", + "mov v5.s[1], v17.s[1]", + "mov v5.s[2], v17.s[2]", + "mov v16.16b, v5.16b", + "mov v16.s[3], v17.s[3]", + "mov v4.s[0], v3.s[0]", + "mov v4.s[1], v2.s[1]", + "mov v4.s[2], v2.s[2]", + "mov v4.s[3], v2.s[3]", + "str q4, [x28, #16]" + ] + }, + "vblendps ymm0, ymm1, ymm2, 11111111b": { + "ExpectedInstructionCount": 13, + "Comment": [ + "Map 3 0b01 0x0c 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #48]", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.s[0], v18.s[0]", + "mov v4.s[1], v18.s[1]", + "mov v4.s[2], v18.s[2]", + "mov v16.16b, v4.16b", + "mov v16.s[3], v18.s[3]", + "mov v3.s[0], v2.s[0]", + "mov v3.s[1], v2.s[1]", + "mov v3.s[2], v2.s[2]", + "mov v3.s[3], v2.s[3]", + "str q3, [x28, #16]" + ] + }, + "vblendpd xmm0, xmm1, xmm2, 00b": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 3 0b01 0x0d 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + "mov v3.d[0], v17.d[0]", + "mov v16.16b, v3.16b", + "mov v16.d[1], v17.d[1]", + "str q2, [x28, #16]" + ] + }, + "vblendpd xmm0, xmm1, xmm2, 01b": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 3 0b01 0x0d 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + "mov v3.d[0], v18.d[0]", + "mov v16.16b, v3.16b", + "mov v16.d[1], v17.d[1]", + "str q2, [x28, #16]" + ] + }, + "vblendpd xmm0, xmm1, xmm2, 10b": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 3 0b01 0x0d 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + "mov v3.d[0], v17.d[0]", + "mov v16.16b, v3.16b", + "mov v16.d[1], v18.d[1]", + "str q2, [x28, #16]" + ] + }, + "vblendpd xmm0, xmm1, xmm2, 11b": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 3 0b01 0x0d 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + "mov v3.d[0], v18.d[0]", + "mov v16.16b, v3.16b", + "mov v16.d[1], v18.d[1]", + "str q2, [x28, #16]" + ] + }, + "vblendpd ymm0, ymm1, 
ymm2, 0000b": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x0d 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.d[0], v17.d[0]", + "mov v16.16b, v4.16b", + "mov v16.d[1], v17.d[1]", + "mov v3.d[0], v2.d[0]", + "mov v3.d[1], v2.d[1]", + "str q3, [x28, #16]" + ] + }, + "vblendpd ymm0, ymm1, ymm2, 0001b": { + "ExpectedInstructionCount": 10, + "Comment": [ + "Map 3 0b01 0x0d 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "movi v4.2d, #0x0", + "mov v5.16b, v4.16b", + "mov v5.d[0], v18.d[0]", + "mov v16.16b, v5.16b", + "mov v16.d[1], v17.d[1]", + "mov v4.d[0], v3.d[0]", + "mov v4.d[1], v2.d[1]", + "str q4, [x28, #16]" + ] + }, + "vblendpd ymm0, ymm1, ymm2, 0010b": { + "ExpectedInstructionCount": 10, + "Comment": [ + "Map 3 0b01 0x0d 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "movi v4.2d, #0x0", + "mov v5.16b, v4.16b", + "mov v5.d[0], v17.d[0]", + "mov v16.16b, v5.16b", + "mov v16.d[1], v18.d[1]", + "mov v4.d[0], v2.d[0]", + "mov v4.d[1], v3.d[1]", + "str q4, [x28, #16]" + ] + }, + "vblendpd ymm0, ymm1, ymm2, 0011b": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x0d 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #48]", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.d[0], v18.d[0]", + "mov v16.16b, v4.16b", + "mov v16.d[1], v18.d[1]", + "mov v3.d[0], v2.d[0]", + "mov v3.d[1], v2.d[1]", + "str q3, [x28, #16]" + ] + }, + "vblendpd ymm0, ymm1, ymm2, 0100b": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x0d 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.d[0], v17.d[0]", + "mov v16.16b, v4.16b", + "mov v16.d[1], v17.d[1]", + "mov v3.d[0], v2.d[0]", + "mov v3.d[1], v2.d[1]", + "str q3, [x28, #16]" + ] + }, + "vblendpd ymm0, ymm1, ymm2, 0101b": { + "ExpectedInstructionCount": 10, + 
"Comment": [ + "Map 3 0b01 0x0d 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "movi v4.2d, #0x0", + "mov v5.16b, v4.16b", + "mov v5.d[0], v18.d[0]", + "mov v16.16b, v5.16b", + "mov v16.d[1], v17.d[1]", + "mov v4.d[0], v3.d[0]", + "mov v4.d[1], v2.d[1]", + "str q4, [x28, #16]" + ] + }, + "vblendpd ymm0, ymm1, ymm2, 0110b": { + "ExpectedInstructionCount": 10, + "Comment": [ + "Map 3 0b01 0x0d 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "movi v4.2d, #0x0", + "mov v5.16b, v4.16b", + "mov v5.d[0], v17.d[0]", + "mov v16.16b, v5.16b", + "mov v16.d[1], v18.d[1]", + "mov v4.d[0], v2.d[0]", + "mov v4.d[1], v3.d[1]", + "str q4, [x28, #16]" + ] + }, + "vblendpd ymm0, ymm1, ymm2, 0111b": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x0d 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #48]", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.d[0], v18.d[0]", + "mov v16.16b, v4.16b", + "mov v16.d[1], v18.d[1]", + "mov v3.d[0], v2.d[0]", + "mov v3.d[1], v2.d[1]", + "str q3, [x28, #16]" + ] + }, + "vblendpd ymm0, ymm1, ymm2, 1000b": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x0d 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.d[0], v17.d[0]", + "mov v16.16b, v4.16b", + "mov v16.d[1], v17.d[1]", + "mov v3.d[0], v2.d[0]", + "mov v3.d[1], v2.d[1]", + "str q3, [x28, #16]" + ] + }, + "vblendpd ymm0, ymm1, ymm2, 1001b": { + "ExpectedInstructionCount": 10, + "Comment": [ + "Map 3 0b01 0x0d 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "movi v4.2d, #0x0", + "mov v5.16b, v4.16b", + "mov v5.d[0], v18.d[0]", + "mov v16.16b, v5.16b", + "mov v16.d[1], v17.d[1]", + "mov v4.d[0], v3.d[0]", + "mov v4.d[1], v2.d[1]", + "str q4, [x28, #16]" + ] + }, + "vblendpd ymm0, ymm1, ymm2, 1010b": { + "ExpectedInstructionCount": 10, + "Comment": [ + "Map 3 0b01 
0x0d 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "movi v4.2d, #0x0", + "mov v5.16b, v4.16b", + "mov v5.d[0], v17.d[0]", + "mov v16.16b, v5.16b", + "mov v16.d[1], v18.d[1]", + "mov v4.d[0], v2.d[0]", + "mov v4.d[1], v3.d[1]", + "str q4, [x28, #16]" + ] + }, + "vblendpd ymm0, ymm1, ymm2, 1011b": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x0d 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #48]", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.d[0], v18.d[0]", + "mov v16.16b, v4.16b", + "mov v16.d[1], v18.d[1]", + "mov v3.d[0], v2.d[0]", + "mov v3.d[1], v2.d[1]", + "str q3, [x28, #16]" + ] + }, + "vblendpd ymm0, ymm1, ymm2, 1100b": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x0d 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.d[0], v17.d[0]", + "mov v16.16b, v4.16b", + "mov v16.d[1], v17.d[1]", + "mov v3.d[0], v2.d[0]", + "mov v3.d[1], v2.d[1]", + "str q3, [x28, #16]" + ] + }, + "vblendpd ymm0, ymm1, ymm2, 1101b": { + "ExpectedInstructionCount": 10, + "Comment": [ + "Map 3 0b01 0x0d 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "movi v4.2d, #0x0", + "mov v5.16b, v4.16b", + "mov v5.d[0], v18.d[0]", + "mov v16.16b, v5.16b", + "mov v16.d[1], v17.d[1]", + "mov v4.d[0], v3.d[0]", + "mov v4.d[1], v2.d[1]", + "str q4, [x28, #16]" + ] + }, + "vblendpd ymm0, ymm1, ymm2, 1110b": { + "ExpectedInstructionCount": 10, + "Comment": [ + "Map 3 0b01 0x0d 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "movi v4.2d, #0x0", + "mov v5.16b, v4.16b", + "mov v5.d[0], v17.d[0]", + "mov v16.16b, v5.16b", + "mov v16.d[1], v18.d[1]", + "mov v4.d[0], v2.d[0]", + "mov v4.d[1], v3.d[1]", + "str q4, [x28, #16]" + ] + }, + "vblendpd ymm0, ymm1, ymm2, 1111b": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x0d 256-bit" + ], +
"ExpectedArm64ASM": [ + "ldr q2, [x28, #48]", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.d[0], v18.d[0]", + "mov v16.16b, v4.16b", + "mov v16.d[1], v18.d[1]", + "mov v3.d[0], v2.d[0]", + "mov v3.d[1], v2.d[1]", + "str q3, [x28, #16]" + ] + }, + "vpblendw xmm0, xmm1, xmm2, 00000000b": { + "ExpectedInstructionCount": 12, + "Comment": [ + "Map 3 0b01 0x0e 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + "mov v3.h[0], v17.h[0]", + "mov v3.h[1], v17.h[1]", + "mov v3.h[2], v17.h[2]", + "mov v3.h[3], v17.h[3]", + "mov v3.h[4], v17.h[4]", + "mov v3.h[5], v17.h[5]", + "mov v3.h[6], v17.h[6]", + "mov v16.16b, v3.16b", + "mov v16.h[7], v17.h[7]", + "str q2, [x28, #16]" + ] + }, + "vpblendw xmm0, xmm1, xmm2, 00000001b": { + "ExpectedInstructionCount": 12, + "Comment": [ + "Map 3 0b01 0x0e 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + "mov v3.h[0], v18.h[0]", + "mov v3.h[1], v17.h[1]", + "mov v3.h[2], v17.h[2]", + "mov v3.h[3], v17.h[3]", + "mov v3.h[4], v17.h[4]", + "mov v3.h[5], v17.h[5]", + "mov v3.h[6], v17.h[6]", + "mov v16.16b, v3.16b", + "mov v16.h[7], v17.h[7]", + "str q2, [x28, #16]" + ] + }, + "vpblendw xmm0, xmm1, xmm2, 11111111b": { + "ExpectedInstructionCount": 12, + "Comment": [ + "Map 3 0b01 0x0e 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v3.16b, v2.16b", + "mov v3.h[0], v18.h[0]", + "mov v3.h[1], v18.h[1]", + "mov v3.h[2], v18.h[2]", + "mov v3.h[3], v18.h[3]", + "mov v3.h[4], v18.h[4]", + "mov v3.h[5], v18.h[5]", + "mov v3.h[6], v18.h[6]", + "mov v16.16b, v3.16b", + "mov v16.h[7], v18.h[7]", + "str q2, [x28, #16]" + ] + }, + "vpblendw ymm0, ymm1, ymm2, 00000000b": { + "ExpectedInstructionCount": 21, + "Comment": [ + "Map 3 0b01 0x0e 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.h[0], v17.h[0]", + "mov v4.h[1], v17.h[1]", + "mov v4.h[2], v17.h[2]", + "mov v4.h[3], v17.h[3]", +
"mov v4.h[4], v17.h[4]", + "mov v4.h[5], v17.h[5]", + "mov v4.h[6], v17.h[6]", + "mov v16.16b, v4.16b", + "mov v16.h[7], v17.h[7]", + "mov v3.h[0], v2.h[0]", + "mov v3.h[1], v2.h[1]", + "mov v3.h[2], v2.h[2]", + "mov v3.h[3], v2.h[3]", + "mov v3.h[4], v2.h[4]", + "mov v3.h[5], v2.h[5]", + "mov v3.h[6], v2.h[6]", + "mov v3.h[7], v2.h[7]", + "str q3, [x28, #16]" + ] + }, + "vpblendw ymm0, ymm1, ymm2, 00000001b": { + "ExpectedInstructionCount": 22, + "Comment": [ + "Map 3 0b01 0x0e 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "movi v4.2d, #0x0", + "mov v5.16b, v4.16b", + "mov v5.h[0], v18.h[0]", + "mov v5.h[1], v17.h[1]", + "mov v5.h[2], v17.h[2]", + "mov v5.h[3], v17.h[3]", + "mov v5.h[4], v17.h[4]", + "mov v5.h[5], v17.h[5]", + "mov v5.h[6], v17.h[6]", + "mov v16.16b, v5.16b", + "mov v16.h[7], v17.h[7]", + "mov v4.h[0], v3.h[0]", + "mov v4.h[1], v2.h[1]", + "mov v4.h[2], v2.h[2]", + "mov v4.h[3], v2.h[3]", + "mov v4.h[4], v2.h[4]", + "mov v4.h[5], v2.h[5]", + "mov v4.h[6], v2.h[6]", + "mov v4.h[7], v2.h[7]", + "str q4, [x28, #16]" + ] + }, + "vpblendw ymm0, ymm1, ymm2, 11111111b": { + "ExpectedInstructionCount": 21, + "Comment": [ + "Map 3 0b01 0x0e 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #48]", + "movi v3.2d, #0x0", + "mov v4.16b, v3.16b", + "mov v4.h[0], v18.h[0]", + "mov v4.h[1], v18.h[1]", + "mov v4.h[2], v18.h[2]", + "mov v4.h[3], v18.h[3]", + "mov v4.h[4], v18.h[4]", + "mov v4.h[5], v18.h[5]", + "mov v4.h[6], v18.h[6]", + "mov v16.16b, v4.16b", + "mov v16.h[7], v18.h[7]", + "mov v3.h[0], v2.h[0]", + "mov v3.h[1], v2.h[1]", + "mov v3.h[2], v2.h[2]", + "mov v3.h[3], v2.h[3]", + "mov v3.h[4], v2.h[4]", + "mov v3.h[5], v2.h[5]", + "mov v3.h[6], v2.h[6]", + "mov v3.h[7], v2.h[7]", + "str q3, [x28, #16]" + ] + }, + "vpalignr xmm0, xmm1, xmm2, 0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x0f 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v16.16b, v18.16b", + "str q2,
[x28, #16]" + ] + }, + "vpalignr xmm0, xmm1, xmm2, 1": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x0f 128-bit" + ], + "ExpectedArm64ASM": [ + "ext v16.16b, v18.16b, v17.16b, #1", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpalignr xmm0, xmm1, xmm2, 15": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x0f 128-bit" + ], + "ExpectedArm64ASM": [ + "ext v16.16b, v18.16b, v17.16b, #15", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpalignr xmm0, xmm1, xmm2, 16": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 3 0b01 0x0f 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v0.2d, #0x0", + "ext v16.16b, v17.16b, v0.16b, #0", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpalignr ymm0, ymm1, ymm2, 0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x0f 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #48]", + "mov v16.16b, v18.16b", + "str q2, [x28, #16]" + ] + }, + "vpalignr ymm0, ymm1, ymm2, 1": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 3 0b01 0x0f 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "ext v16.16b, v18.16b, v17.16b, #1", + "ext v2.16b, v3.16b, v2.16b, #1", + "str q2, [x28, #16]" + ] + }, + "vpalignr ymm0, ymm1, ymm2, 15": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 3 0b01 0x0f 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "ext v16.16b, v18.16b, v17.16b, #15", + "ext v2.16b, v3.16b, v2.16b, #15", + "str q2, [x28, #16]" + ] + }, + "vpalignr ymm0, ymm1, ymm2, 16": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 3 0b01 0x0f 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "movi v0.2d, #0x0", + "ext v16.16b, v17.16b, v0.16b, #0", + "movi v0.2d, #0x0", + "ext v2.16b, v2.16b, v0.16b, #0", + "str q2, [x28, #16]" + ] + }, + "vpextrb rax, xmm0, 0": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 3 0b01 0x14 
128-bit" + ], + "ExpectedArm64ASM": [ + "umov w4, v16.b[0]" + ] + }, + "vpextrb rax, xmm0, 15": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 3 0b01 0x14 128-bit" + ], + "ExpectedArm64ASM": [ + "umov w4, v16.b[15]" + ] + }, + "vpextrw rax, xmm0, 0": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 3 0b01 0x15 128-bit" + ], + "ExpectedArm64ASM": [ + "umov w4, v16.h[0]" + ] + }, + "vpextrw rax, xmm0, 7": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 3 0b01 0x15 128-bit" + ], + "ExpectedArm64ASM": [ + "umov w4, v16.h[7]" + ] + }, + "vpextrd rax, xmm0, 0": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 3 0b01 0x16 128-bit" + ], + "ExpectedArm64ASM": [ + "mov w4, v16.s[0]" + ] + }, + "vpextrd rax, xmm0, 3": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 3 0b01 0x16 128-bit" + ], + "ExpectedArm64ASM": [ + "mov w4, v16.s[3]" + ] + }, + "vpextrb [rax], xmm0, 0": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 3 0b01 0x14 128-bit" + ], + "ExpectedArm64ASM": [ + "st1 {v16.b}[0], [x4]" + ] + }, + "vpextrb [rax], xmm0, 15": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 3 0b01 0x14 128-bit" + ], + "ExpectedArm64ASM": [ + "st1 {v16.b}[15], [x4]" + ] + }, + "vpextrw [rax], xmm0, 0": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 3 0b01 0x15 128-bit" + ], + "ExpectedArm64ASM": [ + "st1 {v16.h}[0], [x4]" + ] + }, + "vpextrw [rax], xmm0, 7": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 3 0b01 0x15 128-bit" + ], + "ExpectedArm64ASM": [ + "st1 {v16.h}[7], [x4]" + ] + }, + "vpextrd [rax], xmm0, 0": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 3 0b01 0x16 128-bit" + ], + "ExpectedArm64ASM": [ + "st1 {v16.s}[0], [x4]" + ] + }, + "vpextrd [rax], xmm0, 3": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 3 0b01 0x16 128-bit" + ], + "ExpectedArm64ASM": [ + "st1 {v16.s}[3], [x4]" + ] + }, + "vextractps eax, xmm0, 0": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 3 0b01 
0x17 128-bit" + ], + "ExpectedArm64ASM": [ + "mov w4, v16.s[0]" + ] + }, + "vextractps eax, xmm0, 3": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 3 0b01 0x17 128-bit" + ], + "ExpectedArm64ASM": [ + "mov w4, v16.s[3]" + ] + }, + "vinsertf128 ymm0, ymm1, xmm2, 0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x18 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mov v16.16b, v18.16b", + "str q2, [x28, #16]" + ] + }, + "vinsertf128 ymm0, ymm1, xmm2, 1": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x18 256-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "str q18, [x28, #16]" + ] + }, + "vextractf128 xmm0, ymm1, 0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x19 256-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v16.16b, v17.16b", + "str q2, [x28, #16]" + ] + }, + "vextractf128 xmm0, ymm1, 1": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x19 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x28, #32]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vcvtps2ph xmm0, xmm1, 00000000b": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "nearest rounding", + "Map 3 0b01 0x1D 128-bit" + ] + }, + "vcvtps2ph xmm0, xmm1, 00000001b": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "-inf rounding", + "Map 3 0b01 0x1D 128-bit" + ] + }, + "vcvtps2ph xmm0, xmm1, 00000010b": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "+inf rounding", + "Map 3 0b01 0x1D 128-bit" + ] + }, + "vcvtps2ph xmm0, xmm1, 00000011b": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "truncate rounding", + "Map 3 0b01 0x1D 128-bit" + ] + }, + "vcvtps2ph xmm0, xmm1, 00000100b": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "host mode rounding", + "Map 3 0b01 0x1D 128-bit" + ] + }, + "vcvtps2ph xmm0, ymm1, 00000000b": { + "ExpectedInstructionCount": -1, + 
"Skip": "Yes", + "Comment": [ + "nearest rounding", + "Map 3 0b01 0x1D 256-bit" + ] + }, + "vcvtps2ph xmm0, ymm1, 00000001b": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "-inf rounding", + "Map 3 0b01 0x1D 256-bit" + ] + }, + "vcvtps2ph xmm0, ymm1, 00000010b": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "+inf rounding", + "Map 3 0b01 0x1D 256-bit" + ] + }, + "vcvtps2ph xmm0, ymm1, 00000011b": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "truncate rounding", + "Map 3 0b01 0x1D 256-bit" + ] + }, + "vcvtps2ph xmm0, ymm1, 00000100b": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "host mode rounding", + "Map 3 0b01 0x1D 256-bit" + ] + }, + "vpinsrb xmm0, xmm0, eax, 0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x20 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.b[0], w4", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpinsrb xmm0, xmm1, eax, 0": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 3 0b01 0x20 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "mov v16.b[0], w4", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpinsrb xmm0, xmm1, eax, 15": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 3 0b01 0x20 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "mov v16.b[15], w4", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vinsertps xmm0, xmm1, xmm2, ((0b00 << 6) | (0b00 << 4) | (0b0000))": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x21 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "mov v16.s[0], v18.s[0]" + ] + }, + "vinsertps xmm0, xmm1, xmm2, ((0b00 << 6) | (0b00 << 4) | (0b1111))": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 3 0b01 0x21 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v16.2d, #0x0" + ] + }, + "vinsertps xmm0, xmm1, xmm2, ((0b11 << 6) | (0b11 << 4) | (0b0000))": { + "ExpectedInstructionCount": 2, 
+ "Comment": [ + "Map 3 0b01 0x21 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "mov v16.s[3], v18.s[3]" + ] + }, + "vpinsrd xmm0, xmm0, eax, 0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x22 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.s[0], w4", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpinsrd xmm0, xmm1, eax, 0": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 3 0b01 0x22 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "mov v16.s[0], w4", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpinsrd xmm0, xmm1, eax, 3": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 3 0b01 0x22 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "mov v16.s[3], w4", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpinsrq xmm0, xmm0, rax, 0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x22 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.d[0], x4", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpinsrq xmm0, xmm1, rax, 0": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 3 0b01 0x22 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "mov v16.d[0], x4", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpinsrq xmm0, xmm1, rax, 1": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map 3 0b01 0x22 128-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "mov v16.d[1], x4", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vinserti128 ymm0, ymm1, xmm2, 0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x38 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mov v16.16b, v18.16b", + "str q2, [x28, #16]" + ] + }, + "vinserti128 ymm0, ymm1, xmm2, 1": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x38 256-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "str q18, [x28, #16]" + ] + }, + "vextracti128 xmm0, ymm1, 0": { + 
"ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x39 256-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v16.16b, v17.16b", + "str q2, [x28, #16]" + ] + }, + "vextracti128 xmm0, ymm1, 1": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x39 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x28, #32]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vdpps xmm0, xmm1, xmm2, 00000000b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x40 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v16.2d, #0x0", + "str q16, [x28, #16]" + ] + }, + "vdpps xmm0, xmm1, xmm2, 00001111b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x40 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v16.2d, #0x0", + "str q16, [x28, #16]" + ] + }, + "vdpps xmm0, xmm1, xmm2, 11110000b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x40 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v16.2d, #0x0", + "str q16, [x28, #16]" + ] + }, + "vdpps xmm0, xmm1, xmm2, 11111111b": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 3 0b01 0x40 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "fmul v3.4s, v17.4s, v18.4s", + "faddp v3.4s, v3.4s, v3.4s", + "faddp s3, v3.2s", + "dup v16.4s, v3.s[0]", + "str q2, [x28, #16]" + ] + }, + "vdpps ymm0, ymm1, ymm2, 00000000b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x40 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v16.2d, #0x0", + "str q16, [x28, #16]" + ] + }, + "vdpps ymm0, ymm1, ymm2, 00001111b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x40 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v16.2d, #0x0", + "str q16, [x28, #16]" + ] + }, + "vdpps ymm0, ymm1, ymm2, 11110000b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x40 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v16.2d, #0x0", + "str q16, [x28, #16]" + ] + }, + "vdpps ymm0, ymm1, ymm2, 11111111b": { + "ExpectedInstructionCount": 11, + 
"Comment": [ + "Map 3 0b01 0x40 128-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "fmul v4.4s, v17.4s, v18.4s", + "faddp v4.4s, v4.4s, v4.4s", + "faddp s4, v4.2s", + "dup v16.4s, v4.s[0]", + "fmul v2.4s, v2.4s, v3.4s", + "faddp v2.4s, v2.4s, v2.4s", + "faddp s2, v2.2s", + "dup v2.4s, v2.s[0]", + "str q2, [x28, #16]" + ] + }, + "vdppd xmm0, xmm1, xmm2, 00000000b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x41 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v16.2d, #0x0", + "str q16, [x28, #16]" + ] + }, + "vdppd xmm0, xmm1, xmm2, 00001111b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x41 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v16.2d, #0x0", + "str q16, [x28, #16]" + ] + }, + "vdppd xmm0, xmm1, xmm2, 11110000b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x41 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v16.2d, #0x0", + "str q16, [x28, #16]" + ] + }, + "vdppd xmm0, xmm1, xmm2, 11111111b": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 3 0b01 0x41 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "fmul v3.2d, v17.2d, v18.2d", + "faddp d3, v3.2d", + "dup v16.2d, v3.d[0]", + "str q2, [x28, #16]" + ] + }, + "vmpsadbw xmm0, xmm1, xmm2, 000b": { + "ExpectedInstructionCount": 16, + "Comment": [ + "Map 3 0b01 0x42 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "dup v3.4s, v18.s[0]", + "ext v4.16b, v17.16b, v17.16b, #0", + "ext v5.16b, v17.16b, v17.16b, #1", + "ext v6.16b, v17.16b, v17.16b, #2", + "ext v7.16b, v17.16b, v17.16b, #3", + "uabdl v4.8h, v4.8b, v3.8b", + "uabdl v5.8h, v5.8b, v3.8b", + "uabdl v6.8h, v6.8b, v3.8b", + "uabdl v3.8h, v7.8b, v3.8b", + "addp v4.8h, v4.8h, v6.8h", + "addp v3.8h, v5.8h, v3.8h", + "trn1 v5.4s, v4.4s, v3.4s", + "trn2 v3.4s, v4.4s, v3.4s", + "addp v16.8h, v5.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vmpsadbw xmm0, xmm1, xmm2, 001b": { + "ExpectedInstructionCount": 16, + "Comment": [ + "Map 3 
0b01 0x42 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "dup v3.4s, v18.s[1]", + "ext v4.16b, v17.16b, v17.16b, #0", + "ext v5.16b, v17.16b, v17.16b, #1", + "ext v6.16b, v17.16b, v17.16b, #2", + "ext v7.16b, v17.16b, v17.16b, #3", + "uabdl v4.8h, v4.8b, v3.8b", + "uabdl v5.8h, v5.8b, v3.8b", + "uabdl v6.8h, v6.8b, v3.8b", + "uabdl v3.8h, v7.8b, v3.8b", + "addp v4.8h, v4.8h, v6.8h", + "addp v3.8h, v5.8h, v3.8h", + "trn1 v5.4s, v4.4s, v3.4s", + "trn2 v3.4s, v4.4s, v3.4s", + "addp v16.8h, v5.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vmpsadbw xmm0, xmm1, xmm2, 010b": { + "ExpectedInstructionCount": 16, + "Comment": [ + "Map 3 0b01 0x42 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "dup v3.4s, v18.s[2]", + "ext v4.16b, v17.16b, v17.16b, #0", + "ext v5.16b, v17.16b, v17.16b, #1", + "ext v6.16b, v17.16b, v17.16b, #2", + "ext v7.16b, v17.16b, v17.16b, #3", + "uabdl v4.8h, v4.8b, v3.8b", + "uabdl v5.8h, v5.8b, v3.8b", + "uabdl v6.8h, v6.8b, v3.8b", + "uabdl v3.8h, v7.8b, v3.8b", + "addp v4.8h, v4.8h, v6.8h", + "addp v3.8h, v5.8h, v3.8h", + "trn1 v5.4s, v4.4s, v3.4s", + "trn2 v3.4s, v4.4s, v3.4s", + "addp v16.8h, v5.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vmpsadbw xmm0, xmm1, xmm2, 011b": { + "ExpectedInstructionCount": 16, + "Comment": [ + "Map 3 0b01 0x42 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "dup v3.4s, v18.s[3]", + "ext v4.16b, v17.16b, v17.16b, #0", + "ext v5.16b, v17.16b, v17.16b, #1", + "ext v6.16b, v17.16b, v17.16b, #2", + "ext v7.16b, v17.16b, v17.16b, #3", + "uabdl v4.8h, v4.8b, v3.8b", + "uabdl v5.8h, v5.8b, v3.8b", + "uabdl v6.8h, v6.8b, v3.8b", + "uabdl v3.8h, v7.8b, v3.8b", + "addp v4.8h, v4.8h, v6.8h", + "addp v3.8h, v5.8h, v3.8h", + "trn1 v5.4s, v4.4s, v3.4s", + "trn2 v3.4s, v4.4s, v3.4s", + "addp v16.8h, v5.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vmpsadbw xmm0, xmm1, xmm2, 100b": { + "ExpectedInstructionCount": 16, + "Comment": [ + "Map 3 0b01 0x42 128-bit" + ], + "ExpectedArm64ASM": 
[ + "movi v2.2d, #0x0", + "dup v3.4s, v18.s[0]", + "ext v4.16b, v17.16b, v17.16b, #4", + "ext v5.16b, v17.16b, v17.16b, #5", + "ext v6.16b, v17.16b, v17.16b, #6", + "ext v7.16b, v17.16b, v17.16b, #7", + "uabdl v4.8h, v4.8b, v3.8b", + "uabdl v5.8h, v5.8b, v3.8b", + "uabdl v6.8h, v6.8b, v3.8b", + "uabdl v3.8h, v7.8b, v3.8b", + "addp v4.8h, v4.8h, v6.8h", + "addp v3.8h, v5.8h, v3.8h", + "trn1 v5.4s, v4.4s, v3.4s", + "trn2 v3.4s, v4.4s, v3.4s", + "addp v16.8h, v5.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vmpsadbw xmm0, xmm1, xmm2, 101b": { + "ExpectedInstructionCount": 16, + "Comment": [ + "Map 3 0b01 0x42 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "dup v3.4s, v18.s[1]", + "ext v4.16b, v17.16b, v17.16b, #4", + "ext v5.16b, v17.16b, v17.16b, #5", + "ext v6.16b, v17.16b, v17.16b, #6", + "ext v7.16b, v17.16b, v17.16b, #7", + "uabdl v4.8h, v4.8b, v3.8b", + "uabdl v5.8h, v5.8b, v3.8b", + "uabdl v6.8h, v6.8b, v3.8b", + "uabdl v3.8h, v7.8b, v3.8b", + "addp v4.8h, v4.8h, v6.8h", + "addp v3.8h, v5.8h, v3.8h", + "trn1 v5.4s, v4.4s, v3.4s", + "trn2 v3.4s, v4.4s, v3.4s", + "addp v16.8h, v5.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vmpsadbw xmm0, xmm1, xmm2, 110b": { + "ExpectedInstructionCount": 16, + "Comment": [ + "Map 3 0b01 0x42 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "dup v3.4s, v18.s[2]", + "ext v4.16b, v17.16b, v17.16b, #4", + "ext v5.16b, v17.16b, v17.16b, #5", + "ext v6.16b, v17.16b, v17.16b, #6", + "ext v7.16b, v17.16b, v17.16b, #7", + "uabdl v4.8h, v4.8b, v3.8b", + "uabdl v5.8h, v5.8b, v3.8b", + "uabdl v6.8h, v6.8b, v3.8b", + "uabdl v3.8h, v7.8b, v3.8b", + "addp v4.8h, v4.8h, v6.8h", + "addp v3.8h, v5.8h, v3.8h", + "trn1 v5.4s, v4.4s, v3.4s", + "trn2 v3.4s, v4.4s, v3.4s", + "addp v16.8h, v5.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vmpsadbw xmm0, xmm1, xmm2, 111b": { + "ExpectedInstructionCount": 16, + "Comment": [ + "Map 3 0b01 0x42 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "dup v3.4s, 
v18.s[3]", + "ext v4.16b, v17.16b, v17.16b, #4", + "ext v5.16b, v17.16b, v17.16b, #5", + "ext v6.16b, v17.16b, v17.16b, #6", + "ext v7.16b, v17.16b, v17.16b, #7", + "uabdl v4.8h, v4.8b, v3.8b", + "uabdl v5.8h, v5.8b, v3.8b", + "uabdl v6.8h, v6.8b, v3.8b", + "uabdl v3.8h, v7.8b, v3.8b", + "addp v4.8h, v4.8h, v6.8h", + "addp v3.8h, v5.8h, v3.8h", + "trn1 v5.4s, v4.4s, v3.4s", + "trn2 v3.4s, v4.4s, v3.4s", + "addp v16.8h, v5.8h, v3.8h", + "str q2, [x28, #16]" + ] + }, + "vmpsadbw ymm0, ymm1, ymm2, 000b": { + "ExpectedInstructionCount": 31, + "Comment": [ + "Map 3 0b01 0x42 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "dup v4.4s, v18.s[0]", + "ext v5.16b, v17.16b, v17.16b, #0", + "ext v6.16b, v17.16b, v17.16b, #1", + "ext v7.16b, v17.16b, v17.16b, #2", + "ext v8.16b, v17.16b, v17.16b, #3", + "uabdl v5.8h, v5.8b, v4.8b", + "uabdl v6.8h, v6.8b, v4.8b", + "uabdl v7.8h, v7.8b, v4.8b", + "uabdl v4.8h, v8.8b, v4.8b", + "addp v5.8h, v5.8h, v7.8h", + "addp v4.8h, v6.8h, v4.8h", + "trn1 v6.4s, v5.4s, v4.4s", + "trn2 v4.4s, v5.4s, v4.4s", + "addp v16.8h, v6.8h, v4.8h", + "dup v3.4s, v3.s[0]", + "ext v4.16b, v2.16b, v2.16b, #0", + "ext v5.16b, v2.16b, v2.16b, #1", + "ext v6.16b, v2.16b, v2.16b, #2", + "ext v2.16b, v2.16b, v2.16b, #3", + "uabdl v4.8h, v4.8b, v3.8b", + "uabdl v5.8h, v5.8b, v3.8b", + "uabdl v6.8h, v6.8b, v3.8b", + "uabdl v2.8h, v2.8b, v3.8b", + "addp v3.8h, v4.8h, v6.8h", + "addp v2.8h, v5.8h, v2.8h", + "trn1 v4.4s, v3.4s, v2.4s", + "trn2 v2.4s, v3.4s, v2.4s", + "addp v2.8h, v4.8h, v2.8h", + "str q2, [x28, #16]" + ] + }, + "vmpsadbw ymm0, ymm1, ymm2, 001b": { + "ExpectedInstructionCount": 31, + "Comment": [ + "Map 3 0b01 0x42 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "dup v4.4s, v18.s[1]", + "ext v5.16b, v17.16b, v17.16b, #0", + "ext v6.16b, v17.16b, v17.16b, #1", + "ext v7.16b, v17.16b, v17.16b, #2", + "ext v8.16b, v17.16b, v17.16b, #3", + "uabdl v5.8h, v5.8b, v4.8b", + "uabdl 
v6.8h, v6.8b, v4.8b", + "uabdl v7.8h, v7.8b, v4.8b", + "uabdl v4.8h, v8.8b, v4.8b", + "addp v5.8h, v5.8h, v7.8h", + "addp v4.8h, v6.8h, v4.8h", + "trn1 v6.4s, v5.4s, v4.4s", + "trn2 v4.4s, v5.4s, v4.4s", + "addp v16.8h, v6.8h, v4.8h", + "dup v3.4s, v3.s[0]", + "ext v4.16b, v2.16b, v2.16b, #0", + "ext v5.16b, v2.16b, v2.16b, #1", + "ext v6.16b, v2.16b, v2.16b, #2", + "ext v2.16b, v2.16b, v2.16b, #3", + "uabdl v4.8h, v4.8b, v3.8b", + "uabdl v5.8h, v5.8b, v3.8b", + "uabdl v6.8h, v6.8b, v3.8b", + "uabdl v2.8h, v2.8b, v3.8b", + "addp v3.8h, v4.8h, v6.8h", + "addp v2.8h, v5.8h, v2.8h", + "trn1 v4.4s, v3.4s, v2.4s", + "trn2 v2.4s, v3.4s, v2.4s", + "addp v2.8h, v4.8h, v2.8h", + "str q2, [x28, #16]" + ] + }, + "vmpsadbw ymm0, ymm1, ymm2, 010b": { + "ExpectedInstructionCount": 31, + "Comment": [ + "Map 3 0b01 0x42 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "dup v4.4s, v18.s[2]", + "ext v5.16b, v17.16b, v17.16b, #0", + "ext v6.16b, v17.16b, v17.16b, #1", + "ext v7.16b, v17.16b, v17.16b, #2", + "ext v8.16b, v17.16b, v17.16b, #3", + "uabdl v5.8h, v5.8b, v4.8b", + "uabdl v6.8h, v6.8b, v4.8b", + "uabdl v7.8h, v7.8b, v4.8b", + "uabdl v4.8h, v8.8b, v4.8b", + "addp v5.8h, v5.8h, v7.8h", + "addp v4.8h, v6.8h, v4.8h", + "trn1 v6.4s, v5.4s, v4.4s", + "trn2 v4.4s, v5.4s, v4.4s", + "addp v16.8h, v6.8h, v4.8h", + "dup v3.4s, v3.s[0]", + "ext v4.16b, v2.16b, v2.16b, #0", + "ext v5.16b, v2.16b, v2.16b, #1", + "ext v6.16b, v2.16b, v2.16b, #2", + "ext v2.16b, v2.16b, v2.16b, #3", + "uabdl v4.8h, v4.8b, v3.8b", + "uabdl v5.8h, v5.8b, v3.8b", + "uabdl v6.8h, v6.8b, v3.8b", + "uabdl v2.8h, v2.8b, v3.8b", + "addp v3.8h, v4.8h, v6.8h", + "addp v2.8h, v5.8h, v2.8h", + "trn1 v4.4s, v3.4s, v2.4s", + "trn2 v2.4s, v3.4s, v2.4s", + "addp v2.8h, v4.8h, v2.8h", + "str q2, [x28, #16]" + ] + }, + "vmpsadbw ymm0, ymm1, ymm2, 011b": { + "ExpectedInstructionCount": 31, + "Comment": [ + "Map 3 0b01 0x42 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + 
"ldr q3, [x28, #48]", + "dup v4.4s, v18.s[3]", + "ext v5.16b, v17.16b, v17.16b, #0", + "ext v6.16b, v17.16b, v17.16b, #1", + "ext v7.16b, v17.16b, v17.16b, #2", + "ext v8.16b, v17.16b, v17.16b, #3", + "uabdl v5.8h, v5.8b, v4.8b", + "uabdl v6.8h, v6.8b, v4.8b", + "uabdl v7.8h, v7.8b, v4.8b", + "uabdl v4.8h, v8.8b, v4.8b", + "addp v5.8h, v5.8h, v7.8h", + "addp v4.8h, v6.8h, v4.8h", + "trn1 v6.4s, v5.4s, v4.4s", + "trn2 v4.4s, v5.4s, v4.4s", + "addp v16.8h, v6.8h, v4.8h", + "dup v3.4s, v3.s[0]", + "ext v4.16b, v2.16b, v2.16b, #0", + "ext v5.16b, v2.16b, v2.16b, #1", + "ext v6.16b, v2.16b, v2.16b, #2", + "ext v2.16b, v2.16b, v2.16b, #3", + "uabdl v4.8h, v4.8b, v3.8b", + "uabdl v5.8h, v5.8b, v3.8b", + "uabdl v6.8h, v6.8b, v3.8b", + "uabdl v2.8h, v2.8b, v3.8b", + "addp v3.8h, v4.8h, v6.8h", + "addp v2.8h, v5.8h, v2.8h", + "trn1 v4.4s, v3.4s, v2.4s", + "trn2 v2.4s, v3.4s, v2.4s", + "addp v2.8h, v4.8h, v2.8h", + "str q2, [x28, #16]" + ] + }, + "vmpsadbw ymm0, ymm1, ymm2, 100b": { + "ExpectedInstructionCount": 31, + "Comment": [ + "Map 3 0b01 0x42 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "dup v4.4s, v18.s[0]", + "ext v5.16b, v17.16b, v17.16b, #4", + "ext v6.16b, v17.16b, v17.16b, #5", + "ext v7.16b, v17.16b, v17.16b, #6", + "ext v8.16b, v17.16b, v17.16b, #7", + "uabdl v5.8h, v5.8b, v4.8b", + "uabdl v6.8h, v6.8b, v4.8b", + "uabdl v7.8h, v7.8b, v4.8b", + "uabdl v4.8h, v8.8b, v4.8b", + "addp v5.8h, v5.8h, v7.8h", + "addp v4.8h, v6.8h, v4.8h", + "trn1 v6.4s, v5.4s, v4.4s", + "trn2 v4.4s, v5.4s, v4.4s", + "addp v16.8h, v6.8h, v4.8h", + "dup v3.4s, v3.s[0]", + "ext v4.16b, v2.16b, v2.16b, #0", + "ext v5.16b, v2.16b, v2.16b, #1", + "ext v6.16b, v2.16b, v2.16b, #2", + "ext v2.16b, v2.16b, v2.16b, #3", + "uabdl v4.8h, v4.8b, v3.8b", + "uabdl v5.8h, v5.8b, v3.8b", + "uabdl v6.8h, v6.8b, v3.8b", + "uabdl v2.8h, v2.8b, v3.8b", + "addp v3.8h, v4.8h, v6.8h", + "addp v2.8h, v5.8h, v2.8h", + "trn1 v4.4s, v3.4s, v2.4s", + "trn2 v2.4s, v3.4s, 
v2.4s", + "addp v2.8h, v4.8h, v2.8h", + "str q2, [x28, #16]" + ] + }, + "vmpsadbw ymm0, ymm1, ymm2, 101b": { + "ExpectedInstructionCount": 31, + "Comment": [ + "Map 3 0b01 0x42 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "dup v4.4s, v18.s[1]", + "ext v5.16b, v17.16b, v17.16b, #4", + "ext v6.16b, v17.16b, v17.16b, #5", + "ext v7.16b, v17.16b, v17.16b, #6", + "ext v8.16b, v17.16b, v17.16b, #7", + "uabdl v5.8h, v5.8b, v4.8b", + "uabdl v6.8h, v6.8b, v4.8b", + "uabdl v7.8h, v7.8b, v4.8b", + "uabdl v4.8h, v8.8b, v4.8b", + "addp v5.8h, v5.8h, v7.8h", + "addp v4.8h, v6.8h, v4.8h", + "trn1 v6.4s, v5.4s, v4.4s", + "trn2 v4.4s, v5.4s, v4.4s", + "addp v16.8h, v6.8h, v4.8h", + "dup v3.4s, v3.s[0]", + "ext v4.16b, v2.16b, v2.16b, #0", + "ext v5.16b, v2.16b, v2.16b, #1", + "ext v6.16b, v2.16b, v2.16b, #2", + "ext v2.16b, v2.16b, v2.16b, #3", + "uabdl v4.8h, v4.8b, v3.8b", + "uabdl v5.8h, v5.8b, v3.8b", + "uabdl v6.8h, v6.8b, v3.8b", + "uabdl v2.8h, v2.8b, v3.8b", + "addp v3.8h, v4.8h, v6.8h", + "addp v2.8h, v5.8h, v2.8h", + "trn1 v4.4s, v3.4s, v2.4s", + "trn2 v2.4s, v3.4s, v2.4s", + "addp v2.8h, v4.8h, v2.8h", + "str q2, [x28, #16]" + ] + }, + "vmpsadbw ymm0, ymm1, ymm2, 110b": { + "ExpectedInstructionCount": 31, + "Comment": [ + "Map 3 0b01 0x42 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "dup v4.4s, v18.s[2]", + "ext v5.16b, v17.16b, v17.16b, #4", + "ext v6.16b, v17.16b, v17.16b, #5", + "ext v7.16b, v17.16b, v17.16b, #6", + "ext v8.16b, v17.16b, v17.16b, #7", + "uabdl v5.8h, v5.8b, v4.8b", + "uabdl v6.8h, v6.8b, v4.8b", + "uabdl v7.8h, v7.8b, v4.8b", + "uabdl v4.8h, v8.8b, v4.8b", + "addp v5.8h, v5.8h, v7.8h", + "addp v4.8h, v6.8h, v4.8h", + "trn1 v6.4s, v5.4s, v4.4s", + "trn2 v4.4s, v5.4s, v4.4s", + "addp v16.8h, v6.8h, v4.8h", + "dup v3.4s, v3.s[0]", + "ext v4.16b, v2.16b, v2.16b, #0", + "ext v5.16b, v2.16b, v2.16b, #1", + "ext v6.16b, v2.16b, v2.16b, #2", + "ext v2.16b, v2.16b, v2.16b, 
#3", + "uabdl v4.8h, v4.8b, v3.8b", + "uabdl v5.8h, v5.8b, v3.8b", + "uabdl v6.8h, v6.8b, v3.8b", + "uabdl v2.8h, v2.8b, v3.8b", + "addp v3.8h, v4.8h, v6.8h", + "addp v2.8h, v5.8h, v2.8h", + "trn1 v4.4s, v3.4s, v2.4s", + "trn2 v2.4s, v3.4s, v2.4s", + "addp v2.8h, v4.8h, v2.8h", + "str q2, [x28, #16]" + ] + }, + "vmpsadbw ymm0, ymm1, ymm2, 111b": { + "ExpectedInstructionCount": 31, + "Comment": [ + "Map 3 0b01 0x42 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "dup v4.4s, v18.s[3]", + "ext v5.16b, v17.16b, v17.16b, #4", + "ext v6.16b, v17.16b, v17.16b, #5", + "ext v7.16b, v17.16b, v17.16b, #6", + "ext v8.16b, v17.16b, v17.16b, #7", + "uabdl v5.8h, v5.8b, v4.8b", + "uabdl v6.8h, v6.8b, v4.8b", + "uabdl v7.8h, v7.8b, v4.8b", + "uabdl v4.8h, v8.8b, v4.8b", + "addp v5.8h, v5.8h, v7.8h", + "addp v4.8h, v6.8h, v4.8h", + "trn1 v6.4s, v5.4s, v4.4s", + "trn2 v4.4s, v5.4s, v4.4s", + "addp v16.8h, v6.8h, v4.8h", + "dup v3.4s, v3.s[0]", + "ext v4.16b, v2.16b, v2.16b, #0", + "ext v5.16b, v2.16b, v2.16b, #1", + "ext v6.16b, v2.16b, v2.16b, #2", + "ext v2.16b, v2.16b, v2.16b, #3", + "uabdl v4.8h, v4.8b, v3.8b", + "uabdl v5.8h, v5.8b, v3.8b", + "uabdl v6.8h, v6.8b, v3.8b", + "uabdl v2.8h, v2.8b, v3.8b", + "addp v3.8h, v4.8h, v6.8h", + "addp v2.8h, v5.8h, v2.8h", + "trn1 v4.4s, v3.4s, v2.4s", + "trn2 v2.4s, v3.4s, v2.4s", + "addp v2.8h, v4.8h, v2.8h", + "str q2, [x28, #16]" + ] + }, + "vpclmulqdq xmm0, xmm1, xmm2, 00000b": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 3 0b01 0x44 128-bit" + ], + "ExpectedArm64ASM": [ + "pmull v16.1q, v17.1d, v18.1d" + ] + }, + "vpclmulqdq xmm0, xmm1, xmm2, 00001b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x44 128-bit" + ], + "ExpectedArm64ASM": [ + "dup v0.2d, v17.d[1]", + "pmull v16.1q, v0.1d, v18.1d" + ] + }, + "vpclmulqdq xmm0, xmm1, xmm2, 10000b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x44 128-bit" + ], + "ExpectedArm64ASM": [ + "dup v0.2d, 
v18.d[1]", + "pmull v16.1q, v0.1d, v17.1d" + ] + }, + "vpclmulqdq xmm0, xmm1, xmm2, 10001b": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map 3 0b01 0x44 128-bit" + ], + "ExpectedArm64ASM": [ + "pmull2 v16.1q, v17.2d, v18.2d" + ] + }, + "vpclmulqdq ymm0, ymm1, ymm2, 00000b": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 3 0b01 0x44 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "pmull v16.1q, v17.1d, v18.1d", + "pmull v2.1q, v2.1d, v3.1d", + "str q2, [x28, #16]" + ] + }, + "vpclmulqdq ymm0, ymm1, ymm2, 00001b": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 3 0b01 0x44 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "dup v0.2d, v17.d[1]", + "pmull v16.1q, v0.1d, v18.1d", + "dup v0.2d, v2.d[1]", + "pmull v2.1q, v0.1d, v3.1d", + "str q2, [x28, #16]" + ] + }, + "vpclmulqdq ymm0, ymm1, ymm2, 10000b": { + "ExpectedInstructionCount": 7, + "Comment": [ + "Map 3 0b01 0x44 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "dup v0.2d, v18.d[1]", + "pmull v16.1q, v0.1d, v17.1d", + "dup v0.2d, v3.d[1]", + "pmull v2.1q, v0.1d, v2.1d", + "str q2, [x28, #16]" + ] + }, + "vpclmulqdq ymm0, ymm1, ymm2, 10001b": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map 3 0b01 0x44 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "pmull2 v16.1q, v17.2d, v18.2d", + "pmull2 v2.1q, v2.2d, v3.2d", + "str q2, [x28, #16]" + ] + }, + "vperm2i128 ymm0, ymm1, ymm2, 00000000b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x46 256-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "str q17, [x28, #16]" + ] + }, + "vperm2i128 ymm0, ymm1, ymm2, 00000001b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x46 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x28, #32]", + "str q17, [x28, #16]" + ] + }, + "vperm2i128 ymm0, ymm1, ymm2, 00000010b": { + 
"ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x46 256-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v18.16b", + "str q17, [x28, #16]" + ] + }, + "vperm2i128 ymm0, ymm1, ymm2, 00000011b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x46 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x28, #48]", + "str q17, [x28, #16]" + ] + }, + "vperm2i128 ymm0, ymm1, ymm2, 00010000b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x46 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mov v16.16b, v17.16b", + "str q2, [x28, #16]" + ] + }, + "vperm2i128 ymm0, ymm1, ymm2, 00010001b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x46 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x28, #32]", + "str q16, [x28, #16]" + ] + }, + "vperm2i128 ymm0, ymm1, ymm2, 00010010b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x46 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mov v16.16b, v18.16b", + "str q2, [x28, #16]" + ] + }, + "vperm2i128 ymm0, ymm1, ymm2, 00010011b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x46 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q16, [x28, #48]", + "str q2, [x28, #16]" + ] + }, + "vperm2i128 ymm0, ymm1, ymm2, 00100000b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x46 256-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v17.16b", + "str q18, [x28, #16]" + ] + }, + "vperm2i128 ymm0, ymm1, ymm2, 00100001b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x46 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x28, #32]", + "str q18, [x28, #16]" + ] + }, + "vperm2i128 ymm0, ymm1, ymm2, 00100010b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x46 256-bit" + ], + "ExpectedArm64ASM": [ + "mov v16.16b, v18.16b", + "str q18, [x28, #16]" + ] + }, + "vperm2i128 ymm0, ymm1, ymm2, 00100011b": { + "ExpectedInstructionCount": 2, + "Comment": 
[ + "Map 3 0b01 0x46 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x28, #48]", + "str q18, [x28, #16]" + ] + }, + "vperm2i128 ymm0, ymm1, ymm2, 00110000b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x46 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #48]", + "mov v16.16b, v17.16b", + "str q2, [x28, #16]" + ] + }, + "vperm2i128 ymm0, ymm1, ymm2, 00110001b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x46 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x28, #32]", + "ldr q2, [x28, #48]", + "str q2, [x28, #16]" + ] + }, + "vperm2i128 ymm0, ymm1, ymm2, 00110010b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x46 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #48]", + "mov v16.16b, v18.16b", + "str q2, [x28, #16]" + ] + }, + "vperm2i128 ymm0, ymm1, ymm2, 00110011b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x46 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x28, #48]", + "str q16, [x28, #16]" + ] + }, + "vperm2i128 ymm0, ymm1, ymm2, 00001000b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x46 256-bit" + ], + "ExpectedArm64ASM": [ + "movi v16.2d, #0x0", + "str q17, [x28, #16]" + ] + }, + "vperm2i128 ymm0, ymm1, ymm2, 00011000b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x46 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v16.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vperm2i128 ymm0, ymm1, ymm2, 00101000b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 3 0b01 0x46 256-bit" + ], + "ExpectedArm64ASM": [ + "movi v16.2d, #0x0", + "str q18, [x28, #16]" + ] + }, + "vperm2i128 ymm0, ymm1, ymm2, 00111000b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x46 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #48]", + "movi v16.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vperm2i128 ymm0, ymm1, ymm2, 10001000b": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map 
3 0b01 0x46 256-bit" + ], + "ExpectedArm64ASM": [ + "movi v16.2d, #0x0", + "str q16, [x28, #16]" + ] + }, + "vperm2i128 ymm0, ymm1, ymm2, 10000000b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x46 256-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v16.16b, v17.16b", + "str q2, [x28, #16]" + ] + }, + "vperm2i128 ymm0, ymm1, ymm2, 10000001b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x46 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x28, #32]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vperm2i128 ymm0, ymm1, ymm2, 10000010b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x46 256-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v16.16b, v18.16b", + "str q2, [x28, #16]" + ] + }, + "vperm2i128 ymm0, ymm1, ymm2, 10000011b": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x46 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q16, [x28, #48]", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vblendvps xmm0, xmm1, xmm2, xmm3": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x4a 128-bit" + ], + "ExpectedArm64ASM": [ + "sshr v2.4s, v19.4s, #31", + "mov v16.16b, v2.16b", + "bsl v16.16b, v18.16b, v17.16b" + ] + }, + "vblendvps ymm0, ymm1, ymm2, ymm3": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x4a 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "ldr q4, [x28, #64]", + "sshr v5.4s, v19.4s, #31", + "mov v16.16b, v5.16b", + "bsl v16.16b, v18.16b, v17.16b", + "sshr v4.4s, v4.4s, #31", + "bit v2.16b, v3.16b, v4.16b", + "str q2, [x28, #16]" + ] + }, + "vblendvpd xmm0, xmm1, xmm2, xmm3": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x4b 128-bit" + ], + "ExpectedArm64ASM": [ + "sshr v2.2d, v19.2d, #63", + "mov v16.16b, v2.16b", + "bsl v16.16b, v18.16b, v17.16b" + ] + }, + "vblendvpd ymm0, ymm1, ymm2, ymm3": { + "ExpectedInstructionCount": 9, + "Comment": [ + 
"Map 3 0b01 0x4b 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "ldr q4, [x28, #64]", + "sshr v5.2d, v19.2d, #63", + "mov v16.16b, v5.16b", + "bsl v16.16b, v18.16b, v17.16b", + "sshr v4.2d, v4.2d, #63", + "bit v2.16b, v3.16b, v4.16b", + "str q2, [x28, #16]" + ] + }, + "vpblendvb xmm0, xmm1, xmm2, xmm3": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map 3 0b01 0x4c 128-bit" + ], + "ExpectedArm64ASM": [ + "sshr v2.16b, v19.16b, #7", + "mov v16.16b, v2.16b", + "bsl v16.16b, v18.16b, v17.16b" + ] + }, + "vpblendvb ymm0, ymm1, ymm2, ymm3": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0x4c 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ldr q3, [x28, #48]", + "ldr q4, [x28, #64]", + "sshr v5.16b, v19.16b, #7", + "mov v16.16b, v5.16b", + "bsl v16.16b, v18.16b, v17.16b", + "sshr v4.16b, v4.16b, #7", + "bit v2.16b, v3.16b, v4.16b", + "str q2, [x28, #16]" + ] + }, + "vfmaddsubps xmm0, xmm1, xmm2, xmm3": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "Map 3 0b01 0x5c 128-bit" + ] + }, + "vfmaddsubps ymm0, ymm1, ymm2, ymm3": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "Map 3 0b01 0x5c 256-bit" + ] + }, + "vfmaddsubpd xmm0, xmm1, xmm2, xmm3": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "Map 3 0b01 0x5d 128-bit" + ] + }, + "vfmaddsubpd ymm0, ymm1, ymm2, ymm3": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "Map 3 0b01 0x5d 256-bit" + ] + }, + "vfmsubaddps xmm0, xmm1, xmm2, xmm3": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "Map 3 0b01 0x5e 128-bit" + ] + }, + "vfmsubaddps ymm0, ymm1, ymm2, ymm3": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "Map 3 0b01 0x5e 256-bit" + ] + }, + "vfmsubaddpd xmm0, xmm1, xmm2, xmm3": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "Map 3 0b01 0x5f 128-bit" + ] + }, + "vfmsubaddpd ymm0, ymm1, ymm2, 
ymm3": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "Map 3 0b01 0x5f 256-bit" + ] + }, + "vfmaddps xmm0, xmm1, xmm2, xmm3": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "Map 3 0b01 0x68 128-bit" + ] + }, + "vfmaddps ymm0, ymm1, ymm2, ymm3": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "Map 3 0b01 0x68 256-bit" + ] + }, + "vfmaddpd xmm0, xmm1, xmm2, xmm3": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "Map 3 0b01 0x69 128-bit" + ] + }, + "vfmaddpd ymm0, ymm1, ymm2, ymm3": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "Map 3 0b01 0x69 256-bit" + ] + }, + "vfmaddss xmm0, xmm1, xmm2, xmm3": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "Map 3 0b01 0x6a 128-bit" + ] + }, + "vfmaddsd xmm0, xmm1, xmm2, xmm3": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "Map 3 0b01 0x6b 128-bit" + ] + }, + "vfmsubps xmm0, xmm1, xmm2, xmm3": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "Map 3 0b01 0x6c 128-bit" + ] + }, + "vfmsubps ymm0, ymm1, ymm2, ymm3": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "Map 3 0b01 0x6c 256-bit" + ] + }, + "vfmsubpd xmm0, xmm1, xmm2, xmm3": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "Map 3 0b01 0x6d 128-bit" + ] + }, + "vfmsubpd ymm0, ymm1, ymm2, ymm3": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "Map 3 0b01 0x6d 256-bit" + ] + }, + "vfmsubss xmm0, xmm1, xmm2, xmm3": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "Map 3 0b01 0x6e 128-bit" + ] + }, + "vfmsubsd xmm0, xmm1, xmm2, xmm3": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "Map 3 0b01 0x6f 128-bit" + ] + }, + "vfnmaddps xmm0, xmm1, xmm2, xmm3": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "Map 3 0b01 0x78 128-bit" + ] + }, + "vfnmaddpd ymm0, ymm1, ymm2, ymm3": { 
+ "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "Map 3 0b01 0x78 256-bit" + ] + }, + "vfnmaddss xmm0, xmm1, xmm2, xmm3": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "Map 3 0b01 0x79 128-bit" + ] + }, + "vfnmaddsd xmm0, xmm1, xmm2, xmm3": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "Map 3 0b01 0x7a 128-bit" + ] + }, + "vfnmsubps xmm0, xmm1, xmm2, xmm3": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "Map 3 0b01 0x7c 128-bit" + ] + }, + "vfnmsubps ymm0, ymm1, ymm2, ymm3": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "Map 3 0b01 0x7c 256-bit" + ] + }, + "vfnmsubpd xmm0, xmm1, xmm2, xmm3": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "Map 3 0b01 0x7d 128-bit" + ] + }, + "vfnmsubpd ymm0, ymm1, ymm2, ymm3": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "Map 3 0b01 0x7d 256-bit" + ] + }, + "vfnmsubss xmm0, xmm1, xmm2, xmm3": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "Map 3 0b01 0x7e 128-bit" + ] + }, + "vfnmsubsd xmm0, xmm1, xmm2, xmm3": { + "ExpectedInstructionCount": -1, + "Skip": "Yes", + "Comment": [ + "Map 3 0b01 0x7f 128-bit" + ] + }, + "vaeskeygenassist xmm0, xmm1, 0": { + "ExpectedInstructionCount": 6, + "Comment": [ + "Map 3 0b01 0xdf 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "ldr q3, [x28, #2528]", + "mov v16.16b, v17.16b", + "unimplemented (Unimplemented)", + "tbl v16.16b, {v16.16b}, v3.16b", + "str q2, [x28, #16]" + ] + }, + "vaeskeygenassist xmm0, xmm1, 0xFF": { + "ExpectedInstructionCount": 9, + "Comment": [ + "Map 3 0b01 0xdf 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "ldr q3, [x28, #2528]", + "mov v16.16b, v17.16b", + "unimplemented (Unimplemented)", + "tbl v16.16b, {v16.16b}, v3.16b", + "mov x0, #0xff00000000", + "dup v1.2d, x0", + "eor v16.16b, v16.16b, v1.16b", + "str q2, [x28, #16]" + ] + } + } +} diff --git 
a/unittests/InstructionCountCI/AVX128/VEX_map_group.json b/unittests/InstructionCountCI/AVX128/VEX_map_group.json new file mode 100644 index 0000000000..872b4d0d2a --- /dev/null +++ b/unittests/InstructionCountCI/AVX128/VEX_map_group.json @@ -0,0 +1,691 @@ +{ + "Features": { + "Bitness": 64, + "EnabledHostFeatures": [ + "AVX" + ], + "DisabledHostFeatures": [ + "AFP", + "FLAGM", + "FLAGM2", + "SVE256", + "SVE128" + ] + }, + "Instructions": { + "vpsrlw xmm0, xmm1, 0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 12 0b010 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v16.16b, v17.16b", + "str q2, [x28, #16]" + ] + }, + "vpsrlw xmm0, xmm1, 15": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 12 0b010 128-bit" + ], + "ExpectedArm64ASM": [ + "ushr v16.8h, v17.8h, #15", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsrlw xmm0, xmm1, 16": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 12 0b010 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v16.2d, #0x0", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsrlw ymm0, ymm1, 0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 12 0b010 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mov v16.16b, v17.16b", + "str q2, [x28, #16]" + ] + }, + "vpsrlw ymm0, ymm1, 15": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map group 12 0b010 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ushr v16.8h, v17.8h, #15", + "ushr v2.8h, v2.8h, #15", + "str q2, [x28, #16]" + ] + }, + "vpsrlw ymm0, ymm1, 16": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map group 12 0b010 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v16.2d, #0x0", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsraw xmm0, xmm1, 0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 12 0b100 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v16.16b, v17.16b", + "str q2, 
[x28, #16]" + ] + }, + "vpsraw xmm0, xmm1, 15": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 12 0b100 128-bit" + ], + "ExpectedArm64ASM": [ + "sshr v16.8h, v17.8h, #15", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsraw xmm0, xmm1, 16": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 12 0b100 128-bit" + ], + "ExpectedArm64ASM": [ + "sshr v16.8h, v17.8h, #15", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsraw ymm0, ymm1, 0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 12 0b100 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mov v16.16b, v17.16b", + "str q2, [x28, #16]" + ] + }, + "vpsraw ymm0, ymm1, 15": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map group 12 0b100 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "sshr v16.8h, v17.8h, #15", + "sshr v2.8h, v2.8h, #15", + "str q2, [x28, #16]" + ] + }, + "vpsraw ymm0, ymm1, 16": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map group 12 0b100 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "sshr v16.8h, v17.8h, #15", + "sshr v2.8h, v2.8h, #15", + "str q2, [x28, #16]" + ] + }, + "vpsllw xmm0, xmm1, 0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 12 0b110 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v16.16b, v17.16b", + "str q2, [x28, #16]" + ] + }, + "vpsllw xmm0, xmm1, 15": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 12 0b110 128-bit" + ], + "ExpectedArm64ASM": [ + "shl v16.8h, v17.8h, #15", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsllw xmm0, xmm1, 16": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 12 0b110 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v16.2d, #0x0", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsllw ymm0, ymm1, 0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 12 0b110 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mov 
v16.16b, v17.16b", + "str q2, [x28, #16]" + ] + }, + "vpsllw ymm0, ymm1, 15": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map group 12 0b110 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "shl v16.8h, v17.8h, #15", + "shl v2.8h, v2.8h, #15", + "str q2, [x28, #16]" + ] + }, + "vpsllw ymm0, ymm1, 16": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map group 12 0b110 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v16.2d, #0x0", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsrld xmm0, xmm1, 0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 13 0b010 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v16.16b, v17.16b", + "str q2, [x28, #16]" + ] + }, + "vpsrld xmm0, xmm1, 31": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 13 0b010 128-bit" + ], + "ExpectedArm64ASM": [ + "ushr v16.4s, v17.4s, #31", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsrld xmm0, xmm1, 32": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 13 0b010 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v16.2d, #0x0", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsrld ymm0, ymm1, 0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 13 0b010 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mov v16.16b, v17.16b", + "str q2, [x28, #16]" + ] + }, + "vpsrld ymm0, ymm1, 31": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map group 13 0b010 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ushr v16.4s, v17.4s, #31", + "ushr v2.4s, v2.4s, #31", + "str q2, [x28, #16]" + ] + }, + "vpsrld ymm0, ymm1, 32": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map group 13 0b010 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v16.2d, #0x0", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsrad xmm0, xmm1, 0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 13 0b100 
128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v16.16b, v17.16b", + "str q2, [x28, #16]" + ] + }, + "vpsrad xmm0, xmm1, 31": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 13 0b100 128-bit" + ], + "ExpectedArm64ASM": [ + "sshr v16.4s, v17.4s, #31", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsrad xmm0, xmm1, 32": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 13 0b100 128-bit" + ], + "ExpectedArm64ASM": [ + "sshr v16.4s, v17.4s, #31", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsrad ymm0, ymm1, 0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 13 0b100 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mov v16.16b, v17.16b", + "str q2, [x28, #16]" + ] + }, + "vpsrad ymm0, ymm1, 31": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map group 13 0b100 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "sshr v16.4s, v17.4s, #31", + "sshr v2.4s, v2.4s, #31", + "str q2, [x28, #16]" + ] + }, + "vpsrad ymm0, ymm1, 32": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map group 13 0b100 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "sshr v16.4s, v17.4s, #31", + "sshr v2.4s, v2.4s, #31", + "str q2, [x28, #16]" + ] + }, + "vpslld xmm0, xmm1, 0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 13 0b110 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v16.16b, v17.16b", + "str q2, [x28, #16]" + ] + }, + "vpslld xmm0, xmm1, 31": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 13 0b110 128-bit" + ], + "ExpectedArm64ASM": [ + "shl v16.4s, v17.4s, #31", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpslld xmm0, xmm1, 32": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 13 0b110 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v16.2d, #0x0", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpslld ymm0, ymm1, 0": { + "ExpectedInstructionCount": 3, + 
"Comment": [ + "Map group 13 0b110 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mov v16.16b, v17.16b", + "str q2, [x28, #16]" + ] + }, + "vpslld ymm0, ymm1, 31": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map group 13 0b110 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "shl v16.4s, v17.4s, #31", + "shl v2.4s, v2.4s, #31", + "str q2, [x28, #16]" + ] + }, + "vpslld ymm0, ymm1, 32": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map group 13 0b110 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v16.2d, #0x0", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsrlq xmm0, xmm1, 0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 14 0b010 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v16.16b, v17.16b", + "str q2, [x28, #16]" + ] + }, + "vpsrlq xmm0, xmm1, 63": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 14 0b010 128-bit" + ], + "ExpectedArm64ASM": [ + "ushr v16.2d, v17.2d, #63", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsrlq xmm0, xmm1, 64": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 14 0b010 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v16.2d, #0x0", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsrlq ymm0, ymm1, 0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 14 0b010 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mov v16.16b, v17.16b", + "str q2, [x28, #16]" + ] + }, + "vpsrlq ymm0, ymm1, 63": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map group 14 0b010 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "ushr v16.2d, v17.2d, #63", + "ushr v2.2d, v2.2d, #63", + "str q2, [x28, #16]" + ] + }, + "vpsrlq ymm0, ymm1, 64": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map group 14 0b010 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v16.2d, #0x0", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, 
+ "vpsrldq xmm0, xmm1, 0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 14 0b011 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v16.16b, v17.16b", + "str q2, [x28, #16]" + ] + }, + "vpsrldq xmm0, xmm1, 15": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 14 0b011 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "ext v16.16b, v17.16b, v2.16b, #15", + "str q2, [x28, #16]" + ] + }, + "vpsrldq xmm0, xmm1, 16": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map group 14 0b011 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v16.2d, #0x0", + "str q16, [x28, #16]" + ] + }, + "vpsrldq ymm0, ymm1, 0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 14 0b011 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mov v16.16b, v17.16b", + "str q2, [x28, #16]" + ] + }, + "vpsrldq ymm0, ymm1, 15": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map group 14 0b011 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v3.2d, #0x0", + "ext v16.16b, v17.16b, v3.16b, #15", + "ext v2.16b, v2.16b, v3.16b, #15", + "str q2, [x28, #16]" + ] + }, + "vpsrldq ymm0, ymm1, 16": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map group 14 0b011 256-bit" + ], + "ExpectedArm64ASM": [ + "movi v16.2d, #0x0" + ] + }, + "vpsllq xmm0, xmm1, 0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 14 0b110 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v16.16b, v17.16b", + "str q2, [x28, #16]" + ] + }, + "vpsllq xmm0, xmm1, 63": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 14 0b110 128-bit" + ], + "ExpectedArm64ASM": [ + "shl v16.2d, v17.2d, #63", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsllq xmm0, xmm1, 64": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 14 0b110 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v16.2d, #0x0", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpsllq ymm0, ymm1, 0": { 
+ "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 14 0b110 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mov v16.16b, v17.16b", + "str q2, [x28, #16]" + ] + }, + "vpsllq ymm0, ymm1, 63": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map group 14 0b110 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "shl v16.2d, v17.2d, #63", + "shl v2.2d, v2.2d, #63", + "str q2, [x28, #16]" + ] + }, + "vpsllq ymm0, ymm1, 64": { + "ExpectedInstructionCount": 4, + "Comment": [ + "Map group 14 0b110 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v16.2d, #0x0", + "movi v2.2d, #0x0", + "str q2, [x28, #16]" + ] + }, + "vpslldq xmm0, xmm1, 0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 14 0b111 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "mov v16.16b, v17.16b", + "str q2, [x28, #16]" + ] + }, + "vpslldq xmm0, xmm1, 15": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 14 0b111 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v2.2d, #0x0", + "ext v16.16b, v2.16b, v17.16b, #1", + "str q2, [x28, #16]" + ] + }, + "vpslldq xmm0, xmm1, 16": { + "ExpectedInstructionCount": 2, + "Comment": [ + "Map group 14 0b111 128-bit" + ], + "ExpectedArm64ASM": [ + "movi v16.2d, #0x0", + "str q16, [x28, #16]" + ] + }, + "vpslldq ymm0, ymm1, 0": { + "ExpectedInstructionCount": 3, + "Comment": [ + "Map group 14 0b111 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "mov v16.16b, v17.16b", + "str q2, [x28, #16]" + ] + }, + "vpslldq ymm0, ymm1, 15": { + "ExpectedInstructionCount": 5, + "Comment": [ + "Map group 14 0b111 256-bit" + ], + "ExpectedArm64ASM": [ + "ldr q2, [x28, #32]", + "movi v3.2d, #0x0", + "ext v16.16b, v3.16b, v17.16b, #1", + "ext v2.16b, v3.16b, v2.16b, #1", + "str q2, [x28, #16]" + ] + }, + "vpslldq ymm0, ymm1, 16": { + "ExpectedInstructionCount": 1, + "Comment": [ + "Map group 14 0b111 256-bit" + ], + "ExpectedArm64ASM": [ + "movi v16.2d, #0x0" + ] + } + } 
+}