-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[AIE2] Tests for intrinsic lowering using shufflevector
- Loading branch information
1 parent
7c281a9
commit 3e692b4
Showing
1 changed file
with
369 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,369 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | ||
; | ||
; This file is licensed under the Apache License v2.0 with LLVM Exceptions. | ||
; See https://llvm.org/LICENSE.txt for license information. | ||
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
; | ||
; (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates | ||
; RUN: llc -O2 -mtriple=aie2 -verify-machineinstrs --issue-limit=1 %s -o - | FileCheck %s | ||
|
||
; test_extract_vector: returns one 8 x i32 half of the 16 x i32 input %a. | ||
; Lanes 0-7 are selected when %idx == 0 and lanes 8-15 otherwise; each half is | ||
; built by a shufflevector and the two results are merged by a phi in %return. | ||
; The CHECK lines are autogenerated (utils/update_llc_test_checks.py) and | ||
; currently expect each path to read eight lanes with vextract.s32 and rebuild | ||
; the result with eight vpush.lo.32; regenerate them rather than hand-editing. | ||
define <8 x i32> @test_extract_vector(<16 x i32> noundef %a, i32 noundef %idx) { | ||
; CHECK-LABEL: test_extract_vector: | ||
; CHECK: .p2align 4 | ||
; CHECK-NEXT: // %bb.0: // %entry | ||
; CHECK-NEXT: nopb ; nopa ; nops ; jz r0, #.LBB0_2; nopv | ||
; CHECK-NEXT: nopa ; nopx // Delay Slot 5 | ||
; CHECK-NEXT: nop // Delay Slot 4 | ||
; CHECK-NEXT: nop // Delay Slot 3 | ||
; CHECK-NEXT: nop // Delay Slot 2 | ||
; CHECK-NEXT: mov r8, r16 // Delay Slot 1 | ||
; CHECK-NEXT: // %bb.1: // %if.end | ||
; CHECK-NEXT: mova r16, #8 | ||
; CHECK-NEXT: vextract.s32 r0, x2, r16 | ||
; CHECK-NEXT: nop | ||
; CHECK-NEXT: mova r16, #9 | ||
; CHECK-NEXT: vextract.s32 r1, x2, r16 | ||
; CHECK-NEXT: nop | ||
; CHECK-NEXT: mova r16, #10 | ||
; CHECK-NEXT: vextract.s32 r2, x2, r16 | ||
; CHECK-NEXT: nop | ||
; CHECK-NEXT: mova r16, #11 | ||
; CHECK-NEXT: vextract.s32 r3, x2, r16 | ||
; CHECK-NEXT: nop | ||
; CHECK-NEXT: mova r16, #12 | ||
; CHECK-NEXT: vextract.s32 r4, x2, r16 | ||
; CHECK-NEXT: nop | ||
; CHECK-NEXT: mova r16, #13 | ||
; CHECK-NEXT: vextract.s32 r5, x2, r16 | ||
; CHECK-NEXT: j #.LBB0_3 | ||
; CHECK-NEXT: nop // Delay Slot 5 | ||
; CHECK-NEXT: mova r16, #15 // Delay Slot 4 | ||
; CHECK-NEXT: vextract.s32 r6, x2, r16 // Delay Slot 3 | ||
; CHECK-NEXT: nop // Delay Slot 2 | ||
; CHECK-NEXT: mova r16, #14 // Delay Slot 1 | ||
; CHECK-NEXT: .p2align 4 | ||
; CHECK-NEXT: .LBB0_2: // %if.then | ||
; CHECK-NEXT: mova r16, #0; nopxm | ||
; CHECK-NEXT: vextract.s32 r0, x2, r16 | ||
; CHECK-NEXT: nop | ||
; CHECK-NEXT: mova r16, #1 | ||
; CHECK-NEXT: vextract.s32 r1, x2, r16 | ||
; CHECK-NEXT: nop | ||
; CHECK-NEXT: mova r16, #2 | ||
; CHECK-NEXT: vextract.s32 r2, x2, r16 | ||
; CHECK-NEXT: nop | ||
; CHECK-NEXT: mova r16, #3 | ||
; CHECK-NEXT: vextract.s32 r3, x2, r16 | ||
; CHECK-NEXT: nop | ||
; CHECK-NEXT: mova r16, #4 | ||
; CHECK-NEXT: vextract.s32 r4, x2, r16 | ||
; CHECK-NEXT: nop | ||
; CHECK-NEXT: mova r16, #5 | ||
; CHECK-NEXT: vextract.s32 r5, x2, r16 | ||
; CHECK-NEXT: nop | ||
; CHECK-NEXT: mova r16, #7 | ||
; CHECK-NEXT: vextract.s32 r6, x2, r16 | ||
; CHECK-NEXT: nop | ||
; CHECK-NEXT: mova r16, #6 | ||
; CHECK-NEXT: .p2align 4 | ||
; CHECK-NEXT: .LBB0_3: // %return | ||
; CHECK-NEXT: nopx ; vextract.s32 r7, x2, r16 | ||
; CHECK-NEXT: vpush.lo.32 x0, r6, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r7, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r5, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r4, x0 | ||
; CHECK-NEXT: ret lr | ||
; CHECK-NEXT: vpush.lo.32 x0, r3, x0 // Delay Slot 5 | ||
; CHECK-NEXT: vpush.lo.32 x0, r2, x0 // Delay Slot 4 | ||
; CHECK-NEXT: vpush.lo.32 x0, r1, x0 // Delay Slot 3 | ||
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 // Delay Slot 2 | ||
; CHECK-NEXT: mov r16, r8 // Delay Slot 1 | ||
entry: | ||
%cmp = icmp eq i32 %idx, 0 | ||
br i1 %cmp, label %if.then, label %if.end | ||
|
||
; %idx == 0: keep lanes 0-7 of %a. | ||
if.then: | ||
%shuffle = shufflevector <16 x i32> %a, <16 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> | ||
br label %return | ||
|
||
; %idx != 0: keep lanes 8-15 of %a. | ||
if.end: | ||
%shuffle1 = shufflevector <16 x i32> %a, <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> | ||
br label %return | ||
|
||
return: | ||
%retval.0 = phi <8 x i32> [ %shuffle, %if.then ], [ %shuffle1, %if.end ] | ||
ret <8 x i32> %retval.0 | ||
} | ||
|
||
; test_insert_vector: combines the 8 x i32 value %b with the low half of the | ||
; 16 x i32 value %a. %b is first widened to 16 lanes (lanes 8-15 undefined). | ||
; When %idx == 0 the result is [%b lanes 0-7, %a lanes 0-7]; otherwise it is | ||
; [%a lanes 0-7, %b lanes 0-7]. In both shuffles mask indices 16-23 address | ||
; lanes 0-7 of the second operand. The two results are merged by a phi. | ||
; The CHECK lines are autogenerated (utils/update_llc_test_checks.py); | ||
; regenerate them rather than hand-editing. | ||
; NOTE(review): this test uses undef for unused operands/mask lanes while | ||
; test_extract_vector uses poison - consider aligning, then regenerate CHECKs. | ||
define <16 x i32> @test_insert_vector(<16 x i32> noundef %a, i32 noundef %idx, <8 x i32> noundef %b) { | ||
; CHECK-LABEL: test_insert_vector: | ||
; CHECK: .p2align 4 | ||
; CHECK-NEXT: // %bb.0: // %entry | ||
; CHECK-NEXT: nopa ; nopb ; nopx ; mov r25, r17 | ||
; CHECK-NEXT: mov r26, r18 | ||
; CHECK-NEXT: mov r27, r19 | ||
; CHECK-NEXT: mova r19, #0 | ||
; CHECK-NEXT: mova r18, #1 | ||
; CHECK-NEXT: mova r17, #2 | ||
; CHECK-NEXT: mov r24, r16 | ||
; CHECK-NEXT: mova r16, #3 | ||
; CHECK-NEXT: vextract.s32 r4, x4, r16 | ||
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 | ||
; CHECK-NEXT: mova r16, #4 | ||
; CHECK-NEXT: vextract.s32 r1, x4, r19 | ||
; CHECK-NEXT: vextract.s32 r2, x4, r18 | ||
; CHECK-NEXT: vextract.s32 r3, x4, r17 | ||
; CHECK-NEXT: vextract.s32 r5, x4, r16 | ||
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 | ||
; CHECK-NEXT: mova r16, #5 | ||
; CHECK-NEXT: vextract.s32 r6, x4, r16 | ||
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 | ||
; CHECK-NEXT: mova r16, #6 | ||
; CHECK-NEXT: vextract.s32 r7, x4, r16 | ||
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 | ||
; CHECK-NEXT: mova r16, #7 | ||
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 | ||
; CHECK-NEXT: vextract.s32 r8, x4, r16 | ||
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r8, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r7, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r6, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r5, x0 | ||
; CHECK-NEXT: jz r0, #.LBB1_2 | ||
; CHECK-NEXT: vpush.lo.32 x0, r4, x0 // Delay Slot 5 | ||
; CHECK-NEXT: vpush.lo.32 x0, r3, x0 // Delay Slot 4 | ||
; CHECK-NEXT: vpush.lo.32 x0, r2, x0 // Delay Slot 3 | ||
; CHECK-NEXT: vpush.lo.32 x0, r1, x0 // Delay Slot 2 | ||
; CHECK-NEXT: nop // Delay Slot 1 | ||
; CHECK-NEXT: // %bb.1: // %if.end | ||
; CHECK-NEXT: nopx ; vextract.s32 r12, x2, r16 | ||
; CHECK-NEXT: vextract.s32 r13, x0, r16 | ||
; CHECK-NEXT: vextract.s32 r4, x2, r17 | ||
; CHECK-NEXT: vextract.s32 r5, x0, r17 | ||
; CHECK-NEXT: nop | ||
; CHECK-NEXT: mova r17, #3 | ||
; CHECK-NEXT: vextract.s32 r0, x2, r19 | ||
; CHECK-NEXT: vextract.s32 r1, x0, r19 | ||
; CHECK-NEXT: vextract.s32 r2, x2, r18 | ||
; CHECK-NEXT: vextract.s32 r3, x0, r18 | ||
; CHECK-NEXT: vextract.s32 r6, x2, r17 | ||
; CHECK-NEXT: vextract.s32 r7, x0, r17 | ||
; CHECK-NEXT: movx r16, #6 | ||
; CHECK-NEXT: mova r17, #4 | ||
; CHECK-NEXT: vextract.s32 r14, x2, r16 | ||
; CHECK-NEXT: vextract.s32 r15, x0, r16 | ||
; CHECK-NEXT: vextract.s32 r8, x2, r17 | ||
; CHECK-NEXT: vextract.s32 r9, x0, r17 | ||
; CHECK-NEXT: j #.LBB1_3 | ||
; CHECK-NEXT: nop // Delay Slot 5 | ||
; CHECK-NEXT: mova r17, #5 // Delay Slot 4 | ||
; CHECK-NEXT: vextract.s32 r10, x2, r17 // Delay Slot 3 | ||
; CHECK-NEXT: vextract.s32 r11, x0, r17 // Delay Slot 2 | ||
; CHECK-NEXT: nop // Delay Slot 1 | ||
; CHECK-NEXT: .p2align 4 | ||
; CHECK-NEXT: .LBB1_2: // %if.then | ||
; CHECK-NEXT: nopa ; nopb ; nopx ; vextract.s32 r12, x0, r16; nops | ||
; CHECK-NEXT: vextract.s32 r13, x2, r16 | ||
; CHECK-NEXT: vextract.s32 r4, x0, r17 | ||
; CHECK-NEXT: vextract.s32 r5, x2, r17 | ||
; CHECK-NEXT: nop | ||
; CHECK-NEXT: mova r17, #3 | ||
; CHECK-NEXT: vextract.s32 r0, x0, r19 | ||
; CHECK-NEXT: vextract.s32 r1, x2, r19 | ||
; CHECK-NEXT: vextract.s32 r2, x0, r18 | ||
; CHECK-NEXT: vextract.s32 r3, x2, r18 | ||
; CHECK-NEXT: vextract.s32 r6, x0, r17 | ||
; CHECK-NEXT: vextract.s32 r7, x2, r17 | ||
; CHECK-NEXT: movx r16, #6 | ||
; CHECK-NEXT: mova r17, #4 | ||
; CHECK-NEXT: vextract.s32 r14, x0, r16 | ||
; CHECK-NEXT: vextract.s32 r15, x2, r16 | ||
; CHECK-NEXT: vextract.s32 r8, x0, r17 | ||
; CHECK-NEXT: vextract.s32 r9, x2, r17 | ||
; CHECK-NEXT: nop | ||
; CHECK-NEXT: mova r17, #5 | ||
; CHECK-NEXT: vextract.s32 r10, x0, r17 | ||
; CHECK-NEXT: vextract.s32 r11, x2, r17 | ||
; CHECK-NEXT: nop | ||
; CHECK-NEXT: .p2align 4 | ||
; CHECK-NEXT: .LBB1_3: // %cleanup | ||
; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; mov r19, r27; nopv | ||
; CHECK-NEXT: mov r18, r26 | ||
; CHECK-NEXT: mov r17, r25 | ||
; CHECK-NEXT: vpush.lo.32 x0, r13, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r15, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r11, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r9, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r7, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r5, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r3, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r1, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r12, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r14, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r10, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r8, x0 | ||
; CHECK-NEXT: ret lr | ||
; CHECK-NEXT: vpush.lo.32 x0, r6, x0 // Delay Slot 5 | ||
; CHECK-NEXT: vpush.lo.32 x0, r4, x0 // Delay Slot 4 | ||
; CHECK-NEXT: vpush.lo.32 x0, r2, x0 // Delay Slot 3 | ||
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 // Delay Slot 2 | ||
; CHECK-NEXT: mov r16, r24 // Delay Slot 1 | ||
entry: | ||
; Widen %b to 16 lanes; only lanes 0-7 carry data. | ||
%shuffle = shufflevector <8 x i32> %b, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> | ||
%cmp = icmp eq i32 %idx, 0 | ||
br i1 %cmp, label %if.then, label %if.end | ||
|
||
; %idx == 0: %b in result lanes 0-7, low half of %a in result lanes 8-15. | ||
if.then: | ||
%shuffle1 = shufflevector <16 x i32> %shuffle, <16 x i32> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> | ||
br label %cleanup | ||
|
||
; %idx != 0: low half of %a in result lanes 0-7, %b in result lanes 8-15. | ||
if.end: ; | ||
%shuffle2 = shufflevector <16 x i32> %a, <16 x i32> %shuffle, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> | ||
br label %cleanup | ||
|
||
cleanup: | ||
%retval.0 = phi <16 x i32> [ %shuffle1, %if.then ], [ %shuffle2, %if.end ] | ||
ret <16 x i32> %retval.0 | ||
} | ||
|
||
; test_concat_vector: concatenates two 8 x i32 values into one 16 x i32 value | ||
; with a single shufflevector: result lanes 0-7 come from %a and result lanes | ||
; 8-15 come from %b (mask indices 8-15 address lanes 0-7 of the second operand). | ||
; The CHECK lines are autogenerated (utils/update_llc_test_checks.py) and | ||
; currently expect sixteen vextract.s32 followed by sixteen vpush.lo.32 in a | ||
; single basic block; regenerate them rather than hand-editing. | ||
define <16 x i32> @test_concat_vector(<8 x i32> noundef %a, <8 x i32> noundef %b) { | ||
; CHECK-LABEL: test_concat_vector: | ||
; CHECK: .p2align 4 | ||
; CHECK-NEXT: // %bb.0: // %entry | ||
; CHECK-NEXT: nopa ; nopx ; mov r24, r16 | ||
; CHECK-NEXT: mova r16, #0 | ||
; CHECK-NEXT: vextract.s32 r0, x2, r16 | ||
; CHECK-NEXT: vextract.s32 r1, x4, r16 | ||
; CHECK-NEXT: nop | ||
; CHECK-NEXT: mova r16, #1 | ||
; CHECK-NEXT: vextract.s32 r2, x2, r16 | ||
; CHECK-NEXT: vextract.s32 r3, x4, r16 | ||
; CHECK-NEXT: nop | ||
; CHECK-NEXT: mova r16, #2 | ||
; CHECK-NEXT: vextract.s32 r4, x2, r16 | ||
; CHECK-NEXT: vextract.s32 r5, x4, r16 | ||
; CHECK-NEXT: nop | ||
; CHECK-NEXT: mova r16, #3 | ||
; CHECK-NEXT: vextract.s32 r6, x2, r16 | ||
; CHECK-NEXT: vextract.s32 r7, x4, r16 | ||
; CHECK-NEXT: nop | ||
; CHECK-NEXT: mova r16, #4 | ||
; CHECK-NEXT: vextract.s32 r8, x2, r16 | ||
; CHECK-NEXT: vextract.s32 r9, x4, r16 | ||
; CHECK-NEXT: nop | ||
; CHECK-NEXT: mova r16, #5 | ||
; CHECK-NEXT: vextract.s32 r10, x2, r16 | ||
; CHECK-NEXT: vextract.s32 r11, x4, r16 | ||
; CHECK-NEXT: nop | ||
; CHECK-NEXT: mova r16, #7 | ||
; CHECK-NEXT: vextract.s32 r12, x2, r16 | ||
; CHECK-NEXT: vextract.s32 r13, x4, r16 | ||
; CHECK-NEXT: nop | ||
; CHECK-NEXT: mova r16, #6 | ||
; CHECK-NEXT: vextract.s32 r14, x2, r16 | ||
; CHECK-NEXT: vextract.s32 r15, x4, r16 | ||
; CHECK-NEXT: vpush.lo.32 x0, r13, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r15, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r11, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r9, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r7, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r5, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r3, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r1, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r12, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r14, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r10, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r8, x0 | ||
; CHECK-NEXT: ret lr | ||
; CHECK-NEXT: vpush.lo.32 x0, r6, x0 // Delay Slot 5 | ||
; CHECK-NEXT: vpush.lo.32 x0, r4, x0 // Delay Slot 4 | ||
; CHECK-NEXT: vpush.lo.32 x0, r2, x0 // Delay Slot 3 | ||
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 // Delay Slot 2 | ||
; CHECK-NEXT: mov r16, r24 // Delay Slot 1 | ||
entry: | ||
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> | ||
ret <16 x i32> %shuffle | ||
} | ||
|
||
; test_set_vector: places the 8 x i32 value %a into one half of a 16 x i32 | ||
; result, leaving the other half undefined. %shuffle puts %a in lanes 0-7 and | ||
; %shuffle1 puts %a in lanes 8-15; a select picks %shuffle when %idx == 0 and | ||
; %shuffle1 otherwise. Unlike the branching tests in this file, the choice is | ||
; a select, and the CHECK lines currently expect both candidates to be built | ||
; and combined with a single vsel.32 (no branch). | ||
; The CHECK lines are autogenerated (utils/update_llc_test_checks.py); | ||
; regenerate them rather than hand-editing. | ||
; NOTE(review): this test uses undef for unused operands/mask lanes while | ||
; test_extract_vector uses poison - consider aligning, then regenerate CHECKs. | ||
define <16 x i32> @test_set_vector(i32 noundef %idx, <8 x i32> noundef %a) { | ||
; CHECK-LABEL: test_set_vector: | ||
; CHECK: .p2align 4 | ||
; CHECK-NEXT: // %bb.0: // %entry | ||
; CHECK-NEXT: nopa ; nopb ; nopx ; mov r9, r16 | ||
; CHECK-NEXT: mova r16, #0 | ||
; CHECK-NEXT: vextract.s32 r1, x2, r16 | ||
; CHECK-NEXT: eqz r0, r0 | ||
; CHECK-NEXT: mova r16, #1 | ||
; CHECK-NEXT: vextract.s32 r2, x2, r16 | ||
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 | ||
; CHECK-NEXT: mova r16, #2 | ||
; CHECK-NEXT: vextract.s32 r3, x2, r16 | ||
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 | ||
; CHECK-NEXT: mova r16, #3 | ||
; CHECK-NEXT: vextract.s32 r4, x2, r16 | ||
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 | ||
; CHECK-NEXT: mova r16, #4 | ||
; CHECK-NEXT: vextract.s32 r5, x2, r16 | ||
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 | ||
; CHECK-NEXT: mova r16, #5 | ||
; CHECK-NEXT: vextract.s32 r6, x2, r16 | ||
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 | ||
; CHECK-NEXT: mova r16, #6 | ||
; CHECK-NEXT: vextract.s32 r7, x2, r16 | ||
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 | ||
; CHECK-NEXT: mova r16, #7 | ||
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 | ||
; CHECK-NEXT: vextract.s32 r8, x2, r16 | ||
; CHECK-NEXT: add r16, r0, #-1 | ||
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r8, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r7, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r6, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r5, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r4, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r3, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r2, x0 | ||
; CHECK-NEXT: vpush.lo.32 x0, r1, x0 | ||
; CHECK-NEXT: vpush.lo.32 x2, r8, x0 | ||
; CHECK-NEXT: vpush.lo.32 x2, r7, x2 | ||
; CHECK-NEXT: vpush.lo.32 x2, r6, x2 | ||
; CHECK-NEXT: vpush.lo.32 x2, r5, x2 | ||
; CHECK-NEXT: vpush.lo.32 x2, r4, x2 | ||
; CHECK-NEXT: vpush.lo.32 x2, r3, x2 | ||
; CHECK-NEXT: vpush.lo.32 x2, r2, x2 | ||
; CHECK-NEXT: vpush.lo.32 x2, r1, x2 | ||
; CHECK-NEXT: vpush.lo.32 x2, r0, x2 | ||
; CHECK-NEXT: vpush.lo.32 x2, r0, x2 | ||
; CHECK-NEXT: vpush.lo.32 x2, r0, x2 | ||
; CHECK-NEXT: vpush.lo.32 x2, r0, x2 | ||
; CHECK-NEXT: vpush.lo.32 x2, r0, x2 | ||
; CHECK-NEXT: ret lr | ||
; CHECK-NEXT: vpush.lo.32 x2, r0, x2 // Delay Slot 5 | ||
; CHECK-NEXT: vpush.lo.32 x2, r0, x2 // Delay Slot 4 | ||
; CHECK-NEXT: vpush.lo.32 x2, r0, x2 // Delay Slot 3 | ||
; CHECK-NEXT: vsel.32 x0, x0, x2, r16 // Delay Slot 2 | ||
; CHECK-NEXT: mov r16, r9 // Delay Slot 1 | ||
entry: | ||
%cmp = icmp eq i32 %idx, 0 | ||
; Candidate for %idx == 0: %a in lanes 0-7, lanes 8-15 undefined. | ||
%shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> | ||
; Candidate for %idx != 0: lanes 0-7 undefined, %a in lanes 8-15. | ||
%shuffle1 = shufflevector <8 x i32> %a, <8 x i32> undef, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> | ||
%retval.0 = select i1 %cmp, <16 x i32> %shuffle, <16 x i32> %shuffle1 | ||
ret <16 x i32> %retval.0 | ||
} | ||
|
||
; test_extract_elem: returns the i32 lane of the 8 x i32 value %a selected by | ||
; the run-time index %idx, using extractelement (no shufflevector involved). | ||
; The CHECK lines are autogenerated (utils/update_llc_test_checks.py) and | ||
; currently expect a single vextract.s32 scheduled in the delay slots of the | ||
; return; regenerate them rather than hand-editing. | ||
define i32 @test_extract_elem(<8 x i32> noundef %a, i32 noundef %idx) { | ||
; CHECK-LABEL: test_extract_elem: | ||
; CHECK: .p2align 4 | ||
; CHECK-NEXT: // %bb.0: // %entry | ||
; CHECK-NEXT: nopa ; nopb ; ret lr ; nopm ; nops | ||
; CHECK-NEXT: mov r2, r16 // Delay Slot 5 | ||
; CHECK-NEXT: mov r16, r1 // Delay Slot 4 | ||
; CHECK-NEXT: vextract.s32 r0, x0, r16 // Delay Slot 3 | ||
; CHECK-NEXT: nop // Delay Slot 2 | ||
; CHECK-NEXT: mov r16, r2 // Delay Slot 1 | ||
entry: | ||
%vecext = extractelement <8 x i32> %a, i32 %idx | ||
ret i32 %vecext | ||
} |