Skip to content

Commit

Permalink
[AIE2] Tests for intrinsic lowering using shufflevector
Browse files Browse the repository at this point in the history
  • Loading branch information
ValentijnvdBeek committed Jun 11, 2024
1 parent 7c281a9 commit 3e692b4
Showing 1 changed file with 369 additions and 0 deletions.
369 changes: 369 additions & 0 deletions llvm/test/CodeGen/AIE/aie2/intrinsics-shufflevec.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,369 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
;
; This file is licensed under the Apache License v2.0 with LLVM Exceptions.
; See https://llvm.org/LICENSE.txt for license information.
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
;
; (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates
; RUN: llc -O2 -mtriple=aie2 -verify-machineinstrs --issue-limit=1 %s -o - | FileCheck %s

; test_extract_vector: returns one 8-lane half of the 16 x i32 input %a.
; When %idx is zero the low half (lanes 0-7) is taken, otherwise the high
; half (lanes 8-15). Each half is produced by a shufflevector with a
; contiguous mask and the two results are merged by a phi in %return.
; The expected assembly below reads the lanes one at a time with
; vextract.s32 and rebuilds the result with a chain of vpush.lo.32.
define <8 x i32> @test_extract_vector(<16 x i32> noundef %a, i32 noundef %idx) {
; CHECK-LABEL: test_extract_vector:
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: nopb ; nopa ; nops ; jz r0, #.LBB0_2; nopv
; CHECK-NEXT: nopa ; nopx // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
; CHECK-NEXT: nop // Delay Slot 2
; CHECK-NEXT: mov r8, r16 // Delay Slot 1
; CHECK-NEXT: // %bb.1: // %if.end
; CHECK-NEXT: mova r16, #8
; CHECK-NEXT: vextract.s32 r0, x2, r16
; CHECK-NEXT: nop
; CHECK-NEXT: mova r16, #9
; CHECK-NEXT: vextract.s32 r1, x2, r16
; CHECK-NEXT: nop
; CHECK-NEXT: mova r16, #10
; CHECK-NEXT: vextract.s32 r2, x2, r16
; CHECK-NEXT: nop
; CHECK-NEXT: mova r16, #11
; CHECK-NEXT: vextract.s32 r3, x2, r16
; CHECK-NEXT: nop
; CHECK-NEXT: mova r16, #12
; CHECK-NEXT: vextract.s32 r4, x2, r16
; CHECK-NEXT: nop
; CHECK-NEXT: mova r16, #13
; CHECK-NEXT: vextract.s32 r5, x2, r16
; CHECK-NEXT: j #.LBB0_3
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: mova r16, #15 // Delay Slot 4
; CHECK-NEXT: vextract.s32 r6, x2, r16 // Delay Slot 3
; CHECK-NEXT: nop // Delay Slot 2
; CHECK-NEXT: mova r16, #14 // Delay Slot 1
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_2: // %if.then
; CHECK-NEXT: mova r16, #0; nopxm
; CHECK-NEXT: vextract.s32 r0, x2, r16
; CHECK-NEXT: nop
; CHECK-NEXT: mova r16, #1
; CHECK-NEXT: vextract.s32 r1, x2, r16
; CHECK-NEXT: nop
; CHECK-NEXT: mova r16, #2
; CHECK-NEXT: vextract.s32 r2, x2, r16
; CHECK-NEXT: nop
; CHECK-NEXT: mova r16, #3
; CHECK-NEXT: vextract.s32 r3, x2, r16
; CHECK-NEXT: nop
; CHECK-NEXT: mova r16, #4
; CHECK-NEXT: vextract.s32 r4, x2, r16
; CHECK-NEXT: nop
; CHECK-NEXT: mova r16, #5
; CHECK-NEXT: vextract.s32 r5, x2, r16
; CHECK-NEXT: nop
; CHECK-NEXT: mova r16, #7
; CHECK-NEXT: vextract.s32 r6, x2, r16
; CHECK-NEXT: nop
; CHECK-NEXT: mova r16, #6
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_3: // %return
; CHECK-NEXT: nopx ; vextract.s32 r7, x2, r16
; CHECK-NEXT: vpush.lo.32 x0, r6, x0
; CHECK-NEXT: vpush.lo.32 x0, r7, x0
; CHECK-NEXT: vpush.lo.32 x0, r5, x0
; CHECK-NEXT: vpush.lo.32 x0, r4, x0
; CHECK-NEXT: ret lr
; CHECK-NEXT: vpush.lo.32 x0, r3, x0 // Delay Slot 5
; CHECK-NEXT: vpush.lo.32 x0, r2, x0 // Delay Slot 4
; CHECK-NEXT: vpush.lo.32 x0, r1, x0 // Delay Slot 3
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 // Delay Slot 2
; CHECK-NEXT: mov r16, r8 // Delay Slot 1
entry:
; Branch on %idx == 0.
%cmp = icmp eq i32 %idx, 0
br i1 %cmp, label %if.then, label %if.end

if.then:
; %idx == 0: take lanes 0-7 of %a.
%shuffle = shufflevector <16 x i32> %a, <16 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
br label %return

if.end:
; %idx != 0: take lanes 8-15 of %a.
%shuffle1 = shufflevector <16 x i32> %a, <16 x i32> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
br label %return

return:
%retval.0 = phi <8 x i32> [ %shuffle, %if.then ], [ %shuffle1, %if.end ]
ret <8 x i32> %retval.0
}

; test_insert_vector: combines the 8-lane vector %b with the low 8 lanes of
; the 16-lane vector %a.
;   * %b is first widened to 16 lanes; its upper 8 lanes are unspecified.
;   * %idx == 0: result is [ b0..b7, a0..a7 ]  (block %if.then).
;   * %idx != 0: result is [ a0..a7, b0..b7 ]  (block %if.end).
; Mask indices 16-23 in the second-stage shuffles address lanes 0-7 of the
; second shufflevector operand.
; The unused operand and the unspecified mask lanes are written as poison,
; matching the spelling used by test_extract_vector in this file.
; The expected assembly below lowers everything to per-lane vextract.s32
; reads followed by vpush.lo.32 chains.
define <16 x i32> @test_insert_vector(<16 x i32> noundef %a, i32 noundef %idx, <8 x i32> noundef %b) {
; CHECK-LABEL: test_insert_vector:
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: nopa ; nopb ; nopx ; mov r25, r17
; CHECK-NEXT: mov r26, r18
; CHECK-NEXT: mov r27, r19
; CHECK-NEXT: mova r19, #0
; CHECK-NEXT: mova r18, #1
; CHECK-NEXT: mova r17, #2
; CHECK-NEXT: mov r24, r16
; CHECK-NEXT: mova r16, #3
; CHECK-NEXT: vextract.s32 r4, x4, r16
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
; CHECK-NEXT: mova r16, #4
; CHECK-NEXT: vextract.s32 r1, x4, r19
; CHECK-NEXT: vextract.s32 r2, x4, r18
; CHECK-NEXT: vextract.s32 r3, x4, r17
; CHECK-NEXT: vextract.s32 r5, x4, r16
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
; CHECK-NEXT: mova r16, #5
; CHECK-NEXT: vextract.s32 r6, x4, r16
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
; CHECK-NEXT: mova r16, #6
; CHECK-NEXT: vextract.s32 r7, x4, r16
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
; CHECK-NEXT: mova r16, #7
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
; CHECK-NEXT: vextract.s32 r8, x4, r16
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
; CHECK-NEXT: vpush.lo.32 x0, r8, x0
; CHECK-NEXT: vpush.lo.32 x0, r7, x0
; CHECK-NEXT: vpush.lo.32 x0, r6, x0
; CHECK-NEXT: vpush.lo.32 x0, r5, x0
; CHECK-NEXT: jz r0, #.LBB1_2
; CHECK-NEXT: vpush.lo.32 x0, r4, x0 // Delay Slot 5
; CHECK-NEXT: vpush.lo.32 x0, r3, x0 // Delay Slot 4
; CHECK-NEXT: vpush.lo.32 x0, r2, x0 // Delay Slot 3
; CHECK-NEXT: vpush.lo.32 x0, r1, x0 // Delay Slot 2
; CHECK-NEXT: nop // Delay Slot 1
; CHECK-NEXT: // %bb.1: // %if.end
; CHECK-NEXT: nopx ; vextract.s32 r12, x2, r16
; CHECK-NEXT: vextract.s32 r13, x0, r16
; CHECK-NEXT: vextract.s32 r4, x2, r17
; CHECK-NEXT: vextract.s32 r5, x0, r17
; CHECK-NEXT: nop
; CHECK-NEXT: mova r17, #3
; CHECK-NEXT: vextract.s32 r0, x2, r19
; CHECK-NEXT: vextract.s32 r1, x0, r19
; CHECK-NEXT: vextract.s32 r2, x2, r18
; CHECK-NEXT: vextract.s32 r3, x0, r18
; CHECK-NEXT: vextract.s32 r6, x2, r17
; CHECK-NEXT: vextract.s32 r7, x0, r17
; CHECK-NEXT: movx r16, #6
; CHECK-NEXT: mova r17, #4
; CHECK-NEXT: vextract.s32 r14, x2, r16
; CHECK-NEXT: vextract.s32 r15, x0, r16
; CHECK-NEXT: vextract.s32 r8, x2, r17
; CHECK-NEXT: vextract.s32 r9, x0, r17
; CHECK-NEXT: j #.LBB1_3
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: mova r17, #5 // Delay Slot 4
; CHECK-NEXT: vextract.s32 r10, x2, r17 // Delay Slot 3
; CHECK-NEXT: vextract.s32 r11, x0, r17 // Delay Slot 2
; CHECK-NEXT: nop // Delay Slot 1
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB1_2: // %if.then
; CHECK-NEXT: nopa ; nopb ; nopx ; vextract.s32 r12, x0, r16; nops
; CHECK-NEXT: vextract.s32 r13, x2, r16
; CHECK-NEXT: vextract.s32 r4, x0, r17
; CHECK-NEXT: vextract.s32 r5, x2, r17
; CHECK-NEXT: nop
; CHECK-NEXT: mova r17, #3
; CHECK-NEXT: vextract.s32 r0, x0, r19
; CHECK-NEXT: vextract.s32 r1, x2, r19
; CHECK-NEXT: vextract.s32 r2, x0, r18
; CHECK-NEXT: vextract.s32 r3, x2, r18
; CHECK-NEXT: vextract.s32 r6, x0, r17
; CHECK-NEXT: vextract.s32 r7, x2, r17
; CHECK-NEXT: movx r16, #6
; CHECK-NEXT: mova r17, #4
; CHECK-NEXT: vextract.s32 r14, x0, r16
; CHECK-NEXT: vextract.s32 r15, x2, r16
; CHECK-NEXT: vextract.s32 r8, x0, r17
; CHECK-NEXT: vextract.s32 r9, x2, r17
; CHECK-NEXT: nop
; CHECK-NEXT: mova r17, #5
; CHECK-NEXT: vextract.s32 r10, x0, r17
; CHECK-NEXT: vextract.s32 r11, x2, r17
; CHECK-NEXT: nop
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB1_3: // %cleanup
; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; mov r19, r27; nopv
; CHECK-NEXT: mov r18, r26
; CHECK-NEXT: mov r17, r25
; CHECK-NEXT: vpush.lo.32 x0, r13, x0
; CHECK-NEXT: vpush.lo.32 x0, r15, x0
; CHECK-NEXT: vpush.lo.32 x0, r11, x0
; CHECK-NEXT: vpush.lo.32 x0, r9, x0
; CHECK-NEXT: vpush.lo.32 x0, r7, x0
; CHECK-NEXT: vpush.lo.32 x0, r5, x0
; CHECK-NEXT: vpush.lo.32 x0, r3, x0
; CHECK-NEXT: vpush.lo.32 x0, r1, x0
; CHECK-NEXT: vpush.lo.32 x0, r12, x0
; CHECK-NEXT: vpush.lo.32 x0, r14, x0
; CHECK-NEXT: vpush.lo.32 x0, r10, x0
; CHECK-NEXT: vpush.lo.32 x0, r8, x0
; CHECK-NEXT: ret lr
; CHECK-NEXT: vpush.lo.32 x0, r6, x0 // Delay Slot 5
; CHECK-NEXT: vpush.lo.32 x0, r4, x0 // Delay Slot 4
; CHECK-NEXT: vpush.lo.32 x0, r2, x0 // Delay Slot 3
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 // Delay Slot 2
; CHECK-NEXT: mov r16, r24 // Delay Slot 1
entry:
; Widen %b from 8 to 16 lanes; lanes 8-15 are left unspecified.
%shuffle = shufflevector <8 x i32> %b, <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%cmp = icmp eq i32 %idx, 0
br i1 %cmp, label %if.then, label %if.end

if.then:
; %idx == 0: widened %b supplies lanes 0-7, %a[0..7] supplies lanes 8-15.
%shuffle1 = shufflevector <16 x i32> %shuffle, <16 x i32> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
br label %cleanup

if.end:
; %idx != 0: %a[0..7] supplies lanes 0-7, widened %b supplies lanes 8-15.
%shuffle2 = shufflevector <16 x i32> %a, <16 x i32> %shuffle, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
br label %cleanup

cleanup:
%retval.0 = phi <16 x i32> [ %shuffle1, %if.then ], [ %shuffle2, %if.end ]
ret <16 x i32> %retval.0
}

; test_concat_vector: concatenates two 8 x i32 vectors into one 16 x i32
; vector using a single shufflevector with the identity mask 0-15, so the
; result is [ a0..a7, b0..b7 ].
; The expected assembly below reads every lane of both inputs with
; vextract.s32 and rebuilds the result with a chain of vpush.lo.32.
define <16 x i32> @test_concat_vector(<8 x i32> noundef %a, <8 x i32> noundef %b) {
; CHECK-LABEL: test_concat_vector:
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: nopa ; nopx ; mov r24, r16
; CHECK-NEXT: mova r16, #0
; CHECK-NEXT: vextract.s32 r0, x2, r16
; CHECK-NEXT: vextract.s32 r1, x4, r16
; CHECK-NEXT: nop
; CHECK-NEXT: mova r16, #1
; CHECK-NEXT: vextract.s32 r2, x2, r16
; CHECK-NEXT: vextract.s32 r3, x4, r16
; CHECK-NEXT: nop
; CHECK-NEXT: mova r16, #2
; CHECK-NEXT: vextract.s32 r4, x2, r16
; CHECK-NEXT: vextract.s32 r5, x4, r16
; CHECK-NEXT: nop
; CHECK-NEXT: mova r16, #3
; CHECK-NEXT: vextract.s32 r6, x2, r16
; CHECK-NEXT: vextract.s32 r7, x4, r16
; CHECK-NEXT: nop
; CHECK-NEXT: mova r16, #4
; CHECK-NEXT: vextract.s32 r8, x2, r16
; CHECK-NEXT: vextract.s32 r9, x4, r16
; CHECK-NEXT: nop
; CHECK-NEXT: mova r16, #5
; CHECK-NEXT: vextract.s32 r10, x2, r16
; CHECK-NEXT: vextract.s32 r11, x4, r16
; CHECK-NEXT: nop
; CHECK-NEXT: mova r16, #7
; CHECK-NEXT: vextract.s32 r12, x2, r16
; CHECK-NEXT: vextract.s32 r13, x4, r16
; CHECK-NEXT: nop
; CHECK-NEXT: mova r16, #6
; CHECK-NEXT: vextract.s32 r14, x2, r16
; CHECK-NEXT: vextract.s32 r15, x4, r16
; CHECK-NEXT: vpush.lo.32 x0, r13, x0
; CHECK-NEXT: vpush.lo.32 x0, r15, x0
; CHECK-NEXT: vpush.lo.32 x0, r11, x0
; CHECK-NEXT: vpush.lo.32 x0, r9, x0
; CHECK-NEXT: vpush.lo.32 x0, r7, x0
; CHECK-NEXT: vpush.lo.32 x0, r5, x0
; CHECK-NEXT: vpush.lo.32 x0, r3, x0
; CHECK-NEXT: vpush.lo.32 x0, r1, x0
; CHECK-NEXT: vpush.lo.32 x0, r12, x0
; CHECK-NEXT: vpush.lo.32 x0, r14, x0
; CHECK-NEXT: vpush.lo.32 x0, r10, x0
; CHECK-NEXT: vpush.lo.32 x0, r8, x0
; CHECK-NEXT: ret lr
; CHECK-NEXT: vpush.lo.32 x0, r6, x0 // Delay Slot 5
; CHECK-NEXT: vpush.lo.32 x0, r4, x0 // Delay Slot 4
; CHECK-NEXT: vpush.lo.32 x0, r2, x0 // Delay Slot 3
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 // Delay Slot 2
; CHECK-NEXT: mov r16, r24 // Delay Slot 1
entry:
; Mask indices 0-7 pick %a, indices 8-15 pick lanes 0-7 of %b.
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i32> %shuffle
}

; test_set_vector: places the 8-lane vector %a into one half of a 16-lane
; result, leaving the other half unspecified.
;   * %idx == 0: %a occupies lanes 0-7  (%shuffle).
;   * %idx != 0: %a occupies lanes 8-15 (%shuffle1).
; Both candidates are computed unconditionally and chosen with a select, so
; the function has a single basic block.
; The unused operand and the unspecified mask lanes are written as poison,
; matching the spelling used by test_extract_vector in this file.
; The expected assembly below builds both candidates with vpush.lo.32 chains
; and picks one with vsel.32.
define <16 x i32> @test_set_vector(i32 noundef %idx, <8 x i32> noundef %a) {
; CHECK-LABEL: test_set_vector:
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: nopa ; nopb ; nopx ; mov r9, r16
; CHECK-NEXT: mova r16, #0
; CHECK-NEXT: vextract.s32 r1, x2, r16
; CHECK-NEXT: eqz r0, r0
; CHECK-NEXT: mova r16, #1
; CHECK-NEXT: vextract.s32 r2, x2, r16
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
; CHECK-NEXT: mova r16, #2
; CHECK-NEXT: vextract.s32 r3, x2, r16
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
; CHECK-NEXT: mova r16, #3
; CHECK-NEXT: vextract.s32 r4, x2, r16
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
; CHECK-NEXT: mova r16, #4
; CHECK-NEXT: vextract.s32 r5, x2, r16
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
; CHECK-NEXT: mova r16, #5
; CHECK-NEXT: vextract.s32 r6, x2, r16
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
; CHECK-NEXT: mova r16, #6
; CHECK-NEXT: vextract.s32 r7, x2, r16
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
; CHECK-NEXT: mova r16, #7
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
; CHECK-NEXT: vextract.s32 r8, x2, r16
; CHECK-NEXT: add r16, r0, #-1
; CHECK-NEXT: vpush.lo.32 x0, r0, x0
; CHECK-NEXT: vpush.lo.32 x0, r8, x0
; CHECK-NEXT: vpush.lo.32 x0, r7, x0
; CHECK-NEXT: vpush.lo.32 x0, r6, x0
; CHECK-NEXT: vpush.lo.32 x0, r5, x0
; CHECK-NEXT: vpush.lo.32 x0, r4, x0
; CHECK-NEXT: vpush.lo.32 x0, r3, x0
; CHECK-NEXT: vpush.lo.32 x0, r2, x0
; CHECK-NEXT: vpush.lo.32 x0, r1, x0
; CHECK-NEXT: vpush.lo.32 x2, r8, x0
; CHECK-NEXT: vpush.lo.32 x2, r7, x2
; CHECK-NEXT: vpush.lo.32 x2, r6, x2
; CHECK-NEXT: vpush.lo.32 x2, r5, x2
; CHECK-NEXT: vpush.lo.32 x2, r4, x2
; CHECK-NEXT: vpush.lo.32 x2, r3, x2
; CHECK-NEXT: vpush.lo.32 x2, r2, x2
; CHECK-NEXT: vpush.lo.32 x2, r1, x2
; CHECK-NEXT: vpush.lo.32 x2, r0, x2
; CHECK-NEXT: vpush.lo.32 x2, r0, x2
; CHECK-NEXT: vpush.lo.32 x2, r0, x2
; CHECK-NEXT: vpush.lo.32 x2, r0, x2
; CHECK-NEXT: vpush.lo.32 x2, r0, x2
; CHECK-NEXT: ret lr
; CHECK-NEXT: vpush.lo.32 x2, r0, x2 // Delay Slot 5
; CHECK-NEXT: vpush.lo.32 x2, r0, x2 // Delay Slot 4
; CHECK-NEXT: vpush.lo.32 x2, r0, x2 // Delay Slot 3
; CHECK-NEXT: vsel.32 x0, x0, x2, r16 // Delay Slot 2
; CHECK-NEXT: mov r16, r9 // Delay Slot 1
entry:
%cmp = icmp eq i32 %idx, 0
; Candidate 1: %a in lanes 0-7, lanes 8-15 unspecified.
%shuffle = shufflevector <8 x i32> %a, <8 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; Candidate 2: lanes 0-7 unspecified, %a in lanes 8-15.
%shuffle1 = shufflevector <8 x i32> %a, <8 x i32> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; %idx == 0 picks candidate 1, otherwise candidate 2.
%retval.0 = select i1 %cmp, <16 x i32> %shuffle, <16 x i32> %shuffle1
ret <16 x i32> %retval.0
}

; test_extract_elem: returns the lane of the 8 x i32 vector %a selected by
; the runtime index %idx, using a plain extractelement.
; The expected assembly below is a single vextract.s32 scheduled into the
; delay slots of the return, with r16 saved to r2 beforehand and restored
; afterwards.
define i32 @test_extract_elem(<8 x i32> noundef %a, i32 noundef %idx) {
; CHECK-LABEL: test_extract_elem:
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: nopa ; nopb ; ret lr ; nopm ; nops
; CHECK-NEXT: mov r2, r16 // Delay Slot 5
; CHECK-NEXT: mov r16, r1 // Delay Slot 4
; CHECK-NEXT: vextract.s32 r0, x0, r16 // Delay Slot 3
; CHECK-NEXT: nop // Delay Slot 2
; CHECK-NEXT: mov r16, r2 // Delay Slot 1
entry:
; Variable-index lane read; %idx is not a compile-time constant here.
%vecext = extractelement <8 x i32> %a, i32 %idx
ret i32 %vecext
}

0 comments on commit 3e692b4

Please sign in to comment.