Skip to content

Commit

Permalink
[AIE2] Add two patterns that extract the first two chunks of a vector
Browse files Browse the repository at this point in the history
  • Loading branch information
ValentijnvdBeek committed May 17, 2024
1 parent ea9cfc4 commit 74dca3d
Show file tree
Hide file tree
Showing 5 changed files with 235 additions and 132 deletions.
43 changes: 38 additions & 5 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -323,11 +323,15 @@ Register CombinerHelper::createUnmergeValue(MachineInstr &MI,
Builder.setInsertPt(*MI.getParent(), MI);
const LLT DstTy = MRI.getType(DstReg);
const LLT SrcTy = MRI.getType(SrcReg);
assert((SrcTy.getNumElements() % DstTy.getNumElements()) == 0 &&
assert((!DstTy.isVector() ||
(SrcTy.getNumElements() % DstTy.getNumElements()) == 0) &&
"destination vector must divide source cleanly");

const unsigned HalfElements = SrcTy.getNumElements() / 2;
const LLT HalfSizeTy = LLT::fixed_vector(HalfElements, SrcTy.getScalarType());
const LLT ScalarTy = SrcTy.getScalarType();
const LLT HalfSizeTy = (HalfElements == 1)
? ScalarTy
: LLT::fixed_vector(HalfElements, ScalarTy);
const Register TmpReg = MRI.createGenericVirtualRegister(HalfSizeTy);
Register TargetReg = DstReg;
if (DstTy != HalfSizeTy) {
Expand All @@ -337,15 +341,16 @@ Register CombinerHelper::createUnmergeValue(MachineInstr &MI,
// Each destination fits n times into the source, and each iteration halves
// the source exactly. Therefore we need to pick which half we want to keep
// iterating on.
const uint32_t Position = DestinationIndex * DstTy.getNumElements();
const uint32_t DstNumElements = DstTy.isVector() ? DstTy.getNumElements() : 1;
const uint32_t Position = DestinationIndex * DstNumElements;
if (Position < (SrcTy.getNumElements() / 2))
Builder.buildInstr(TargetOpcode::G_UNMERGE_VALUES, {TargetReg, TmpReg},
{SrcReg});
else
Builder.buildInstr(TargetOpcode::G_UNMERGE_VALUES, {TmpReg, TargetReg},
{SrcReg});

if (DstTy != HalfSizeTy) {
if (HalfSizeTy.isVector() && DstTy != HalfSizeTy) {
return createUnmergeValue(MI, TargetReg, DstReg, DestinationIndex);
}

Expand Down Expand Up @@ -381,6 +386,29 @@ bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
return true;
}

// {0, 1, ..., |DstVector| - 1} -> G_UNMERGE_VALUES
// Extracts the first chunk of the source that has the same size as the destination vector
std::function<std::optional<int32_t>()> FirstQuarter =
adderGenerator(0, DstNumElts - 1, 1);
if (matchCombineShuffleVectorSimple(MI, FirstQuarter, DstNumElts - 1)) {
if (SrcTy == DstTy || ((SrcNumElts / 2) % 2) != 0)
return false;
createUnmergeValue(MI, MI.getOperand(1).getReg(), DstReg, 0);
MI.eraseFromParent();
return true;
}

// {|DstVector|, |DstVector| + 1, ..., 2 * |DstVector| - 1} -> G_UNMERGE_VALUES
// Extracts the second chunk of the source that has the same size as the destination vector
std::function<std::optional<int32_t>()> SecondQuarter =
adderGenerator(DstNumElts, (DstNumElts * 2) - 1, 1);
if (matchCombineShuffleVectorSimple(MI, SecondQuarter, DstNumElts - 1)) {
if (((SrcNumElts / 2) % 2) != 0)
return false;
createUnmergeValue(MI, MI.getOperand(1).getReg(), DstReg, 1);
MI.eraseFromParent();
return true;
}
return false;
}

Expand Down Expand Up @@ -416,6 +444,11 @@ bool CombinerHelper::matchCombineShuffleVectorSimple(
if ((DstNumElts < TargetDstSize) && DstNumElts != 1)
return false;

// Check that the shuffle mask can be broken evenly between the
// different sources.
if ((SrcNumElts % DstNumElts) != 0)
return false;

ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
for (unsigned i = 0; i != DstNumElts; ++i) {
int Idx = Mask[i];
Expand All @@ -427,7 +460,7 @@ bool CombinerHelper::matchCombineShuffleVectorSimple(

// Ensure the indices in each SrcType sized piece are sequential and that
// the same source is used for the whole piece.
if ((Idx % SrcNumElts != (ShiftIndex % SrcNumElts)))
if (Idx != ShiftIndex)
return false;
}

Expand Down
7 changes: 2 additions & 5 deletions llvm/test/CodeGen/AArch64/ext-narrow-index.ll
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@ define <8 x i8> @i8_off8(<16 x i8> %arg1, <16 x i8> %arg2) {
;
; CHECK-GISEL-LABEL: i8_off8:
; CHECK-GISEL: // %bb.0: // %entry
; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #8
; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GISEL-NEXT: mov d0, v0.d[1]
; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
Expand Down Expand Up @@ -254,9 +253,7 @@ define <8 x i8> @i8_zero_off8(<16 x i8> %arg1) {
;
; CHECK-GISEL-LABEL: i8_zero_off8:
; CHECK-GISEL: // %bb.0: // %entry
; CHECK-GISEL-NEXT: movi v1.2d, #0000000000000000
; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #8
; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GISEL-NEXT: mov d0, v0.d[1]
; CHECK-GISEL-NEXT: ret
entry:
%shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
Expand Down
73 changes: 10 additions & 63 deletions llvm/test/CodeGen/AArch64/vecreduce-add.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6305,69 +6305,16 @@ entry:
}

define i32 @add_pair_v8i16_v4i32_double_sext_zext_shuffle(<8 x i16> %ax, <8 x i16> %ay, <8 x i16> %bx, <8 x i16> %by) {
; CHECK-SD-BASE-LABEL: add_pair_v8i16_v4i32_double_sext_zext_shuffle:
; CHECK-SD-BASE: // %bb.0: // %entry
; CHECK-SD-BASE-NEXT: uaddlp v1.4s, v1.8h
; CHECK-SD-BASE-NEXT: uaddlp v3.4s, v3.8h
; CHECK-SD-BASE-NEXT: uadalp v1.4s, v0.8h
; CHECK-SD-BASE-NEXT: uadalp v3.4s, v2.8h
; CHECK-SD-BASE-NEXT: add v0.4s, v3.4s, v1.4s
; CHECK-SD-BASE-NEXT: addv s0, v0.4s
; CHECK-SD-BASE-NEXT: fmov w0, s0
; CHECK-SD-BASE-NEXT: ret
;
; CHECK-SD-DOT-LABEL: add_pair_v8i16_v4i32_double_sext_zext_shuffle:
; CHECK-SD-DOT: // %bb.0: // %entry
; CHECK-SD-DOT-NEXT: uaddlp v1.4s, v1.8h
; CHECK-SD-DOT-NEXT: uaddlp v3.4s, v3.8h
; CHECK-SD-DOT-NEXT: uadalp v1.4s, v0.8h
; CHECK-SD-DOT-NEXT: uadalp v3.4s, v2.8h
; CHECK-SD-DOT-NEXT: add v0.4s, v3.4s, v1.4s
; CHECK-SD-DOT-NEXT: addv s0, v0.4s
; CHECK-SD-DOT-NEXT: fmov w0, s0
; CHECK-SD-DOT-NEXT: ret
;
; CHECK-GI-BASE-LABEL: add_pair_v8i16_v4i32_double_sext_zext_shuffle:
; CHECK-GI-BASE: // %bb.0: // %entry
; CHECK-GI-BASE-NEXT: ushll v4.4s, v0.4h, #0
; CHECK-GI-BASE-NEXT: ushll2 v0.4s, v0.8h, #0
; CHECK-GI-BASE-NEXT: ushll v5.4s, v1.4h, #0
; CHECK-GI-BASE-NEXT: ushll2 v1.4s, v1.8h, #0
; CHECK-GI-BASE-NEXT: ushll v6.4s, v2.4h, #0
; CHECK-GI-BASE-NEXT: ushll2 v2.4s, v2.8h, #0
; CHECK-GI-BASE-NEXT: ushll v7.4s, v3.4h, #0
; CHECK-GI-BASE-NEXT: ushll2 v3.4s, v3.8h, #0
; CHECK-GI-BASE-NEXT: add v0.4s, v4.4s, v0.4s
; CHECK-GI-BASE-NEXT: add v1.4s, v5.4s, v1.4s
; CHECK-GI-BASE-NEXT: add v2.4s, v6.4s, v2.4s
; CHECK-GI-BASE-NEXT: add v3.4s, v7.4s, v3.4s
; CHECK-GI-BASE-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-GI-BASE-NEXT: add v1.4s, v2.4s, v3.4s
; CHECK-GI-BASE-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-GI-BASE-NEXT: addv s0, v0.4s
; CHECK-GI-BASE-NEXT: fmov w0, s0
; CHECK-GI-BASE-NEXT: ret
;
; CHECK-GI-DOT-LABEL: add_pair_v8i16_v4i32_double_sext_zext_shuffle:
; CHECK-GI-DOT: // %bb.0: // %entry
; CHECK-GI-DOT-NEXT: ushll v4.4s, v0.4h, #0
; CHECK-GI-DOT-NEXT: ushll2 v0.4s, v0.8h, #0
; CHECK-GI-DOT-NEXT: ushll v5.4s, v1.4h, #0
; CHECK-GI-DOT-NEXT: ushll2 v1.4s, v1.8h, #0
; CHECK-GI-DOT-NEXT: ushll v6.4s, v2.4h, #0
; CHECK-GI-DOT-NEXT: ushll2 v2.4s, v2.8h, #0
; CHECK-GI-DOT-NEXT: ushll v7.4s, v3.4h, #0
; CHECK-GI-DOT-NEXT: ushll2 v3.4s, v3.8h, #0
; CHECK-GI-DOT-NEXT: add v0.4s, v4.4s, v0.4s
; CHECK-GI-DOT-NEXT: add v1.4s, v5.4s, v1.4s
; CHECK-GI-DOT-NEXT: add v2.4s, v6.4s, v2.4s
; CHECK-GI-DOT-NEXT: add v3.4s, v7.4s, v3.4s
; CHECK-GI-DOT-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-GI-DOT-NEXT: add v1.4s, v2.4s, v3.4s
; CHECK-GI-DOT-NEXT: add v0.4s, v0.4s, v1.4s
; CHECK-GI-DOT-NEXT: addv s0, v0.4s
; CHECK-GI-DOT-NEXT: fmov w0, s0
; CHECK-GI-DOT-NEXT: ret
; CHECK-LABEL: add_pair_v8i16_v4i32_double_sext_zext_shuffle:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uaddlp v1.4s, v1.8h
; CHECK-NEXT: uaddlp v3.4s, v3.8h
; CHECK-NEXT: uadalp v1.4s, v0.8h
; CHECK-NEXT: uadalp v3.4s, v2.8h
; CHECK-NEXT: add v0.4s, v3.4s, v1.4s
; CHECK-NEXT: addv s0, v0.4s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
%axx = zext <8 x i16> %ax to <8 x i32>
%s1h = shufflevector <8 x i32> %axx, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,3 +111,177 @@ body: |
%0:_(<64 x s8>) = G_SHUFFLE_VECTOR %1:_(<32 x s8>), %2:_, shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)
$x0 = COPY %0:_(<64 x s8>)
PseudoRET implicit $lr, implicit $x0
...

---
name: concat_vector_reverse_32_512
legalized: false
body: |
bb.1.entry:
liveins: $wl2, $wl4
; CHECK-LABEL: name: concat_vector_reverse_32_512
; CHECK: liveins: $wl2, $wl4
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $wl4
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[COPY1]](<8 x s32>), [[COPY]](<8 x s32>)
; CHECK-NEXT: $x0 = COPY [[CONCAT_VECTORS]](<16 x s32>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
%1:_(<8 x s32>) = COPY $wl2
%2:_(<8 x s32>) = COPY $wl4
%0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<8 x s32>), %2:_, shufflemask(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
$x0 = COPY %0:_(<16 x s32>)
PseudoRET implicit $lr, implicit $x0
...

---
name: extract_vector_1024_to_512
legalized: false
body: |
bb.1.entry:
liveins: $y2
; CHECK-LABEL: name: extract_vector_1024_to_512
; CHECK: liveins: $y2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $y2
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>)
; CHECK-NEXT: $x0 = COPY [[UV]](<16 x s32>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
%1:_(<32 x s32>) = COPY $y2
%0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<32 x s32>), %1:_, shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
$x0 = COPY %0:_(<16 x s32>)
PseudoRET implicit $lr, implicit $x0
...

---
name: extract_vector_1024_to_256
legalized: false
body: |
bb.1.entry:
liveins: $y2
; CHECK-LABEL: name: extract_vector_1024_to_256
; CHECK: liveins: $y2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $y2
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>)
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s32>), [[UV3:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[UV]](<16 x s32>)
; CHECK-NEXT: $wl0 = COPY [[UV2]](<8 x s32>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
%1:_(<32 x s32>) = COPY $y2
%0:_(<8 x s32>) = G_SHUFFLE_VECTOR %1:_(<32 x s32>), %1:_, shufflemask(0, 1, 2, 3, 4, 5, 6, 7)
$wl0 = COPY %0:_(<8 x s32>)
PseudoRET implicit $lr, implicit $x0
...

---
name: extract_vector_1024_to_128
legalized: false
body: |
bb.1.entry:
liveins: $y2
; CHECK-LABEL: name: extract_vector_1024_to_128
; CHECK: liveins: $y2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $y2
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>)
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s32>), [[UV3:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[UV]](<16 x s32>)
; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_AIE_UNPAD_VECTOR [[UV2]](<8 x s32>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_UNPAD_VECTOR]](<4 x s32>)
%1:_(<32 x s32>) = COPY $y2
%0:_(<4 x s32>) = G_SHUFFLE_VECTOR %1:_(<32 x s32>), %1:_, shufflemask(0, 1, 2, 3)
PseudoRET implicit $lr, implicit %0
...

---
name: extract_vector_1024_to_32
legalized: false
body: |
bb.1.entry:
liveins: $y2
; CHECK-LABEL: name: extract_vector_1024_to_32
; CHECK: liveins: $y2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<128 x s8>) = COPY $y2
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<64 x s8>), [[UV1:%[0-9]+]]:_(<64 x s8>) = G_UNMERGE_VALUES [[COPY]](<128 x s8>)
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<32 x s8>), [[UV3:%[0-9]+]]:_(<32 x s8>) = G_UNMERGE_VALUES [[UV]](<64 x s8>)
; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_AIE_UNPAD_VECTOR [[UV2]](<32 x s8>)
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<8 x s8>), [[UV5:%[0-9]+]]:_(<8 x s8>) = G_UNMERGE_VALUES [[AIE_UNPAD_VECTOR]](<16 x s8>)
; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<4 x s8>), [[UV7:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[UV4]](<8 x s8>)
; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s8>), [[UV9:%[0-9]+]]:_(<2 x s8>) = G_UNMERGE_VALUES [[UV6]](<4 x s8>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV8]](<2 x s8>)
%1:_(<128 x s8>) = COPY $y2
%0:_(<2 x s8>) = G_SHUFFLE_VECTOR %1:_(<128 x s8>), %1:_, shufflemask(0, 1)
PseudoRET implicit $lr, implicit %0
...

---
name: extract_vector_second_half_512_to_256
legalized: false
body: |
bb.1.entry:
liveins: $x0, $x1
; CHECK-LABEL: name: extract_vector_second_half_512_to_256
; CHECK: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s32>), [[UV1:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[COPY]](<16 x s32>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV1]](<8 x s32>)
%1:_(<16 x s32>) = COPY $x0
%2:_(<8 x s32>) = G_SHUFFLE_VECTOR %1:_(<16 x s32>), %1:_(<16 x s32>), shufflemask(8, 9, 10, 11, 12, 13, 14, 15)
PseudoRET implicit $lr, implicit %2
...

---
name: extract_vector_second_half_512_to_128
legalized: false
body: |
bb.1.entry:
liveins: $x0, $x1
; CHECK-LABEL: name: extract_vector_second_half_512_to_128
; CHECK: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s32>), [[UV1:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[COPY]](<16 x s32>)
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[UV]](<8 x s32>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV3]](<4 x s32>)
%1:_(<16 x s32>) = COPY $x0
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %1:_(<16 x s32>), %1:_(<16 x s32>), shufflemask(4, 5, 6, 7)
PseudoRET implicit $lr, implicit %2
...

---
name: extract_vector_second_half_1024_to_512
legalized: false
body: |
bb.1.entry:
liveins: $y2, $y3
; CHECK-LABEL: name: extract_vector_second_half_1024_to_512
; CHECK: liveins: $y2, $y3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<128 x s8>) = COPY $y2
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<64 x s8>), [[UV1:%[0-9]+]]:_(<64 x s8>) = G_UNMERGE_VALUES [[COPY]](<128 x s8>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV1]](<64 x s8>)
%1:_(<128 x s8>) = COPY $y2
%2:_(<64 x s8>) = G_SHUFFLE_VECTOR %1:_(<128 x s8>), %1:_(<128 x s8>), shufflemask(64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127)
PseudoRET implicit $lr, implicit %2
...

---
name: extract_vector_second_half_1024_to_32
legalized: false
body: |
bb.1.entry:
liveins: $y2, $y3
; CHECK-LABEL: name: extract_vector_second_half_1024_to_32
; CHECK: liveins: $y2, $y3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<128 x s8>) = COPY $y2
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<64 x s8>), [[UV1:%[0-9]+]]:_(<64 x s8>) = G_UNMERGE_VALUES [[COPY]](<128 x s8>)
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<32 x s8>), [[UV3:%[0-9]+]]:_(<32 x s8>) = G_UNMERGE_VALUES [[UV]](<64 x s8>)
; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_AIE_UNPAD_VECTOR [[UV2]](<32 x s8>)
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<8 x s8>), [[UV5:%[0-9]+]]:_(<8 x s8>) = G_UNMERGE_VALUES [[AIE_UNPAD_VECTOR]](<16 x s8>)
; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<4 x s8>), [[UV7:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[UV4]](<8 x s8>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV7]](<4 x s8>)
%1:_(<128 x s8>) = COPY $y2
%2:_(<4 x s8>) = G_SHUFFLE_VECTOR %1:_(<128 x s8>), %1:_(<128 x s8>), shufflemask(4, 5, 6, 7)
PseudoRET implicit $lr, implicit %2
Loading

0 comments on commit 74dca3d

Please sign in to comment.