Skip to content

Commit

Permalink
[GISel][CombinerHelper] Add a combiner to concatenate the first halves…
Browse files Browse the repository at this point in the history
… of two vectors together
  • Loading branch information
ValentijnvdBeek committed Sep 23, 2024
1 parent 967bba2 commit c38562a
Show file tree
Hide file tree
Showing 6 changed files with 353 additions and 142 deletions.
38 changes: 38 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,44 @@ bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
return true;
}

// After this point, it is assumed that our shufflevectors work on vectors that
// can be split into two halves.
if ((DstNumElts % 2) != 0)
return false;

// {0, 1, ..., n/2-1, n, n+1, ..., n+n/2-1} (n = DstNumElts) ->
// G_UNMERGE_VALUES + G_CONCAT_VECTORS
// Take the first halves of the two vectors and concatenate them into one
// vector.
GeneratorType FirstEightA = adderGenerator(0, (DstNumElts / 2) - 1, 1);
GeneratorType FirstEightB =
adderGenerator(DstNumElts, DstNumElts + (DstNumElts / 2) - 1, 1);

auto UnmergeMatcher = SmallVector<GeneratorType>{FirstEightA, FirstEightB};
GeneratorType FirstAndThird = concatGenerators(UnmergeMatcher);
if (matchCombineShuffleVector(MI, FirstAndThird, (DstNumElts / 2) - 1)) {
if (DstNumElts <= 2)
return false;
const Register DstReg = MI.getOperand(0).getReg();
const LLT HalfSrcTy =
LLT::fixed_vector(SrcNumElts / 2, SrcTy.getScalarType());
const Register HalfOfA = createUnmergeValue(
MI, MI.getOperand(1).getReg(),
MRI.createGenericVirtualRegister(HalfSrcTy), 0, 0, SrcNumElts);
const Register HalfOfB = createUnmergeValue(
MI, MI.getOperand(2).getReg(),
MRI.createGenericVirtualRegister(HalfSrcTy), 0, 0, SrcNumElts);

const ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
if (Mask[0] <= 0) {
Builder.buildMergeLikeInstr(DstReg, {HalfOfA, HalfOfB});
} else {
Builder.buildMergeLikeInstr(DstReg, {HalfOfB, HalfOfA});
}

MI.eraseFromParent();
return true;
}

return false;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s
# Modifications (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates

---
name: shuffle_concat_1
Expand Down Expand Up @@ -101,7 +102,9 @@ body: |
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS %a(<4 x s8>), %b(<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>)
; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS %c(<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>)
; CHECK-NEXT: %z:_(<16 x s8>) = G_SHUFFLE_VECTOR [[CONCAT_VECTORS]](<16 x s8>), [[CONCAT_VECTORS1]], shufflemask(0, undef, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, undef, undef, undef, undef)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s8>), [[UV1:%[0-9]+]]:_(<8 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s8>)
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s8>), [[UV3:%[0-9]+]]:_(<8 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<16 x s8>)
; CHECK-NEXT: %z:_(<16 x s8>) = G_CONCAT_VECTORS [[UV]](<8 x s8>), [[UV2]](<8 x s8>)
; CHECK-NEXT: $q0 = COPY %z(<16 x s8>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%p1:_(p0) = COPY $x0
Expand Down Expand Up @@ -179,7 +182,9 @@ body: |
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS %a(<4 x s8>), %b(<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>)
; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS %c(<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>)
; CHECK-NEXT: %z:_(<16 x s8>) = G_SHUFFLE_VECTOR [[CONCAT_VECTORS]](<16 x s8>), [[CONCAT_VECTORS1]], shufflemask(undef, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, undef, undef, undef, undef)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s8>), [[UV1:%[0-9]+]]:_(<8 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s8>)
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s8>), [[UV3:%[0-9]+]]:_(<8 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<16 x s8>)
; CHECK-NEXT: %z:_(<16 x s8>) = G_CONCAT_VECTORS [[UV]](<8 x s8>), [[UV2]](<8 x s8>)
; CHECK-NEXT: $q0 = COPY %z(<16 x s8>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%p1:_(p0) = COPY $x0
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple aarch64-apple-ios -run-pass=aarch64-prelegalizer-combiner %s -o - | FileCheck %s
# Modifications (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates

# Check that we canonicalize shuffle_vector(Src1, Src2, mask(0,1,2,3))
# into concat_vector(Src1, Src2).
Expand Down Expand Up @@ -270,8 +271,10 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(4, 5, 0, 1)
; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>)
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[UV2]](<2 x s32>), [[UV]](<2 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit [[CONCAT_VECTORS]](<4 x s32>)
%0:_(<4 x s32>) = COPY $q0
%1:_(<4 x s32>) = COPY $q1
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1(<4 x s32>), shufflemask(4,5,0,1)
Expand Down
73 changes: 19 additions & 54 deletions llvm/test/CodeGen/AArch64/arm64-neon-copy.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; Modifications (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates

; CHECK-GI: warning: Instruction selection used fallback path for test_bitcastv2f32tov1f64
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_bitcastv1f64tov2f32
Expand Down Expand Up @@ -1776,19 +1777,10 @@ entry:
}

define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 {
; CHECK-SD-LABEL: test_concat_v16i8_v16i8_v16i8:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_concat_v16i8_v16i8_v16i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: adrp x8, .LCPI126_0
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI126_0]
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-GI-NEXT: ret
; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: ret
entry:
%vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
ret <16 x i8> %vecinit30
Expand All @@ -1803,9 +1795,7 @@ define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
;
; CHECK-GI-LABEL: test_concat_v16i8_v8i8_v16i8:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: adrp x8, .LCPI127_0
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: mov b2, v0.b[1]
; CHECK-GI-NEXT: mov b3, v0.b[2]
; CHECK-GI-NEXT: mov b4, v0.b[3]
Expand All @@ -1814,14 +1804,13 @@ define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
; CHECK-GI-NEXT: mov b7, v0.b[6]
; CHECK-GI-NEXT: mov b16, v0.b[7]
; CHECK-GI-NEXT: mov v0.b[1], v2.b[0]
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI127_0]
; CHECK-GI-NEXT: mov v0.b[2], v3.b[0]
; CHECK-GI-NEXT: mov v0.b[3], v4.b[0]
; CHECK-GI-NEXT: mov v0.b[4], v5.b[0]
; CHECK-GI-NEXT: mov v0.b[5], v6.b[0]
; CHECK-GI-NEXT: mov v0.b[6], v7.b[0]
; CHECK-GI-NEXT: mov v0.b[7], v16.b[0]
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: ret
entry:
%vecext = extractelement <8 x i8> %x, i32 0
Expand Down Expand Up @@ -1999,19 +1988,10 @@ entry:
}

define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 {
; CHECK-SD-LABEL: test_concat_v8i16_v8i16_v8i16:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_concat_v8i16_v8i16_v8i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: adrp x8, .LCPI130_0
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI130_0]
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-GI-NEXT: ret
; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: ret
entry:
%vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
ret <8 x i16> %vecinit14
Expand All @@ -2026,17 +2006,14 @@ define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
;
; CHECK-GI-LABEL: test_concat_v8i16_v4i16_v8i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: adrp x8, .LCPI131_0
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: mov h2, v0.h[1]
; CHECK-GI-NEXT: mov h3, v0.h[2]
; CHECK-GI-NEXT: mov h4, v0.h[3]
; CHECK-GI-NEXT: mov v0.h[1], v2.h[0]
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI131_0]
; CHECK-GI-NEXT: mov v0.h[2], v3.h[0]
; CHECK-GI-NEXT: mov v0.h[3], v4.h[0]
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: ret
entry:
%vecext = extractelement <4 x i16> %x, i32 0
Expand Down Expand Up @@ -2142,19 +2119,10 @@ entry:
}

define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 {
; CHECK-SD-LABEL: test_concat_v4i32_v4i32_v4i32:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_concat_v4i32_v4i32_v4i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: adrp x8, .LCPI134_0
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI134_0]
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-GI-NEXT: ret
; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: ret
entry:
%vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x i32> %vecinit6
Expand All @@ -2169,13 +2137,10 @@ define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
;
; CHECK-GI-LABEL: test_concat_v4i32_v2i32_v4i32:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: adrp x8, .LCPI135_0
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT: mov s2, v0.s[1]
; CHECK-GI-NEXT: mov v0.s[1], v2.s[0]
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI135_0]
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT: ret
entry:
%vecext = extractelement <2 x i32> %x, i32 0
Expand Down
Loading

0 comments on commit c38562a

Please sign in to comment.