From 16445fb7d1ae5ecec7584ac2780cbd3a1310da4c Mon Sep 17 00:00:00 2001 From: Sai Abhinay Anubola Date: Thu, 14 Nov 2024 12:21:47 +0530 Subject: [PATCH 1/3] Combine VExtract intrinsics into generic opcode in PreLegalizerCombiner --- .../Target/AIE/AIE2PreLegalizerCombiner.cpp | 28 +++ .../combine-vextract-prelegalizer.mir | 194 ++++++++++++++++++ .../AIE/aie2/GlobalISel/legalize-bfloat16.mir | 5 +- 3 files changed, 224 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/AIE/aie2/GlobalISel/combine-vextract-prelegalizer.mir diff --git a/llvm/lib/Target/AIE/AIE2PreLegalizerCombiner.cpp b/llvm/lib/Target/AIE/AIE2PreLegalizerCombiner.cpp index 500b06adf019..28d787e91828 100644 --- a/llvm/lib/Target/AIE/AIE2PreLegalizerCombiner.cpp +++ b/llvm/lib/Target/AIE/AIE2PreLegalizerCombiner.cpp @@ -80,6 +80,8 @@ class AIE2PreLegalizerCombinerImpl : public Combiner { bool tryToCombineVectorInserts(MachineInstr &MI, unsigned SclSrcBits) const; + bool tryToCombineVExtractElt(MachineInstr &MI) const; + bool tryToCombineIntrinsic(MachineInstr &MI) const; private: @@ -288,6 +290,27 @@ bool AIE2PreLegalizerCombinerImpl::tryToCombineVectorInserts( return true; } +bool AIE2PreLegalizerCombinerImpl::tryToCombineVExtractElt( + MachineInstr &MI) const { + const Register DstReg = MI.getOperand(0).getReg(); + // In this case of G_INTRINSIC operand 1 is target intrinsic + const Register SrcReg = MI.getOperand(2).getReg(); + const Register IdxReg = MI.getOperand(3).getReg(); + const Register SignReg = MI.getOperand(4).getReg(); + + const auto SignVal = getIConstantVRegSExtVal(SignReg, MRI); + if (!SignVal) + return false; + auto *TII = static_cast(STI.getInstrInfo()); + const unsigned Opcode = + TII->getGenericExtractVectorEltOpcode(SignVal.value()); + MachineIRBuilder MIRBuilder(MI); + MIRBuilder.buildInstr(Opcode, {DstReg}, {SrcReg, IdxReg}); + + MI.eraseFromParent(); + return true; +} + bool AIE2PreLegalizerCombinerImpl::tryToCombineIntrinsic( MachineInstr &MI) const { const unsigned IntrinsicID = cast(MI).getIntrinsicID(); @@ -306,6 +329,11 @@ bool AIE2PreLegalizerCombinerImpl::tryToCombineIntrinsic( case Intrinsic::aie2_vinsert32_I512: { return tryToCombineVectorInserts(MI, getVInsertScalarSize(IntrinsicID)); } + case Intrinsic::aie2_vextract_elem8_I512: + case Intrinsic::aie2_vextract_elem16_I512: + case Intrinsic::aie2_vextract_elem32_I512: { + return tryToCombineVExtractElt(MI); + } default: break; } diff --git a/llvm/test/CodeGen/AIE/aie2/GlobalISel/combine-vextract-prelegalizer.mir b/llvm/test/CodeGen/AIE/aie2/GlobalISel/combine-vextract-prelegalizer.mir new file mode 100644 index 000000000000..c829ca570fa5 --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2/GlobalISel/combine-vextract-prelegalizer.mir @@ -0,0 +1,194 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2024 Advanced Micro Devices, Inc. 
or its affiliates +# RUN: llc -mtriple aie2 -run-pass=aie2-prelegalizer-combiner %s -verify-machineinstrs -o - | FileCheck %s + +--- +name: vextract.8.zext +legalized: false +body: | + bb.1.entry: + liveins: $x0 + ; CHECK-LABEL: name: vextract.8.zext + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<64 x s8>) = COPY $x0 + ; CHECK-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[COPY]](<64 x s8>), [[C]](s32) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s32) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(<64 x s8>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32) + PseudoRET implicit $lr, implicit %3 +... + +--- +name: vextract.8.sext +legalized: false +body: | + bb.1.entry: + liveins: $x0 + ; CHECK-LABEL: name: vextract.8.sext + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<64 x s8>) = COPY $x0 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY]](<64 x s8>), [[C]](s32) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s32) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = G_CONSTANT i32 1 + %2:_(<64 x s8>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32) + PseudoRET implicit $lr, implicit %3 +... + +# Negative Test Case: Combining is not possible because the vextract8 has a non-constant sign register +--- +name: vextract.8.neg +legalized: false +body: | + bb.1.entry: + liveins: $x0 + ; CHECK-LABEL: name: vextract.8.neg + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<64 x s8>) = COPY $x0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), [[COPY1]](<64 x s8>), [[C]](s32), [[COPY]](s32) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[INT]](s32) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = COPY $r1 + %2:_(<64 x s8>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32) + PseudoRET implicit $lr, implicit %3 +... + +--- +name: vextract.16.zext +legalized: false +body: | + bb.1.entry: + liveins: $x0 + ; CHECK-LABEL: name: vextract.16.zext + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x0 + ; CHECK-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[COPY]](<32 x s16>), [[C]](s32) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s32) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(<32 x s16>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32) + PseudoRET implicit $lr, implicit %3 +... 
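
The G_AIE_ZEXT/SEXT_EXTRACT_VECTOR_ELT opcodes checked above come from TII->getGenericExtractVectorEltOpcode(SignVal), whose definition is not part of this diff. A minimal sketch of what that hook plausibly reduces to, inferred from the opcodes in the CHECK lines (the in-tree AIE2InstrInfo code may differ):

  // Illustrative only: selects the generic extract opcode from the intrinsic's
  // constant sign operand (a set sign flag selects the sign-extending form).
  unsigned AIE2InstrInfo::getGenericExtractVectorEltOpcode(bool SignExtend) const {
    return SignExtend ? AIE2::G_AIE_SEXT_EXTRACT_VECTOR_ELT
                      : AIE2::G_AIE_ZEXT_EXTRACT_VECTOR_ELT;
  }
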
+ +--- +name: vextract.16.sext +legalized: false +body: | + bb.1.entry: + liveins: $x0 + ; CHECK-LABEL: name: vextract.16.sext + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x0 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY]](<32 x s16>), [[C]](s32) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s32) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = G_CONSTANT i32 1 + %2:_(<32 x s16>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32) + PseudoRET implicit $lr, implicit %3 +... + +# Negative Test Case: Combining is not possible because the vextract16 has a non-constant sign register +--- +name: vextract.16.neg +legalized: false +body: | + bb.1.entry: + liveins: $x0 + ; CHECK-LABEL: name: vextract.16.neg + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<32 x s16>) = COPY $x0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), [[COPY1]](<32 x s16>), [[C]](s32), [[COPY]](s32) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[INT]](s32) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = COPY $r1 + %2:_(<32 x s16>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32) + PseudoRET implicit $lr, implicit %3 +... + +--- +name: vextract.32.zext +legalized: false +body: | + bb.1.entry: + liveins: $x0 + ; CHECK-LABEL: name: vextract.32.zext + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0 + ; CHECK-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[C]](s32) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s32) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(<16 x s32>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem32.I512), %2(<16 x s32>), %0(s32), %1(s32) + PseudoRET implicit $lr, implicit %3 +... + +--- +name: vextract.32.sext +legalized: false +body: | + bb.1.entry: + liveins: $x0 + ; CHECK-LABEL: name: vextract.32.sext + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[C]](s32) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s32) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = G_CONSTANT i32 1 + %2:_(<16 x s32>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem32.I512), %2(<16 x s32>), %0(s32), %1(s32) + PseudoRET implicit $lr, implicit %3 +... 
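
The *.neg cases in this file copy the sign operand from $r1 instead of materializing a G_CONSTANT, so the combine must back off. The guard is the standard GlobalISel constant lookup; a self-contained sketch of that check (the helper name here is illustrative, not from the patch):

  #include "llvm/CodeGen/GlobalISel/Utils.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
  #include <optional>

  using namespace llvm;

  // Returns the sign flag when SignReg is defined by a G_CONSTANT and
  // std::nullopt otherwise, in which case the intrinsic is left untouched.
  static std::optional<bool> matchConstantSignFlag(Register SignReg,
                                                   const MachineRegisterInfo &MRI) {
    if (std::optional<int64_t> Val = getIConstantVRegSExtVal(SignReg, MRI))
      return *Val != 0;
    return std::nullopt;
  }
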
+ +# Negative Test Case: Combining is not possible because the vextract32 has a non-constant sign register +--- +name: vextract.32.neg +legalized: false +body: | + bb.1.entry: + liveins: $x0 + ; CHECK-LABEL: name: vextract.32.neg + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s32>) = COPY $x0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem32.I512), [[COPY1]](<16 x s32>), [[C]](s32), [[COPY]](s32) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[INT]](s32) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = COPY $r1 + %2:_(<16 x s32>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem32.I512), %2(<16 x s32>), %0(s32), %1(s32) + PseudoRET implicit $lr, implicit %3 +... diff --git a/llvm/test/CodeGen/AIE/aie2/GlobalISel/legalize-bfloat16.mir b/llvm/test/CodeGen/AIE/aie2/GlobalISel/legalize-bfloat16.mir index f7a05db826c2..05fc72ec0056 100644 --- a/llvm/test/CodeGen/AIE/aie2/GlobalISel/legalize-bfloat16.mir +++ b/llvm/test/CodeGen/AIE/aie2/GlobalISel/legalize-bfloat16.mir @@ -17,9 +17,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x0 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), [[COPY]](<32 x s16>), [[C]](s32), [[C1]](s32) - ; CHECK-NEXT: $r0 = COPY [[INT]](s32) + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY]](<32 x s16>), [[C]](s32) + ; CHECK-NEXT: $r0 = COPY [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s32) ; CHECK-NEXT: PseudoRET implicit $lr, implicit $r0, implicit $x0 %0:_(s32) = COPY $r1 %1:_(<32 x s16>) = COPY $x0 From 02d7e59d0a04d6e0563f359704b26e110a68c9bf Mon Sep 17 00:00:00 2001 From: Sai Abhinay Anubola Date: Tue, 19 Nov 2024 15:24:11 +0530 Subject: [PATCH 2/3] Combine G_TRUNC and G_Z/SEXT users of vextract.8/16 in PreLegalizerCombiner --- .../Target/AIE/AIE2PreLegalizerCombiner.cpp | 65 ++++++ .../combine-vextract-prelegalizer.mir | 200 ++++++++++++++++++ 2 files changed, 265 insertions(+) diff --git a/llvm/lib/Target/AIE/AIE2PreLegalizerCombiner.cpp b/llvm/lib/Target/AIE/AIE2PreLegalizerCombiner.cpp index 28d787e91828..247e920b842b 100644 --- a/llvm/lib/Target/AIE/AIE2PreLegalizerCombiner.cpp +++ b/llvm/lib/Target/AIE/AIE2PreLegalizerCombiner.cpp @@ -80,6 +80,9 @@ class AIE2PreLegalizerCombinerImpl : public Combiner { bool tryToCombineVectorInserts(MachineInstr &MI, unsigned SclSrcBits) const; + bool tryToCombineTruncExt(Register DstReg, bool SignVal, + unsigned SrcEltSize) const; + bool tryToCombineVExtractElt(MachineInstr &MI) const; bool tryToCombineIntrinsic(MachineInstr &MI) const; @@ -290,6 +293,62 @@ bool AIE2PreLegalizerCombinerImpl::tryToCombineVectorInserts( return true; } +/// \returns true if it is possible to combine the below sequence of MIRs +/// From : %10:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), +/// %2(<32 x s16>), %0(s32), %1(s32) +/// %20:_(s16) = G_TRUNC %10(s32) +/// %30:_(s20) = G_SEXT %20(s16) +/// To : %10:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), +/// %2(<32 x s16>), %0(s32), %1(s32) +/// %30:_(s20) = G_TRUNC %10(s32) +/// Or even: +/// From : %10:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), +/// %2(<64 x s8>), %0(s32), %1(s32) +/// %20:_(s8) = 
G_TRUNC %10(s32) +/// %30:_(s20) = G_SEXT %20(s8) +/// To : %10:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), +/// %2(<64 x s8>), %0(s32), %1(s32) +/// %30:_(s20) = G_TRUNC %10(s32) +/// This also enables S20Narrowing for vextract +bool AIE2PreLegalizerCombinerImpl::tryToCombineTruncExt( + Register DstReg, bool SignVal, unsigned SrcEltSize) const { + // Checks if a given register has non-debug user with a specific opcode and + // destination size, and return that user. + auto GetUseWithOpCode = + [&](const Register Reg, const unsigned OpcodeToCheck, + const unsigned DstSize) -> std::optional { + for (auto &Use : MRI.use_nodbg_instructions(Reg)) { + if (Use.getOpcode() == OpcodeToCheck) { + const LLT DstRegTy = MRI.getType(Use.getOperand(0).getReg()); + if (DstRegTy.getSizeInBits() == DstSize) + return &Use; + } + } + return std::nullopt; + }; + + if (auto Trunc = + GetUseWithOpCode(DstReg, TargetOpcode::G_TRUNC, SrcEltSize)) { + MachineInstr *TruncMI = Trunc.value(); + const unsigned ExtOpcode = + SignVal ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT; + const Register UseDstReg = TruncMI->getOperand(0).getReg(); + // Ensure G_TRUNC has a single non-debug user before safely eliminating it. + if (!MRI.hasOneNonDBGUser(UseDstReg)) + return false; + if (auto Ext = GetUseWithOpCode(UseDstReg, ExtOpcode, 20)) { + MachineInstr *ExtMI = Ext.value(); + MachineIRBuilder MIRBuilder(*ExtMI); + MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {ExtMI->getOperand(0)}, + {DstReg}); + ExtMI->eraseFromParent(); + TruncMI->eraseFromParent(); + return true; + } + } + return false; +} + bool AIE2PreLegalizerCombinerImpl::tryToCombineVExtractElt( MachineInstr &MI) const { const Register DstReg = MI.getOperand(0).getReg(); @@ -301,6 +360,12 @@ bool AIE2PreLegalizerCombinerImpl::tryToCombineVExtractElt( const auto SignVal = getIConstantVRegSExtVal(SignReg, MRI); if (!SignVal) return false; + const LLT SrcVecTy = MRI.getType(SrcReg); + const unsigned SrcEltSize = SrcVecTy.getScalarSizeInBits(); + if (SrcEltSize == 8 || SrcEltSize == 16) { + tryToCombineTruncExt(DstReg, SignVal.value(), SrcEltSize); + } + auto *TII = static_cast(STI.getInstrInfo()); const unsigned Opcode = TII->getGenericExtractVectorEltOpcode(SignVal.value()); diff --git a/llvm/test/CodeGen/AIE/aie2/GlobalISel/combine-vextract-prelegalizer.mir b/llvm/test/CodeGen/AIE/aie2/GlobalISel/combine-vextract-prelegalizer.mir index c829ca570fa5..a6dadf26cf03 100644 --- a/llvm/test/CodeGen/AIE/aie2/GlobalISel/combine-vextract-prelegalizer.mir +++ b/llvm/test/CodeGen/AIE/aie2/GlobalISel/combine-vextract-prelegalizer.mir @@ -192,3 +192,203 @@ body: | %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem32.I512), %2(<16 x s32>), %0(s32), %1(s32) PseudoRET implicit $lr, implicit %3 ... 
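
The tests added below exercise tryToCombineTruncExt, which walks forward from the intrinsic result through its G_TRUNC user and then that trunc's G_SEXT/G_ZEXT user. For comparison, the same shape can be matched from the extension side with MIPatternMatch; a sketch rooted at the extension, which is not how this patch is written:

  #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"

  using namespace llvm;
  using namespace llvm::MIPatternMatch;

  // Matches ExtDst = G_SEXT(G_TRUNC(Src)) and binds the inner source register,
  // i.e. the s32 result of the vextract intrinsic.
  static bool matchSExtOfTrunc(Register ExtDst, Register &Src,
                               const MachineRegisterInfo &MRI) {
    return mi_match(ExtDst, MRI, m_GSExt(m_GTrunc(m_Reg(Src))));
  }
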
+ +--- +name: vextract.8.trunc.sext +legalized: false +body: | + bb.1.entry: + liveins: $x0 + ; CHECK-LABEL: name: vextract.8.trunc.sext + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<64 x s8>) = COPY $x0 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY]](<64 x s8>), [[C]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s32) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[TRUNC]](s20) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = G_CONSTANT i32 1 + %2:_(<64 x s8>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32) + %4:_(s8) = G_TRUNC %3(s32) + %5:_(s20) = G_SEXT %4(s8) + PseudoRET implicit $lr, implicit %5 +... + +--- +name: vextract.8.trunc.zext +legalized: false +body: | + bb.1.entry: + liveins: $x0 + ; CHECK-LABEL: name: vextract.8.trunc.zext + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<64 x s8>) = COPY $x0 + ; CHECK-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[COPY]](<64 x s8>), [[C]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s32) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[TRUNC]](s20) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(<64 x s8>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32) + %4:_(s8) = G_TRUNC %3(s32) + %5:_(s20) = G_ZEXT %4(s8) + PseudoRET implicit $lr, implicit %5 +... + +# Negative Test Case: Combining is not possible because there mismatch in extension type (zero/signed) between vextract and G_SEXT +--- +name: vextract.8.trunc.ext.neg +legalized: false +body: | + bb.1.entry: + liveins: $x0 + ; CHECK-LABEL: name: vextract.8.trunc.ext.neg + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<64 x s8>) = COPY $x0 + ; CHECK-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[COPY]](<64 x s8>), [[C]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s32) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s20) = G_SEXT [[TRUNC]](s8) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[SEXT]](s20) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(<64 x s8>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32) + %4:_(s8) = G_TRUNC %3(s32) + %5:_(s20) = G_SEXT %4(s8) + PseudoRET implicit $lr, implicit %5 +... + +# Negative Test Case: Combining is not possible because the destination register of G_TRUNC has multiple uses. 
+--- +name: vextract.8.trunc.zext.neg +legalized: false +body: | + bb.1.entry: + liveins: $x0 + ; CHECK-LABEL: name: vextract.8.trunc.zext.neg + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<64 x s8>) = COPY $x0 + ; CHECK-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[COPY]](<64 x s8>), [[C]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s20) = G_ZEXT [[TRUNC]](s8) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0 + ; CHECK-NEXT: G_STORE [[TRUNC]](s8), [[C1]](p0) :: (store (s8)) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[ZEXT]](s20) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(<64 x s8>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32) + %4:_(s8) = G_TRUNC %3(s32) + %5:_(s20) = G_ZEXT %4(s8) + %6:_(p0) = G_CONSTANT i20 0 + G_STORE %4(s8), %6(p0) :: (store (s8)) + PseudoRET implicit $lr, implicit %5 +... + +--- +name: vextract.16.trunc.zext +legalized: false +body: | + bb.1.entry: + liveins: $x0 + ; CHECK-LABEL: name: vextract.16.trunc.zext + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x0 + ; CHECK-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[COPY]](<32 x s16>), [[C]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s32) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[TRUNC]](s20) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(<32 x s16>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32) + %4:_(s16) = G_TRUNC %3(s32) + %5:_(s20) = G_ZEXT %4(s16) + PseudoRET implicit $lr, implicit %5 +... + +--- +name: vextract.16.trunc.sext +legalized: false +body: | + bb.1.entry: + liveins: $x0 + ; CHECK-LABEL: name: vextract.16.trunc.sext + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x0 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY]](<32 x s16>), [[C]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s32) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[TRUNC]](s20) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = G_CONSTANT i32 1 + %2:_(<32 x s16>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32) + %4:_(s16) = G_TRUNC %3(s32) + %5:_(s20) = G_SEXT %4(s16) + PseudoRET implicit $lr, implicit %5 +... 
+ +# Negative Test Case: Combining is not possible because there mismatch in extension type (zero/signed) between vextract and G_ZEXT +--- +name: vextract.16.trunc.ext.neg +legalized: false +body: | + bb.1.entry: + liveins: $x0 + ; CHECK-LABEL: name: vextract.16.trunc.ext.neg + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x0 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY]](<32 x s16>), [[C]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s20) = G_ZEXT [[TRUNC]](s16) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[ZEXT]](s20) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = G_CONSTANT i32 1 + %2:_(<32 x s16>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32) + %4:_(s16) = G_TRUNC %3(s32) + %5:_(s20) = G_ZEXT %4(s16) + PseudoRET implicit $lr, implicit %5 +... + +# Negative Test Case: Combining is not possible because the destination register of G_TRUNC has multiple uses. +--- +name: vextract.16.trunc.sext.neg +legalized: false +body: | + bb.1.entry: + liveins: $x0 + ; CHECK-LABEL: name: vextract.16.trunc.sext.neg + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x0 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY]](<32 x s16>), [[C]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s32) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s20) = G_SEXT [[TRUNC]](s16) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0 + ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[C1]](p0) :: (store (s16)) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[SEXT]](s20) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = G_CONSTANT i32 1 + %2:_(<32 x s16>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32) + %4:_(s16) = G_TRUNC %3(s32) + %5:_(s20) = G_SEXT %4(s16) + %6:_(p0) = G_CONSTANT i20 0 + G_STORE %4(s16), %6(p0) :: (store (s16)) + PseudoRET implicit $lr, implicit %5 +... 
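
After this patch, the extension is folded away and only a G_TRUNC from s32 to s20 remains on the extract result. The next patch removes that as well by letting the generic extract define an s20 destination directly, which requires relaxing its verifier check. Stated on its own, the relaxed condition amounts to the following (a sketch; the in-tree verifier sets ErrInfo inside a switch):

  #include "llvm/CodeGen/MachineInstr.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"

  using namespace llvm;

  // G_AIE_ZEXT/SEXT_EXTRACT_VECTOR_ELT may now define either an s32 or an s20.
  static bool hasValidExtractDstType(const MachineInstr &MI,
                                     const MachineRegisterInfo &MRI) {
    const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    return DstTy == LLT::scalar(32) || DstTy == LLT::scalar(20);
  }
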
From 6138a604b9afa88c318b7206210a7db20e9927ed Mon Sep 17 00:00:00 2001 From: Sai Abhinay Anubola Date: Fri, 8 Nov 2024 15:01:46 +0530 Subject: [PATCH 3/3] Support for allowing direct VEXTRACT to 20-bit registers --- llvm/lib/Target/AIE/AIE2InstrInfo.cpp | 5 +- llvm/lib/Target/AIE/AIECombinerHelper.cpp | 48 ++- .../prelegalizercombiner-s20-narrowing.mir | 291 ++++++++++++++++++ .../verifier/verify-szext-extract-vec-elt.mir | 4 +- 4 files changed, 336 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/AIE/AIE2InstrInfo.cpp b/llvm/lib/Target/AIE/AIE2InstrInfo.cpp index cdd39e6732fa..4e6df730b9a1 100644 --- a/llvm/lib/Target/AIE/AIE2InstrInfo.cpp +++ b/llvm/lib/Target/AIE/AIE2InstrInfo.cpp @@ -152,8 +152,9 @@ bool AIE2InstrInfo::verifyGenericInstruction(const MachineInstr &MI, switch (MI.getOpcode()) { case AIE2::G_AIE_ZEXT_EXTRACT_VECTOR_ELT: case AIE2::G_AIE_SEXT_EXTRACT_VECTOR_ELT: - ErrInfo = "Expected 32bit scalar destination"; - return MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(32); + ErrInfo = "Expected 32bit or 20bit scalar destination"; + return (MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(32) || + MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(20)); case AIE2::G_AIE_PAD_VECTOR_UNDEF: return verifySameLaneTypes(MI, ErrInfo) && isLegalTypeToUnpad(MRI.getType(MI.getOperand(0).getReg()), diff --git a/llvm/lib/Target/AIE/AIECombinerHelper.cpp b/llvm/lib/Target/AIE/AIECombinerHelper.cpp index 2272c13d047d..a22d4cab7d27 100644 --- a/llvm/lib/Target/AIE/AIECombinerHelper.cpp +++ b/llvm/lib/Target/AIE/AIECombinerHelper.cpp @@ -567,6 +567,18 @@ void llvm::applyGlobalValOffset(MachineInstr &MI, MachineRegisterInfo &MRI, B.buildConstant(LLT::scalar(20), -static_cast(Offset))); } +/// Determine if the instruction is a generic extract vector element operation +static bool IsGenericExtractVectorElt(const MachineInstr &MI) { + const AIEBaseSubtarget &STI = AIEBaseSubtarget::get(*MI.getMF()); + const AIEBaseInstrInfo *TII = STI.getInstrInfo(); + const unsigned Opcode = MI.getOpcode(); + + if (Opcode == TII->getGenericExtractVectorEltOpcode(false) || + Opcode == TII->getGenericExtractVectorEltOpcode(true)) + return true; + return false; +} + /// Checks whether the instruction produces or can be adapted to produce /// a single S20 output. 
static bool canProduceS20(const MachineRegisterInfo &MRI, @@ -581,9 +593,12 @@ static bool canProduceS20(const MachineRegisterInfo &MRI, case TargetOpcode::G_CONSTANT: case TargetOpcode::G_IMPLICIT_DEF: return true; - default: + default: { + if (IsGenericExtractVectorElt(MI)) + return true; return false; } + } } /// The function checks if the node can be adapted to produce an S20 value, and @@ -901,15 +916,19 @@ bool modifyToS20(InstrNode Start, MachineRegisterInfo &MRI, MachineIRBuilder &B, return true; } default: { - LLVM_DEBUG(dbgs() << "Node :" << *StartNodeMI); - llvm_unreachable("Unexpected OpCode, while modifying IR"); + if (IsGenericExtractVectorElt(*StartNodeMI)) { + Observer.changingInstr(*StartNodeMI); + MRI.setType(StartNodeMI->getOperand(0).getReg(), S20); + Observer.changedInstr(*StartNodeMI); + } else { + LLVM_DEBUG(dbgs() << "Node :" << *StartNodeMI); + llvm_unreachable("Unexpected OpCode, while modifying IR"); + } } } - switch (StartNodeMI->getOpcode()) { - case TargetOpcode::COPY: - case TargetOpcode::G_LOAD: - case TargetOpcode::G_PHI: { + // Function to handle the modification of instructions + auto ModifyInstructionUses = [&](MachineInstr *StartNodeMI) { const auto UseInstIter = MRI.use_nodbg_instructions(StartNodeMI->getOperand(0).getReg()); std::vector UseInstr; @@ -924,11 +943,22 @@ bool modifyToS20(InstrNode Start, MachineRegisterInfo &MRI, MachineIRBuilder &B, if (!modifyToS20(NextNodeToModify, MRI, B, Observer, Helper)) llvm_unreachable("All input nodes should have updated"); } + }; + + switch (StartNodeMI->getOpcode()) { + case TargetOpcode::COPY: + case TargetOpcode::G_LOAD: + case TargetOpcode::G_PHI: { + ModifyInstructionUses(StartNodeMI); break; } default: { - LLVM_DEBUG(dbgs() << "Node :" << *StartNodeMI); - llvm_unreachable("Unexpected OpCode, while modifying IR"); + if (IsGenericExtractVectorElt(*StartNodeMI)) { + ModifyInstructionUses(StartNodeMI); + } else { + LLVM_DEBUG(dbgs() << "Node :" << *StartNodeMI); + llvm_unreachable("Unexpected OpCode, while modifying IR"); + } } } return true; diff --git a/llvm/test/CodeGen/AIE/aie2/GlobalISel/prelegalizercombiner-s20-narrowing.mir b/llvm/test/CodeGen/AIE/aie2/GlobalISel/prelegalizercombiner-s20-narrowing.mir index cadd495db6af..834717d24ad4 100644 --- a/llvm/test/CodeGen/AIE/aie2/GlobalISel/prelegalizercombiner-s20-narrowing.mir +++ b/llvm/test/CodeGen/AIE/aie2/GlobalISel/prelegalizercombiner-s20-narrowing.mir @@ -870,3 +870,294 @@ body: | G_STORE %50:_(s16), %6:_(p0) :: (store (s16)) G_BR %bb.2 ... 
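
The tests added below check that the s32 result of the generic extract is retyped to s20 in place and that the intermediate G_TRUNC disappears. The retyping added to modifyToS20 above follows the usual observer-notified mutation pattern; condensed into a standalone helper (the name is illustrative):

  #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"

  using namespace llvm;

  // Narrows the destination of a G_AIE_*_EXTRACT_VECTOR_ELT from s32 to s20,
  // notifying the combiner observer about the in-place change.
  static void narrowExtractDstToS20(MachineInstr &MI, MachineRegisterInfo &MRI,
                                    GISelChangeObserver &Observer) {
    Observer.changingInstr(MI);
    MRI.setType(MI.getOperand(0).getReg(), LLT::scalar(20));
    Observer.changedInstr(MI);
  }
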
+ +--- +name: valid_vextract8_add2d +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0 + + ; CHECK-LABEL: name: valid_vextract8_add2d + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<64 x s8>) = COPY $x0 + ; CHECK-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[COPY]](<64 x s8>), [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p0), [[INT1:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20) + ; CHECK-NEXT: $p0 = COPY [[INT]](p0) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(<64 x s8>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32) + %4:_(s20) = G_TRUNC %3(s32) + %5:_(p0) = G_CONSTANT i20 0 + %6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20) + $p0 = COPY %6 +... + +--- +name: valid_vextract16_add2d +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0 + + ; CHECK-LABEL: name: valid_vextract16_add2d + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x0 + ; CHECK-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[COPY]](<32 x s16>), [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p0), [[INT1:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20) + ; CHECK-NEXT: $p0 = COPY [[INT]](p0) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(<32 x s16>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32) + %4:_(s20) = G_TRUNC %3(s32) + %5:_(p0) = G_CONSTANT i20 0 + %6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20) + $p0 = COPY %6 +... 
+ +--- +name: valid_vextract32_add2d +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0 + + ; CHECK-LABEL: name: valid_vextract32_add2d + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0 + ; CHECK-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p0), [[INT1:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20) + ; CHECK-NEXT: $p0 = COPY [[INT]](p0) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(<16 x s32>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem32.I512), %2(<16 x s32>), %0(s32), %1(s32) + %4:_(s20) = G_TRUNC %3(s32) + %5:_(p0) = G_CONSTANT i20 0 + %6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20) + $p0 = COPY %6 +... + +# Only one Src Node (vextract8) for G_PTR_ADD that is narrowed to S20 type, intermediate G_TRUNC is removed +--- +name: valid_vextract8_PTR_ADD +legalized: false +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: valid_vextract8_PTR_ADD + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $p0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<64 x s8>) = COPY $x0 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY1]](<64 x s8>), [[C]](s32) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s20) + ; CHECK-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32)) + ; CHECK-NEXT: G_BR %bb.1 + bb.1: + successors: %bb.2(0x80000000); %bb.2(100.00%) + liveins: $p0, $x0 + %0:_(p0) = COPY $p0 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(s32) = G_CONSTANT i32 1 + %3:_(<64 x s8>) = COPY $x0 + %4:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %3(<64 x s8>), %1(s32), %2(s32) + + bb.2: + successors: %bb.2(0x80000000); %bb.2(100.00%) + + %5:_(s20) = G_TRUNC %4:_(s32) + %6:_(p0) = G_PTR_ADD %0:_, %5:_(s20) + G_STORE %1:_(s32), %6:_(p0) :: (store (s32)) + G_BR %bb.2 +... 
+ +# Only one Src Node (vextract16) for G_PTR_ADD that is narrowed to S20 type, intermediate G_TRUNC is removed +--- +name: valid_vextract16_PTR_ADD +legalized: false +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: valid_vextract16_PTR_ADD + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $p0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<32 x s16>) = COPY $x0 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY1]](<32 x s16>), [[C]](s32) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s20) + ; CHECK-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32)) + ; CHECK-NEXT: G_BR %bb.1 + bb.1: + successors: %bb.2(0x80000000); %bb.2(100.00%) + liveins: $p0, $x0 + %0:_(p0) = COPY $p0 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(s32) = G_CONSTANT i32 1 + %3:_(<32 x s16>) = COPY $x0 + %4:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %3(<32 x s16>), %1(s32), %2(s32) + + bb.2: + successors: %bb.2(0x80000000); %bb.2(100.00%) + + %5:_(s20) = G_TRUNC %4:_(s32) + %6:_(p0) = G_PTR_ADD %0:_, %5:_(s20) + G_STORE %1:_(s32), %6:_(p0) :: (store (s32)) + G_BR %bb.2 +... + +# Only one Src Node (vextract32) for G_PTR_ADD that is narrowed to S20 type, intermediate G_TRUNC is removed +--- +name: valid_vextract32_PTR_ADD +legalized: false +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: valid_vextract32_PTR_ADD + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $p0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s32>) = COPY $x0 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY1]](<16 x s32>), [[C]](s32) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s20) + ; CHECK-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32)) + ; CHECK-NEXT: G_BR %bb.1 + bb.1: + successors: %bb.2(0x80000000); %bb.2(100.00%) + liveins: $p0, $x0 + %0:_(p0) = COPY $p0 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(s32) = G_CONSTANT i32 1 + %3:_(<16 x s32>) = COPY $x0 + %4:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem32.I512), %3(<16 x s32>), %1(s32), %2(s32) + + bb.2: + successors: %bb.2(0x80000000); %bb.2(100.00%) + + %5:_(s20) = G_TRUNC %4:_(s32) + %6:_(p0) = G_PTR_ADD %0:_, %5:_(s20) + G_STORE %1:_(s32), %6:_(p0) :: (store (s32)) + G_BR %bb.2 +... 
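
On AIE2, pointers are 20 bits wide (the p0 constants above are i20), so feeding the retyped s20 extract result straight into G_PTR_ADD needs no further widening or truncation, which is presumably the point of narrowing it. A small builder-level sketch of producing that shape, assuming an already s20-typed index register:

  #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"

  using namespace llvm;

  // Adds an s20 index (e.g. the retyped extract result) to a p0 base pointer.
  static Register buildIndexedAddress(MachineIRBuilder &B, Register Base,
                                      Register IdxS20) {
    return B.buildPtrAdd(LLT::pointer(0, 20), Base, IdxS20).getReg(0);
  }
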
+ +# Negative Test Case: Narrowing to s20 is not possible because the vextract8 source node has a non-constant sign register +--- +name: valid_vextract8_add2d_neg +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0, $r0 + + ; CHECK-LABEL: name: valid_vextract8_add2d_neg + ; CHECK: liveins: $x0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<64 x s8>) = COPY $x0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), [[COPY1]](<64 x s8>), [[C]](s32), [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[INT]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0 + ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(p0), [[INT2:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20) + ; CHECK-NEXT: $p0 = COPY [[INT1]](p0) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = COPY $r0 + %2:_(<64 x s8>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32) + %4:_(s20) = G_TRUNC %3(s32) + %5:_(p0) = G_CONSTANT i20 0 + %6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20) + $p0 = COPY %6 +... + +# Negative Test Case: Narrowing to s20 is not possible because the vextract16 source node has a non-constant sign register +--- +name: valid_vextract16_add2d_neg +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0, $r0 + + ; CHECK-LABEL: name: valid_vextract16_add2d_neg + ; CHECK: liveins: $x0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<32 x s16>) = COPY $x0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), [[COPY1]](<32 x s16>), [[C]](s32), [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[INT]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0 + ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(p0), [[INT2:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20) + ; CHECK-NEXT: $p0 = COPY [[INT1]](p0) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = COPY $r0 + %2:_(<32 x s16>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32) + %4:_(s20) = G_TRUNC %3(s32) + %5:_(p0) = G_CONSTANT i20 0 + %6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20) + $p0 = COPY %6 +... 
+ +# Negative Test Case: Narrowing to s20 is not possible because the vextract32 source node has a non-constant sign register +--- +name: valid_vextract32_add2d_neg +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0, $r0 + + ; CHECK-LABEL: name: valid_vextract32_add2d_neg + ; CHECK: liveins: $x0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s32>) = COPY $x0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem32.I512), [[COPY1]](<16 x s32>), [[C]](s32), [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[INT]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0 + ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(p0), [[INT2:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20) + ; CHECK-NEXT: $p0 = COPY [[INT1]](p0) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = COPY $r0 + %2:_(<16 x s32>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem32.I512), %2(<16 x s32>), %0(s32), %1(s32) + %4:_(s20) = G_TRUNC %3(s32) + %5:_(p0) = G_CONSTANT i20 0 + %6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20) + $p0 = COPY %6 +... diff --git a/llvm/test/CodeGen/AIE/aie2/verifier/verify-szext-extract-vec-elt.mir b/llvm/test/CodeGen/AIE/aie2/verifier/verify-szext-extract-vec-elt.mir index b9f489914d56..b56fa14667f8 100644 --- a/llvm/test/CodeGen/AIE/aie2/verifier/verify-szext-extract-vec-elt.mir +++ b/llvm/test/CodeGen/AIE/aie2/verifier/verify-szext-extract-vec-elt.mir @@ -18,6 +18,8 @@ body: | %1:_(s32) = G_CONSTANT i32 1 %2:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %0(<16 x s16>), %1(s32) %3:_(s32) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT %0(<16 x s16>), %1(s32) + %4:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %0(<16 x s16>), %1(s32) + %5:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT %0(<16 x s16>), %1(s32) ... --- @@ -25,7 +27,7 @@ name: nok alignment: 16 body: | bb.0 (align 16): - ; CHECK-COUNT-4: Bad machine code: Expected 32bit scalar destination + ; CHECK-COUNT-4: Bad machine code: Expected 32bit or 20bit scalar destination ; CHECK-NOT: Bad machine code %0:_(<16 x s16>) = COPY $wl0 %1:_(s32) = G_CONSTANT i32 1